{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999891664680519, "eval_steps": 500, "global_step": 55383, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 9.027943290071429e-05, "grad_norm": 2.874858856201172, "learning_rate": 1.9999999597786356e-05, "loss": 0.5785, "step": 5 }, { "epoch": 0.00018055886580142859, "grad_norm": 1.3120938539505005, "learning_rate": 1.999999839114545e-05, "loss": 0.5951, "step": 10 }, { "epoch": 0.00027083829870214285, "grad_norm": 1.8099818229675293, "learning_rate": 1.999999638007738e-05, "loss": 0.4052, "step": 15 }, { "epoch": 0.00036111773160285717, "grad_norm": 2.011772394180298, "learning_rate": 1.9999993564582305e-05, "loss": 0.4159, "step": 20 }, { "epoch": 0.00045139716450357144, "grad_norm": 1.8650308847427368, "learning_rate": 1.999998994466046e-05, "loss": 0.4756, "step": 25 }, { "epoch": 0.0005416765974042857, "grad_norm": 0.7753961682319641, "learning_rate": 1.9999985520312126e-05, "loss": 0.5705, "step": 30 }, { "epoch": 0.0006319560303050001, "grad_norm": 1.2066733837127686, "learning_rate": 1.9999980291537665e-05, "loss": 0.4, "step": 35 }, { "epoch": 0.0007222354632057143, "grad_norm": 1.1299852132797241, "learning_rate": 1.9999974258337497e-05, "loss": 0.5143, "step": 40 }, { "epoch": 0.0008125148961064286, "grad_norm": 1.0047327280044556, "learning_rate": 1.9999967420712108e-05, "loss": 0.4731, "step": 45 }, { "epoch": 0.0009027943290071429, "grad_norm": 1.0650899410247803, "learning_rate": 1.9999959778662046e-05, "loss": 0.4378, "step": 50 }, { "epoch": 0.0009930737619078573, "grad_norm": 1.0157448053359985, "learning_rate": 1.999995133218793e-05, "loss": 0.4402, "step": 55 }, { "epoch": 0.0010833531948085714, "grad_norm": 0.650635302066803, "learning_rate": 1.999994208129043e-05, "loss": 0.4935, "step": 60 }, { "epoch": 0.0011736326277092858, "grad_norm": 0.9630825519561768, "learning_rate": 1.99999320259703e-05, "loss": 0.3306, "step": 65 }, { "epoch": 0.0012639120606100002, "grad_norm": 0.9416863918304443, "learning_rate": 1.999992116622834e-05, "loss": 0.4445, "step": 70 }, { "epoch": 0.0013541914935107143, "grad_norm": 0.8302100896835327, "learning_rate": 1.9999909502065433e-05, "loss": 0.4469, "step": 75 }, { "epoch": 0.0014444709264114287, "grad_norm": 1.1551083326339722, "learning_rate": 1.999989703348251e-05, "loss": 0.3983, "step": 80 }, { "epoch": 0.0015347503593121428, "grad_norm": 1.1567633152008057, "learning_rate": 1.9999883760480585e-05, "loss": 0.4067, "step": 85 }, { "epoch": 0.0016250297922128572, "grad_norm": 1.1382306814193726, "learning_rate": 1.9999869683060708e-05, "loss": 0.4439, "step": 90 }, { "epoch": 0.0017153092251135716, "grad_norm": 0.9161384105682373, "learning_rate": 1.9999854801224025e-05, "loss": 0.4142, "step": 95 }, { "epoch": 0.0018055886580142858, "grad_norm": 1.4126842021942139, "learning_rate": 1.9999839114971727e-05, "loss": 0.4189, "step": 100 }, { "epoch": 0.0018958680909150001, "grad_norm": 0.44530755281448364, "learning_rate": 1.9999822624305083e-05, "loss": 0.3662, "step": 105 }, { "epoch": 0.0019861475238157145, "grad_norm": 1.3806456327438354, "learning_rate": 1.999980532922541e-05, "loss": 0.4043, "step": 110 }, { "epoch": 0.002076426956716429, "grad_norm": 0.7793863415718079, "learning_rate": 1.9999787229734108e-05, "loss": 0.4335, "step": 115 }, { "epoch": 0.002166706389617143, "grad_norm": 0.9610792398452759, "learning_rate": 1.9999768325832626e-05, "loss": 0.455, "step": 120 }, { "epoch": 0.002256985822517857, "grad_norm": 1.081809163093567, "learning_rate": 1.9999748617522488e-05, "loss": 0.3969, "step": 125 }, { "epoch": 0.0023472652554185716, "grad_norm": 1.3969955444335938, "learning_rate": 1.9999728104805278e-05, "loss": 0.4921, "step": 130 }, { "epoch": 0.002437544688319286, "grad_norm": 0.729268491268158, "learning_rate": 1.999970678768265e-05, "loss": 0.3292, "step": 135 }, { "epoch": 0.0025278241212200003, "grad_norm": 1.072990894317627, "learning_rate": 1.9999684666156313e-05, "loss": 0.4629, "step": 140 }, { "epoch": 0.0026181035541207143, "grad_norm": 1.3183773756027222, "learning_rate": 1.999966174022805e-05, "loss": 0.4235, "step": 145 }, { "epoch": 0.0027083829870214286, "grad_norm": 1.0797187089920044, "learning_rate": 1.9999638009899706e-05, "loss": 0.4071, "step": 150 }, { "epoch": 0.002798662419922143, "grad_norm": 0.5764801502227783, "learning_rate": 1.9999613475173184e-05, "loss": 0.4883, "step": 155 }, { "epoch": 0.0028889418528228574, "grad_norm": 1.0525012016296387, "learning_rate": 1.999958813605047e-05, "loss": 0.4519, "step": 160 }, { "epoch": 0.0029792212857235718, "grad_norm": 1.0718201398849487, "learning_rate": 1.9999561992533588e-05, "loss": 0.4336, "step": 165 }, { "epoch": 0.0030695007186242857, "grad_norm": 0.711060106754303, "learning_rate": 1.999953504462465e-05, "loss": 0.5076, "step": 170 }, { "epoch": 0.003159780151525, "grad_norm": 4.838633060455322, "learning_rate": 1.9999507292325822e-05, "loss": 0.4658, "step": 175 }, { "epoch": 0.0032500595844257144, "grad_norm": 1.0590788125991821, "learning_rate": 1.9999478735639333e-05, "loss": 0.3615, "step": 180 }, { "epoch": 0.003340339017326429, "grad_norm": 0.704115092754364, "learning_rate": 1.9999449374567484e-05, "loss": 0.4115, "step": 185 }, { "epoch": 0.003430618450227143, "grad_norm": 0.6360671520233154, "learning_rate": 1.9999419209112637e-05, "loss": 0.4122, "step": 190 }, { "epoch": 0.003520897883127857, "grad_norm": 0.7924306988716125, "learning_rate": 1.9999388239277217e-05, "loss": 0.4475, "step": 195 }, { "epoch": 0.0036111773160285715, "grad_norm": 1.2045217752456665, "learning_rate": 1.9999356465063714e-05, "loss": 0.4365, "step": 200 }, { "epoch": 0.003701456748929286, "grad_norm": 0.6561562418937683, "learning_rate": 1.9999323886474687e-05, "loss": 0.5271, "step": 205 }, { "epoch": 0.0037917361818300003, "grad_norm": 0.776882529258728, "learning_rate": 1.9999290503512755e-05, "loss": 0.4197, "step": 210 }, { "epoch": 0.0038820156147307146, "grad_norm": 0.7396628260612488, "learning_rate": 1.9999256316180604e-05, "loss": 0.425, "step": 215 }, { "epoch": 0.003972295047631429, "grad_norm": 0.872489333152771, "learning_rate": 1.9999221324480986e-05, "loss": 0.4407, "step": 220 }, { "epoch": 0.004062574480532143, "grad_norm": 0.542452335357666, "learning_rate": 1.9999185528416712e-05, "loss": 0.3691, "step": 225 }, { "epoch": 0.004152853913432858, "grad_norm": 0.538821816444397, "learning_rate": 1.9999148927990663e-05, "loss": 0.3504, "step": 230 }, { "epoch": 0.004243133346333572, "grad_norm": 0.6316237449645996, "learning_rate": 1.9999111523205787e-05, "loss": 0.4251, "step": 235 }, { "epoch": 0.004333412779234286, "grad_norm": 0.9031202793121338, "learning_rate": 1.9999073314065084e-05, "loss": 0.5062, "step": 240 }, { "epoch": 0.0044236922121350004, "grad_norm": 0.5457375645637512, "learning_rate": 1.9999034300571636e-05, "loss": 0.4341, "step": 245 }, { "epoch": 0.004513971645035714, "grad_norm": 1.1680082082748413, "learning_rate": 1.999899448272858e-05, "loss": 0.4187, "step": 250 }, { "epoch": 0.004604251077936429, "grad_norm": 1.056076169013977, "learning_rate": 1.9998953860539116e-05, "loss": 0.3579, "step": 255 }, { "epoch": 0.004694530510837143, "grad_norm": 0.7869812846183777, "learning_rate": 1.999891243400651e-05, "loss": 0.4457, "step": 260 }, { "epoch": 0.004784809943737857, "grad_norm": 0.7695295214653015, "learning_rate": 1.9998870203134104e-05, "loss": 0.3827, "step": 265 }, { "epoch": 0.004875089376638572, "grad_norm": 1.0620439052581787, "learning_rate": 1.9998827167925284e-05, "loss": 0.477, "step": 270 }, { "epoch": 0.004965368809539286, "grad_norm": 0.7931966185569763, "learning_rate": 1.999878332838352e-05, "loss": 0.2773, "step": 275 }, { "epoch": 0.005055648242440001, "grad_norm": 0.8393620252609253, "learning_rate": 1.9998738684512335e-05, "loss": 0.4401, "step": 280 }, { "epoch": 0.005145927675340715, "grad_norm": 0.7053926587104797, "learning_rate": 1.999869323631532e-05, "loss": 0.3441, "step": 285 }, { "epoch": 0.0052362071082414285, "grad_norm": 0.5869619846343994, "learning_rate": 1.999864698379613e-05, "loss": 0.402, "step": 290 }, { "epoch": 0.005326486541142143, "grad_norm": 0.5323655009269714, "learning_rate": 1.999859992695849e-05, "loss": 0.3474, "step": 295 }, { "epoch": 0.005416765974042857, "grad_norm": 0.8899840712547302, "learning_rate": 1.9998552065806183e-05, "loss": 0.5102, "step": 300 }, { "epoch": 0.005507045406943572, "grad_norm": 0.5503301620483398, "learning_rate": 1.9998503400343058e-05, "loss": 0.5319, "step": 305 }, { "epoch": 0.005597324839844286, "grad_norm": 0.8930386304855347, "learning_rate": 1.999845393057303e-05, "loss": 0.4279, "step": 310 }, { "epoch": 0.005687604272745, "grad_norm": 0.7680234313011169, "learning_rate": 1.9998403656500082e-05, "loss": 0.445, "step": 315 }, { "epoch": 0.005777883705645715, "grad_norm": 0.6237252354621887, "learning_rate": 1.999835257812825e-05, "loss": 0.3322, "step": 320 }, { "epoch": 0.005868163138546429, "grad_norm": 0.726040244102478, "learning_rate": 1.999830069546165e-05, "loss": 0.3773, "step": 325 }, { "epoch": 0.0059584425714471435, "grad_norm": 0.8470166921615601, "learning_rate": 1.9998248008504454e-05, "loss": 0.4471, "step": 330 }, { "epoch": 0.0060487220043478574, "grad_norm": 0.7286909222602844, "learning_rate": 1.99981945172609e-05, "loss": 0.3542, "step": 335 }, { "epoch": 0.006139001437248571, "grad_norm": 1.338836908340454, "learning_rate": 1.9998140221735292e-05, "loss": 0.2662, "step": 340 }, { "epoch": 0.006229280870149286, "grad_norm": 0.9748361706733704, "learning_rate": 1.9998085121931995e-05, "loss": 0.4134, "step": 345 }, { "epoch": 0.00631956030305, "grad_norm": 0.4502583146095276, "learning_rate": 1.9998029217855444e-05, "loss": 0.4096, "step": 350 }, { "epoch": 0.006409839735950715, "grad_norm": 0.6040946245193481, "learning_rate": 1.9997972509510136e-05, "loss": 0.3897, "step": 355 }, { "epoch": 0.006500119168851429, "grad_norm": 0.8161752223968506, "learning_rate": 1.999791499690063e-05, "loss": 0.3511, "step": 360 }, { "epoch": 0.006590398601752143, "grad_norm": 0.436606764793396, "learning_rate": 1.999785668003156e-05, "loss": 0.4545, "step": 365 }, { "epoch": 0.006680678034652858, "grad_norm": 0.7064688205718994, "learning_rate": 1.9997797558907604e-05, "loss": 0.3563, "step": 370 }, { "epoch": 0.006770957467553572, "grad_norm": 0.5320123434066772, "learning_rate": 1.999773763353353e-05, "loss": 0.4064, "step": 375 }, { "epoch": 0.006861236900454286, "grad_norm": 0.6215265393257141, "learning_rate": 1.999767690391415e-05, "loss": 0.3495, "step": 380 }, { "epoch": 0.006951516333355, "grad_norm": 0.8487299680709839, "learning_rate": 1.9997615370054357e-05, "loss": 0.3919, "step": 385 }, { "epoch": 0.007041795766255714, "grad_norm": 0.7624923586845398, "learning_rate": 1.9997553031959093e-05, "loss": 0.4063, "step": 390 }, { "epoch": 0.007132075199156429, "grad_norm": 0.5340708494186401, "learning_rate": 1.9997489889633385e-05, "loss": 0.3908, "step": 395 }, { "epoch": 0.007222354632057143, "grad_norm": 0.8394657373428345, "learning_rate": 1.9997425943082297e-05, "loss": 0.4193, "step": 400 }, { "epoch": 0.007312634064957858, "grad_norm": 4.428290367126465, "learning_rate": 1.999736119231098e-05, "loss": 0.5262, "step": 405 }, { "epoch": 0.007402913497858572, "grad_norm": 0.6705797910690308, "learning_rate": 1.9997295637324644e-05, "loss": 0.4652, "step": 410 }, { "epoch": 0.0074931929307592866, "grad_norm": 0.5221576690673828, "learning_rate": 1.9997229278128562e-05, "loss": 0.4423, "step": 415 }, { "epoch": 0.0075834723636600005, "grad_norm": 0.7182873487472534, "learning_rate": 1.9997162114728073e-05, "loss": 0.3028, "step": 420 }, { "epoch": 0.0076737517965607144, "grad_norm": 0.6148266196250916, "learning_rate": 1.999709414712858e-05, "loss": 0.3227, "step": 425 }, { "epoch": 0.007764031229461429, "grad_norm": 0.541401743888855, "learning_rate": 1.9997025375335542e-05, "loss": 0.4572, "step": 430 }, { "epoch": 0.007854310662362144, "grad_norm": 0.7312717437744141, "learning_rate": 1.99969557993545e-05, "loss": 0.4082, "step": 435 }, { "epoch": 0.007944590095262858, "grad_norm": 0.7365412712097168, "learning_rate": 1.9996885419191053e-05, "loss": 0.3968, "step": 440 }, { "epoch": 0.008034869528163572, "grad_norm": 0.6317394375801086, "learning_rate": 1.9996814234850855e-05, "loss": 0.3782, "step": 445 }, { "epoch": 0.008125148961064286, "grad_norm": 0.49483615159988403, "learning_rate": 1.9996742246339637e-05, "loss": 0.4659, "step": 450 }, { "epoch": 0.008215428393965, "grad_norm": 0.5639483332633972, "learning_rate": 1.9996669453663186e-05, "loss": 0.3857, "step": 455 }, { "epoch": 0.008305707826865716, "grad_norm": 0.5206676125526428, "learning_rate": 1.9996595856827366e-05, "loss": 0.305, "step": 460 }, { "epoch": 0.00839598725976643, "grad_norm": 0.6142938137054443, "learning_rate": 1.9996521455838087e-05, "loss": 0.3415, "step": 465 }, { "epoch": 0.008486266692667143, "grad_norm": 0.6466097235679626, "learning_rate": 1.999644625070134e-05, "loss": 0.4893, "step": 470 }, { "epoch": 0.008576546125567857, "grad_norm": 0.6824833154678345, "learning_rate": 1.9996370241423172e-05, "loss": 0.6019, "step": 475 }, { "epoch": 0.008666825558468571, "grad_norm": 0.42456185817718506, "learning_rate": 1.99962934280097e-05, "loss": 0.349, "step": 480 }, { "epoch": 0.008757104991369287, "grad_norm": 0.7232670187950134, "learning_rate": 1.9996215810467102e-05, "loss": 0.4968, "step": 485 }, { "epoch": 0.008847384424270001, "grad_norm": 0.7302543520927429, "learning_rate": 1.9996137388801622e-05, "loss": 0.447, "step": 490 }, { "epoch": 0.008937663857170715, "grad_norm": 0.4422917068004608, "learning_rate": 1.9996058163019566e-05, "loss": 0.4172, "step": 495 }, { "epoch": 0.009027943290071429, "grad_norm": 0.51226407289505, "learning_rate": 1.999597813312731e-05, "loss": 0.3597, "step": 500 }, { "epoch": 0.009118222722972143, "grad_norm": 0.7817170023918152, "learning_rate": 1.999589729913129e-05, "loss": 0.4316, "step": 505 }, { "epoch": 0.009208502155872858, "grad_norm": 0.5358217358589172, "learning_rate": 1.999581566103801e-05, "loss": 0.3517, "step": 510 }, { "epoch": 0.009298781588773572, "grad_norm": 0.7174142003059387, "learning_rate": 1.9995733218854038e-05, "loss": 0.4131, "step": 515 }, { "epoch": 0.009389061021674286, "grad_norm": 0.5455174446105957, "learning_rate": 1.9995649972586005e-05, "loss": 0.3305, "step": 520 }, { "epoch": 0.009479340454575, "grad_norm": 1.0940828323364258, "learning_rate": 1.9995565922240606e-05, "loss": 0.4694, "step": 525 }, { "epoch": 0.009569619887475714, "grad_norm": 0.7484090328216553, "learning_rate": 1.99954810678246e-05, "loss": 0.348, "step": 530 }, { "epoch": 0.00965989932037643, "grad_norm": 0.6891335248947144, "learning_rate": 1.999539540934482e-05, "loss": 0.4109, "step": 535 }, { "epoch": 0.009750178753277144, "grad_norm": 0.46111416816711426, "learning_rate": 1.9995308946808153e-05, "loss": 0.373, "step": 540 }, { "epoch": 0.009840458186177858, "grad_norm": 0.5738707780838013, "learning_rate": 1.9995221680221554e-05, "loss": 0.4119, "step": 545 }, { "epoch": 0.009930737619078572, "grad_norm": 2.263345241546631, "learning_rate": 1.9995133609592043e-05, "loss": 0.4038, "step": 550 }, { "epoch": 0.010021017051979286, "grad_norm": 0.5366809368133545, "learning_rate": 1.9995044734926705e-05, "loss": 0.5166, "step": 555 }, { "epoch": 0.010111296484880001, "grad_norm": 0.591261088848114, "learning_rate": 1.999495505623269e-05, "loss": 0.3459, "step": 560 }, { "epoch": 0.010201575917780715, "grad_norm": 0.513255774974823, "learning_rate": 1.999486457351721e-05, "loss": 0.3736, "step": 565 }, { "epoch": 0.01029185535068143, "grad_norm": 0.8982439041137695, "learning_rate": 1.9994773286787544e-05, "loss": 0.3046, "step": 570 }, { "epoch": 0.010382134783582143, "grad_norm": 0.5183768272399902, "learning_rate": 1.9994681196051036e-05, "loss": 0.2987, "step": 575 }, { "epoch": 0.010472414216482857, "grad_norm": 0.595223069190979, "learning_rate": 1.9994588301315092e-05, "loss": 0.4161, "step": 580 }, { "epoch": 0.010562693649383573, "grad_norm": 0.5426445603370667, "learning_rate": 1.999449460258719e-05, "loss": 0.3414, "step": 585 }, { "epoch": 0.010652973082284287, "grad_norm": 0.7920253872871399, "learning_rate": 1.9994400099874864e-05, "loss": 0.3525, "step": 590 }, { "epoch": 0.010743252515185, "grad_norm": 0.3906225264072418, "learning_rate": 1.9994304793185718e-05, "loss": 0.2805, "step": 595 }, { "epoch": 0.010833531948085715, "grad_norm": 0.7094693183898926, "learning_rate": 1.9994208682527413e-05, "loss": 0.373, "step": 600 }, { "epoch": 0.010923811380986428, "grad_norm": 0.583308219909668, "learning_rate": 1.9994111767907685e-05, "loss": 0.4022, "step": 605 }, { "epoch": 0.011014090813887144, "grad_norm": 0.7794875502586365, "learning_rate": 1.9994014049334333e-05, "loss": 0.3641, "step": 610 }, { "epoch": 0.011104370246787858, "grad_norm": 0.613642156124115, "learning_rate": 1.9993915526815214e-05, "loss": 0.3642, "step": 615 }, { "epoch": 0.011194649679688572, "grad_norm": 0.8832647204399109, "learning_rate": 1.999381620035825e-05, "loss": 0.4075, "step": 620 }, { "epoch": 0.011284929112589286, "grad_norm": 0.47653380036354065, "learning_rate": 1.9993716069971435e-05, "loss": 0.2557, "step": 625 }, { "epoch": 0.01137520854549, "grad_norm": 0.6913342475891113, "learning_rate": 1.9993615135662825e-05, "loss": 0.4108, "step": 630 }, { "epoch": 0.011465487978390716, "grad_norm": 0.6558578610420227, "learning_rate": 1.9993513397440537e-05, "loss": 0.3773, "step": 635 }, { "epoch": 0.01155576741129143, "grad_norm": 0.7672843337059021, "learning_rate": 1.9993410855312757e-05, "loss": 0.3645, "step": 640 }, { "epoch": 0.011646046844192143, "grad_norm": 0.7909203171730042, "learning_rate": 1.9993307509287734e-05, "loss": 0.4225, "step": 645 }, { "epoch": 0.011736326277092857, "grad_norm": 0.5562388300895691, "learning_rate": 1.999320335937378e-05, "loss": 0.3487, "step": 650 }, { "epoch": 0.011826605709993571, "grad_norm": 0.49056869745254517, "learning_rate": 1.999309840557927e-05, "loss": 0.3879, "step": 655 }, { "epoch": 0.011916885142894287, "grad_norm": 0.4736185371875763, "learning_rate": 1.9992992647912653e-05, "loss": 0.3523, "step": 660 }, { "epoch": 0.012007164575795001, "grad_norm": 0.5065646767616272, "learning_rate": 1.9992886086382434e-05, "loss": 0.5149, "step": 665 }, { "epoch": 0.012097444008695715, "grad_norm": 0.44907593727111816, "learning_rate": 1.9992778720997185e-05, "loss": 0.3551, "step": 670 }, { "epoch": 0.012187723441596429, "grad_norm": 0.9845952987670898, "learning_rate": 1.999267055176554e-05, "loss": 0.3669, "step": 675 }, { "epoch": 0.012278002874497143, "grad_norm": 0.5695990920066833, "learning_rate": 1.9992561578696205e-05, "loss": 0.336, "step": 680 }, { "epoch": 0.012368282307397858, "grad_norm": 0.6252821683883667, "learning_rate": 1.999245180179794e-05, "loss": 0.3034, "step": 685 }, { "epoch": 0.012458561740298572, "grad_norm": 1.414514183998108, "learning_rate": 1.9992341221079584e-05, "loss": 0.3144, "step": 690 }, { "epoch": 0.012548841173199286, "grad_norm": 0.5516579151153564, "learning_rate": 1.9992229836550024e-05, "loss": 0.3976, "step": 695 }, { "epoch": 0.0126391206061, "grad_norm": 0.6496583223342896, "learning_rate": 1.999211764821823e-05, "loss": 0.3827, "step": 700 }, { "epoch": 0.012729400039000714, "grad_norm": 0.408225953578949, "learning_rate": 1.9992004656093217e-05, "loss": 0.3794, "step": 705 }, { "epoch": 0.01281967947190143, "grad_norm": 0.5428416728973389, "learning_rate": 1.999189086018408e-05, "loss": 0.3612, "step": 710 }, { "epoch": 0.012909958904802144, "grad_norm": 0.6431609392166138, "learning_rate": 1.999177626049997e-05, "loss": 0.3706, "step": 715 }, { "epoch": 0.013000238337702858, "grad_norm": 0.5790135264396667, "learning_rate": 1.9991660857050106e-05, "loss": 0.361, "step": 720 }, { "epoch": 0.013090517770603572, "grad_norm": 0.45080527663230896, "learning_rate": 1.9991544649843776e-05, "loss": 0.2959, "step": 725 }, { "epoch": 0.013180797203504286, "grad_norm": 0.5176795721054077, "learning_rate": 1.999142763889032e-05, "loss": 0.407, "step": 730 }, { "epoch": 0.013271076636405001, "grad_norm": 0.7002350091934204, "learning_rate": 1.999130982419916e-05, "loss": 0.383, "step": 735 }, { "epoch": 0.013361356069305715, "grad_norm": 0.48800963163375854, "learning_rate": 1.9991191205779764e-05, "loss": 0.3827, "step": 740 }, { "epoch": 0.01345163550220643, "grad_norm": 0.6332141160964966, "learning_rate": 1.9991071783641682e-05, "loss": 0.3713, "step": 745 }, { "epoch": 0.013541914935107143, "grad_norm": 0.6171541213989258, "learning_rate": 1.9990951557794514e-05, "loss": 0.4427, "step": 750 }, { "epoch": 0.013632194368007857, "grad_norm": 0.5151376724243164, "learning_rate": 1.9990830528247937e-05, "loss": 0.3107, "step": 755 }, { "epoch": 0.013722473800908573, "grad_norm": 0.3900178074836731, "learning_rate": 1.9990708695011685e-05, "loss": 0.3266, "step": 760 }, { "epoch": 0.013812753233809287, "grad_norm": 0.502527117729187, "learning_rate": 1.9990586058095557e-05, "loss": 0.4022, "step": 765 }, { "epoch": 0.01390303266671, "grad_norm": 0.7116743326187134, "learning_rate": 1.9990462617509418e-05, "loss": 0.4046, "step": 770 }, { "epoch": 0.013993312099610715, "grad_norm": 0.6583302617073059, "learning_rate": 1.99903383732632e-05, "loss": 0.3114, "step": 775 }, { "epoch": 0.014083591532511429, "grad_norm": 1.1406691074371338, "learning_rate": 1.9990213325366893e-05, "loss": 0.3487, "step": 780 }, { "epoch": 0.014173870965412144, "grad_norm": 0.5100271105766296, "learning_rate": 1.9990087473830567e-05, "loss": 0.3689, "step": 785 }, { "epoch": 0.014264150398312858, "grad_norm": 0.6300409436225891, "learning_rate": 1.9989960818664334e-05, "loss": 0.365, "step": 790 }, { "epoch": 0.014354429831213572, "grad_norm": 0.5625520944595337, "learning_rate": 1.9989833359878387e-05, "loss": 0.4753, "step": 795 }, { "epoch": 0.014444709264114286, "grad_norm": 0.6765584349632263, "learning_rate": 1.9989705097482982e-05, "loss": 0.2894, "step": 800 }, { "epoch": 0.014534988697015002, "grad_norm": 0.6390443444252014, "learning_rate": 1.998957603148843e-05, "loss": 0.4293, "step": 805 }, { "epoch": 0.014625268129915716, "grad_norm": 1.139591932296753, "learning_rate": 1.998944616190512e-05, "loss": 0.3266, "step": 810 }, { "epoch": 0.01471554756281643, "grad_norm": 0.677528440952301, "learning_rate": 1.99893154887435e-05, "loss": 0.4459, "step": 815 }, { "epoch": 0.014805826995717144, "grad_norm": 1.0746852159500122, "learning_rate": 1.9989184012014073e-05, "loss": 0.3805, "step": 820 }, { "epoch": 0.014896106428617857, "grad_norm": 0.7234352231025696, "learning_rate": 1.9989051731727424e-05, "loss": 0.2925, "step": 825 }, { "epoch": 0.014986385861518573, "grad_norm": 0.4438038170337677, "learning_rate": 1.998891864789419e-05, "loss": 0.3536, "step": 830 }, { "epoch": 0.015076665294419287, "grad_norm": 0.573958158493042, "learning_rate": 1.9988784760525073e-05, "loss": 0.5212, "step": 835 }, { "epoch": 0.015166944727320001, "grad_norm": 0.6227238178253174, "learning_rate": 1.9988650069630852e-05, "loss": 0.4595, "step": 840 }, { "epoch": 0.015257224160220715, "grad_norm": 0.5828679203987122, "learning_rate": 1.9988514575222356e-05, "loss": 0.3545, "step": 845 }, { "epoch": 0.015347503593121429, "grad_norm": 0.4298834800720215, "learning_rate": 1.9988378277310486e-05, "loss": 0.4366, "step": 850 }, { "epoch": 0.015437783026022145, "grad_norm": 0.5449682474136353, "learning_rate": 1.9988241175906204e-05, "loss": 0.4333, "step": 855 }, { "epoch": 0.015528062458922859, "grad_norm": 1.4678033590316772, "learning_rate": 1.9988103271020545e-05, "loss": 0.3295, "step": 860 }, { "epoch": 0.015618341891823572, "grad_norm": 1.1345720291137695, "learning_rate": 1.9987964562664597e-05, "loss": 0.494, "step": 865 }, { "epoch": 0.015708621324724288, "grad_norm": 0.6344099640846252, "learning_rate": 1.998782505084952e-05, "loss": 0.4833, "step": 870 }, { "epoch": 0.015798900757625002, "grad_norm": 0.4467625916004181, "learning_rate": 1.9987684735586533e-05, "loss": 0.2867, "step": 875 }, { "epoch": 0.015889180190525716, "grad_norm": 0.821846604347229, "learning_rate": 1.998754361688693e-05, "loss": 0.2907, "step": 880 }, { "epoch": 0.01597945962342643, "grad_norm": 0.4069104492664337, "learning_rate": 1.998740169476206e-05, "loss": 0.3291, "step": 885 }, { "epoch": 0.016069739056327144, "grad_norm": 0.7173675298690796, "learning_rate": 1.9987258969223334e-05, "loss": 0.4522, "step": 890 }, { "epoch": 0.016160018489227858, "grad_norm": 0.5012965798377991, "learning_rate": 1.9987115440282242e-05, "loss": 0.3062, "step": 895 }, { "epoch": 0.016250297922128572, "grad_norm": 0.5497738122940063, "learning_rate": 1.9986971107950325e-05, "loss": 0.2864, "step": 900 }, { "epoch": 0.016340577355029286, "grad_norm": 0.6531277894973755, "learning_rate": 1.9986825972239197e-05, "loss": 0.388, "step": 905 }, { "epoch": 0.01643085678793, "grad_norm": 0.622791588306427, "learning_rate": 1.9986680033160528e-05, "loss": 0.4287, "step": 910 }, { "epoch": 0.016521136220830714, "grad_norm": 0.5567917227745056, "learning_rate": 1.998653329072606e-05, "loss": 0.3517, "step": 915 }, { "epoch": 0.01661141565373143, "grad_norm": 0.4722416400909424, "learning_rate": 1.99863857449476e-05, "loss": 0.3337, "step": 920 }, { "epoch": 0.016701695086632145, "grad_norm": 0.6758808493614197, "learning_rate": 1.9986237395837015e-05, "loss": 0.4052, "step": 925 }, { "epoch": 0.01679197451953286, "grad_norm": 0.5101285576820374, "learning_rate": 1.998608824340624e-05, "loss": 0.3068, "step": 930 }, { "epoch": 0.016882253952433573, "grad_norm": 0.6921379566192627, "learning_rate": 1.9985938287667268e-05, "loss": 0.3838, "step": 935 }, { "epoch": 0.016972533385334287, "grad_norm": 0.5071640014648438, "learning_rate": 1.9985787528632167e-05, "loss": 0.3373, "step": 940 }, { "epoch": 0.017062812818235, "grad_norm": 0.5234969854354858, "learning_rate": 1.9985635966313065e-05, "loss": 0.3708, "step": 945 }, { "epoch": 0.017153092251135715, "grad_norm": 0.3468939960002899, "learning_rate": 1.998548360072215e-05, "loss": 0.3431, "step": 950 }, { "epoch": 0.01724337168403643, "grad_norm": 0.5182576179504395, "learning_rate": 1.998533043187168e-05, "loss": 0.3471, "step": 955 }, { "epoch": 0.017333651116937143, "grad_norm": 0.4680151641368866, "learning_rate": 1.9985176459773977e-05, "loss": 0.4855, "step": 960 }, { "epoch": 0.017423930549837856, "grad_norm": 0.5592324733734131, "learning_rate": 1.998502168444143e-05, "loss": 0.3305, "step": 965 }, { "epoch": 0.017514209982738574, "grad_norm": 0.5374587178230286, "learning_rate": 1.9984866105886484e-05, "loss": 0.3335, "step": 970 }, { "epoch": 0.017604489415639288, "grad_norm": 0.4037306308746338, "learning_rate": 1.998470972412166e-05, "loss": 0.4198, "step": 975 }, { "epoch": 0.017694768848540002, "grad_norm": 0.7788845300674438, "learning_rate": 1.998455253915953e-05, "loss": 0.4744, "step": 980 }, { "epoch": 0.017785048281440716, "grad_norm": 0.43953633308410645, "learning_rate": 1.9984394551012745e-05, "loss": 0.4211, "step": 985 }, { "epoch": 0.01787532771434143, "grad_norm": 0.5609700083732605, "learning_rate": 1.998423575969401e-05, "loss": 0.3761, "step": 990 }, { "epoch": 0.017965607147242144, "grad_norm": 0.5562583804130554, "learning_rate": 1.9984076165216106e-05, "loss": 0.4629, "step": 995 }, { "epoch": 0.018055886580142858, "grad_norm": 0.7671899795532227, "learning_rate": 1.9983915767591863e-05, "loss": 0.401, "step": 1000 }, { "epoch": 0.01814616601304357, "grad_norm": 0.44110381603240967, "learning_rate": 1.9983754566834185e-05, "loss": 0.43, "step": 1005 }, { "epoch": 0.018236445445944285, "grad_norm": 1.3212000131607056, "learning_rate": 1.998359256295604e-05, "loss": 0.4265, "step": 1010 }, { "epoch": 0.018326724878845, "grad_norm": 0.6430184841156006, "learning_rate": 1.9983429755970466e-05, "loss": 0.3988, "step": 1015 }, { "epoch": 0.018417004311745717, "grad_norm": 0.921191394329071, "learning_rate": 1.998326614589055e-05, "loss": 0.3036, "step": 1020 }, { "epoch": 0.01850728374464643, "grad_norm": 0.5766294598579407, "learning_rate": 1.998310173272946e-05, "loss": 0.4316, "step": 1025 }, { "epoch": 0.018597563177547145, "grad_norm": 0.4915804862976074, "learning_rate": 1.998293651650042e-05, "loss": 0.2762, "step": 1030 }, { "epoch": 0.01868784261044786, "grad_norm": 0.4122258126735687, "learning_rate": 1.9982770497216718e-05, "loss": 0.3431, "step": 1035 }, { "epoch": 0.018778122043348573, "grad_norm": 0.539563775062561, "learning_rate": 1.9982603674891714e-05, "loss": 0.3714, "step": 1040 }, { "epoch": 0.018868401476249286, "grad_norm": 0.59839928150177, "learning_rate": 1.9982436049538822e-05, "loss": 0.3429, "step": 1045 }, { "epoch": 0.01895868090915, "grad_norm": 0.6879011392593384, "learning_rate": 1.998226762117153e-05, "loss": 0.3609, "step": 1050 }, { "epoch": 0.019048960342050714, "grad_norm": 0.7054517269134521, "learning_rate": 1.9982098389803388e-05, "loss": 0.3349, "step": 1055 }, { "epoch": 0.01913923977495143, "grad_norm": 0.37130406498908997, "learning_rate": 1.9981928355448007e-05, "loss": 0.3453, "step": 1060 }, { "epoch": 0.019229519207852146, "grad_norm": 0.405746728181839, "learning_rate": 1.9981757518119064e-05, "loss": 0.3002, "step": 1065 }, { "epoch": 0.01931979864075286, "grad_norm": 0.49931222200393677, "learning_rate": 1.9981585877830304e-05, "loss": 0.2688, "step": 1070 }, { "epoch": 0.019410078073653574, "grad_norm": 0.7177553176879883, "learning_rate": 1.9981413434595533e-05, "loss": 0.3903, "step": 1075 }, { "epoch": 0.019500357506554288, "grad_norm": 0.46105924248695374, "learning_rate": 1.9981240188428622e-05, "loss": 0.4226, "step": 1080 }, { "epoch": 0.019590636939455, "grad_norm": 0.5767070651054382, "learning_rate": 1.998106613934351e-05, "loss": 0.3401, "step": 1085 }, { "epoch": 0.019680916372355715, "grad_norm": 0.41604000329971313, "learning_rate": 1.9980891287354195e-05, "loss": 0.3416, "step": 1090 }, { "epoch": 0.01977119580525643, "grad_norm": 0.45730865001678467, "learning_rate": 1.9980715632474746e-05, "loss": 0.4165, "step": 1095 }, { "epoch": 0.019861475238157143, "grad_norm": 0.4547334611415863, "learning_rate": 1.998053917471929e-05, "loss": 0.4183, "step": 1100 }, { "epoch": 0.019951754671057857, "grad_norm": 0.5578950643539429, "learning_rate": 1.9980361914102025e-05, "loss": 0.3189, "step": 1105 }, { "epoch": 0.02004203410395857, "grad_norm": 0.6025101542472839, "learning_rate": 1.9980183850637204e-05, "loss": 0.3669, "step": 1110 }, { "epoch": 0.02013231353685929, "grad_norm": 0.7206414341926575, "learning_rate": 1.998000498433916e-05, "loss": 0.375, "step": 1115 }, { "epoch": 0.020222592969760003, "grad_norm": 0.4863177239894867, "learning_rate": 1.997982531522227e-05, "loss": 0.284, "step": 1120 }, { "epoch": 0.020312872402660716, "grad_norm": 0.40691885352134705, "learning_rate": 1.9979644843301e-05, "loss": 0.4479, "step": 1125 }, { "epoch": 0.02040315183556143, "grad_norm": 0.5481736660003662, "learning_rate": 1.997946356858986e-05, "loss": 0.31, "step": 1130 }, { "epoch": 0.020493431268462144, "grad_norm": 0.7137806415557861, "learning_rate": 1.9979281491103433e-05, "loss": 0.4124, "step": 1135 }, { "epoch": 0.02058371070136286, "grad_norm": 0.6022760272026062, "learning_rate": 1.9979098610856368e-05, "loss": 0.4443, "step": 1140 }, { "epoch": 0.020673990134263572, "grad_norm": 0.47027885913848877, "learning_rate": 1.9978914927863375e-05, "loss": 0.3898, "step": 1145 }, { "epoch": 0.020764269567164286, "grad_norm": 0.7193593978881836, "learning_rate": 1.9978730442139226e-05, "loss": 0.3634, "step": 1150 }, { "epoch": 0.020854549000065, "grad_norm": 0.4863266944885254, "learning_rate": 1.9978545153698765e-05, "loss": 0.2502, "step": 1155 }, { "epoch": 0.020944828432965714, "grad_norm": 0.5944661498069763, "learning_rate": 1.99783590625569e-05, "loss": 0.3268, "step": 1160 }, { "epoch": 0.02103510786586643, "grad_norm": 0.6974284648895264, "learning_rate": 1.99781721687286e-05, "loss": 0.4665, "step": 1165 }, { "epoch": 0.021125387298767145, "grad_norm": 0.48131126165390015, "learning_rate": 1.9977984472228895e-05, "loss": 0.3469, "step": 1170 }, { "epoch": 0.02121566673166786, "grad_norm": 0.4336879849433899, "learning_rate": 1.9977795973072886e-05, "loss": 0.3365, "step": 1175 }, { "epoch": 0.021305946164568573, "grad_norm": 0.4767698049545288, "learning_rate": 1.9977606671275735e-05, "loss": 0.4261, "step": 1180 }, { "epoch": 0.021396225597469287, "grad_norm": 0.5864590406417847, "learning_rate": 1.9977416566852673e-05, "loss": 0.3919, "step": 1185 }, { "epoch": 0.02148650503037, "grad_norm": 0.4024285674095154, "learning_rate": 1.9977225659818992e-05, "loss": 0.312, "step": 1190 }, { "epoch": 0.021576784463270715, "grad_norm": 0.7056189775466919, "learning_rate": 1.9977033950190046e-05, "loss": 0.3784, "step": 1195 }, { "epoch": 0.02166706389617143, "grad_norm": 0.4819659888744354, "learning_rate": 1.9976841437981262e-05, "loss": 0.2945, "step": 1200 }, { "epoch": 0.021757343329072143, "grad_norm": 0.5364168882369995, "learning_rate": 1.997664812320812e-05, "loss": 0.3986, "step": 1205 }, { "epoch": 0.021847622761972857, "grad_norm": 0.5155913233757019, "learning_rate": 1.9976454005886176e-05, "loss": 0.353, "step": 1210 }, { "epoch": 0.021937902194873574, "grad_norm": 0.5517520308494568, "learning_rate": 1.997625908603104e-05, "loss": 0.4064, "step": 1215 }, { "epoch": 0.022028181627774288, "grad_norm": 0.4747216999530792, "learning_rate": 1.99760633636584e-05, "loss": 0.3965, "step": 1220 }, { "epoch": 0.022118461060675002, "grad_norm": 0.5541701316833496, "learning_rate": 1.997586683878399e-05, "loss": 0.4402, "step": 1225 }, { "epoch": 0.022208740493575716, "grad_norm": 0.3670824468135834, "learning_rate": 1.9975669511423628e-05, "loss": 0.4575, "step": 1230 }, { "epoch": 0.02229901992647643, "grad_norm": 0.36147984862327576, "learning_rate": 1.9975471381593184e-05, "loss": 0.2835, "step": 1235 }, { "epoch": 0.022389299359377144, "grad_norm": 0.7083765864372253, "learning_rate": 1.9975272449308595e-05, "loss": 0.4307, "step": 1240 }, { "epoch": 0.022479578792277858, "grad_norm": 0.5402193069458008, "learning_rate": 1.9975072714585863e-05, "loss": 0.3787, "step": 1245 }, { "epoch": 0.022569858225178572, "grad_norm": 0.6051380634307861, "learning_rate": 1.997487217744106e-05, "loss": 0.3636, "step": 1250 }, { "epoch": 0.022660137658079286, "grad_norm": 0.7156798243522644, "learning_rate": 1.9974670837890312e-05, "loss": 0.4672, "step": 1255 }, { "epoch": 0.02275041709098, "grad_norm": 1.068514347076416, "learning_rate": 1.997446869594982e-05, "loss": 0.4114, "step": 1260 }, { "epoch": 0.022840696523880717, "grad_norm": 0.926199197769165, "learning_rate": 1.9974265751635838e-05, "loss": 0.3667, "step": 1265 }, { "epoch": 0.02293097595678143, "grad_norm": 1.1801759004592896, "learning_rate": 1.9974062004964702e-05, "loss": 0.405, "step": 1270 }, { "epoch": 0.023021255389682145, "grad_norm": 0.38531816005706787, "learning_rate": 1.9973857455952795e-05, "loss": 0.4074, "step": 1275 }, { "epoch": 0.02311153482258286, "grad_norm": 0.48144111037254333, "learning_rate": 1.997365210461657e-05, "loss": 0.35, "step": 1280 }, { "epoch": 0.023201814255483573, "grad_norm": 0.4885618984699249, "learning_rate": 1.997344595097255e-05, "loss": 0.3415, "step": 1285 }, { "epoch": 0.023292093688384287, "grad_norm": 0.40975892543792725, "learning_rate": 1.997323899503732e-05, "loss": 0.3711, "step": 1290 }, { "epoch": 0.023382373121285, "grad_norm": 0.6145893335342407, "learning_rate": 1.9973031236827523e-05, "loss": 0.288, "step": 1295 }, { "epoch": 0.023472652554185715, "grad_norm": 0.7143402695655823, "learning_rate": 1.997282267635987e-05, "loss": 0.3985, "step": 1300 }, { "epoch": 0.02356293198708643, "grad_norm": 0.5758368372917175, "learning_rate": 1.997261331365115e-05, "loss": 0.3646, "step": 1305 }, { "epoch": 0.023653211419987143, "grad_norm": 0.7814615368843079, "learning_rate": 1.9972403148718193e-05, "loss": 0.4129, "step": 1310 }, { "epoch": 0.02374349085288786, "grad_norm": 0.404526948928833, "learning_rate": 1.9972192181577907e-05, "loss": 0.2616, "step": 1315 }, { "epoch": 0.023833770285788574, "grad_norm": 0.577202320098877, "learning_rate": 1.9971980412247266e-05, "loss": 0.4203, "step": 1320 }, { "epoch": 0.023924049718689288, "grad_norm": 0.5210227370262146, "learning_rate": 1.9971767840743305e-05, "loss": 0.4053, "step": 1325 }, { "epoch": 0.024014329151590002, "grad_norm": 0.5634742379188538, "learning_rate": 1.9971554467083123e-05, "loss": 0.3577, "step": 1330 }, { "epoch": 0.024104608584490716, "grad_norm": 0.6526023745536804, "learning_rate": 1.9971340291283883e-05, "loss": 0.3555, "step": 1335 }, { "epoch": 0.02419488801739143, "grad_norm": 0.5462536811828613, "learning_rate": 1.9971125313362818e-05, "loss": 0.3353, "step": 1340 }, { "epoch": 0.024285167450292144, "grad_norm": 0.7428454756736755, "learning_rate": 1.9970909533337216e-05, "loss": 0.391, "step": 1345 }, { "epoch": 0.024375446883192858, "grad_norm": 0.8522272706031799, "learning_rate": 1.9970692951224437e-05, "loss": 0.416, "step": 1350 }, { "epoch": 0.02446572631609357, "grad_norm": 0.5467855334281921, "learning_rate": 1.9970475567041907e-05, "loss": 0.3844, "step": 1355 }, { "epoch": 0.024556005748994286, "grad_norm": 0.5009450912475586, "learning_rate": 1.9970257380807107e-05, "loss": 0.3596, "step": 1360 }, { "epoch": 0.024646285181895003, "grad_norm": 0.9635720252990723, "learning_rate": 1.9970038392537593e-05, "loss": 0.4373, "step": 1365 }, { "epoch": 0.024736564614795717, "grad_norm": 1.1551811695098877, "learning_rate": 1.9969818602250982e-05, "loss": 0.447, "step": 1370 }, { "epoch": 0.02482684404769643, "grad_norm": 0.4225436747074127, "learning_rate": 1.996959800996495e-05, "loss": 0.3365, "step": 1375 }, { "epoch": 0.024917123480597145, "grad_norm": 0.512582540512085, "learning_rate": 1.996937661569724e-05, "loss": 0.374, "step": 1380 }, { "epoch": 0.02500740291349786, "grad_norm": 0.49112316966056824, "learning_rate": 1.996915441946567e-05, "loss": 0.2438, "step": 1385 }, { "epoch": 0.025097682346398573, "grad_norm": 0.48738208413124084, "learning_rate": 1.9968931421288112e-05, "loss": 0.29, "step": 1390 }, { "epoch": 0.025187961779299287, "grad_norm": 0.5263392925262451, "learning_rate": 1.9968707621182498e-05, "loss": 0.3913, "step": 1395 }, { "epoch": 0.0252782412122, "grad_norm": 0.4985851049423218, "learning_rate": 1.9968483019166838e-05, "loss": 0.3848, "step": 1400 }, { "epoch": 0.025368520645100714, "grad_norm": 0.5502220392227173, "learning_rate": 1.9968257615259192e-05, "loss": 0.2881, "step": 1405 }, { "epoch": 0.02545880007800143, "grad_norm": 0.7383748888969421, "learning_rate": 1.9968031409477703e-05, "loss": 0.3418, "step": 1410 }, { "epoch": 0.025549079510902146, "grad_norm": 0.4471767544746399, "learning_rate": 1.9967804401840558e-05, "loss": 0.3407, "step": 1415 }, { "epoch": 0.02563935894380286, "grad_norm": 2.0812489986419678, "learning_rate": 1.9967576592366023e-05, "loss": 0.3381, "step": 1420 }, { "epoch": 0.025729638376703574, "grad_norm": 0.6298109889030457, "learning_rate": 1.996734798107242e-05, "loss": 0.4014, "step": 1425 }, { "epoch": 0.025819917809604288, "grad_norm": 0.41603001952171326, "learning_rate": 1.9967118567978146e-05, "loss": 0.396, "step": 1430 }, { "epoch": 0.025910197242505, "grad_norm": 1.275228500366211, "learning_rate": 1.996688835310165e-05, "loss": 0.4296, "step": 1435 }, { "epoch": 0.026000476675405716, "grad_norm": 0.886063277721405, "learning_rate": 1.9966657336461452e-05, "loss": 0.4299, "step": 1440 }, { "epoch": 0.02609075610830643, "grad_norm": 0.6977846622467041, "learning_rate": 1.9966425518076136e-05, "loss": 0.4333, "step": 1445 }, { "epoch": 0.026181035541207143, "grad_norm": 0.26713240146636963, "learning_rate": 1.996619289796435e-05, "loss": 0.3639, "step": 1450 }, { "epoch": 0.026271314974107857, "grad_norm": 0.4383004605770111, "learning_rate": 1.9965959476144805e-05, "loss": 0.3157, "step": 1455 }, { "epoch": 0.02636159440700857, "grad_norm": 0.6261841058731079, "learning_rate": 1.9965725252636283e-05, "loss": 0.4359, "step": 1460 }, { "epoch": 0.02645187383990929, "grad_norm": 0.5010408759117126, "learning_rate": 1.9965490227457616e-05, "loss": 0.3469, "step": 1465 }, { "epoch": 0.026542153272810003, "grad_norm": 0.5412659645080566, "learning_rate": 1.9965254400627723e-05, "loss": 0.4394, "step": 1470 }, { "epoch": 0.026632432705710717, "grad_norm": 0.5586135387420654, "learning_rate": 1.9965017772165564e-05, "loss": 0.2828, "step": 1475 }, { "epoch": 0.02672271213861143, "grad_norm": 0.5518330931663513, "learning_rate": 1.996478034209018e-05, "loss": 0.2841, "step": 1480 }, { "epoch": 0.026812991571512144, "grad_norm": 0.39491382241249084, "learning_rate": 1.996454211042067e-05, "loss": 0.3561, "step": 1485 }, { "epoch": 0.02690327100441286, "grad_norm": 0.43156126141548157, "learning_rate": 1.9964303077176193e-05, "loss": 0.3998, "step": 1490 }, { "epoch": 0.026993550437313572, "grad_norm": 0.33977770805358887, "learning_rate": 1.996406324237598e-05, "loss": 0.3794, "step": 1495 }, { "epoch": 0.027083829870214286, "grad_norm": 0.9616397619247437, "learning_rate": 1.9963822606039328e-05, "loss": 0.4311, "step": 1500 }, { "epoch": 0.027174109303115, "grad_norm": 0.6763888597488403, "learning_rate": 1.996358116818559e-05, "loss": 0.3377, "step": 1505 }, { "epoch": 0.027264388736015714, "grad_norm": 0.40334710478782654, "learning_rate": 1.9963338928834188e-05, "loss": 0.3726, "step": 1510 }, { "epoch": 0.02735466816891643, "grad_norm": 0.5339846611022949, "learning_rate": 1.996309588800461e-05, "loss": 0.3261, "step": 1515 }, { "epoch": 0.027444947601817146, "grad_norm": 0.5830851197242737, "learning_rate": 1.996285204571641e-05, "loss": 0.4298, "step": 1520 }, { "epoch": 0.02753522703471786, "grad_norm": 0.5467507243156433, "learning_rate": 1.9962607401989196e-05, "loss": 0.4063, "step": 1525 }, { "epoch": 0.027625506467618573, "grad_norm": 0.7208880186080933, "learning_rate": 1.9962361956842655e-05, "loss": 0.2796, "step": 1530 }, { "epoch": 0.027715785900519287, "grad_norm": 0.5360137820243835, "learning_rate": 1.9962115710296525e-05, "loss": 0.3729, "step": 1535 }, { "epoch": 0.02780606533342, "grad_norm": 0.4716721773147583, "learning_rate": 1.996186866237062e-05, "loss": 0.3949, "step": 1540 }, { "epoch": 0.027896344766320715, "grad_norm": 0.5756022930145264, "learning_rate": 1.996162081308481e-05, "loss": 0.3719, "step": 1545 }, { "epoch": 0.02798662419922143, "grad_norm": 0.4410122036933899, "learning_rate": 1.996137216245903e-05, "loss": 0.3141, "step": 1550 }, { "epoch": 0.028076903632122143, "grad_norm": 0.4997660219669342, "learning_rate": 1.996112271051329e-05, "loss": 0.365, "step": 1555 }, { "epoch": 0.028167183065022857, "grad_norm": 0.47943833470344543, "learning_rate": 1.996087245726765e-05, "loss": 0.4094, "step": 1560 }, { "epoch": 0.028257462497923574, "grad_norm": 0.5912288427352905, "learning_rate": 1.9960621402742246e-05, "loss": 0.4728, "step": 1565 }, { "epoch": 0.02834774193082429, "grad_norm": 0.44753339886665344, "learning_rate": 1.9960369546957266e-05, "loss": 0.4006, "step": 1570 }, { "epoch": 0.028438021363725002, "grad_norm": 0.47656089067459106, "learning_rate": 1.9960116889932983e-05, "loss": 0.3909, "step": 1575 }, { "epoch": 0.028528300796625716, "grad_norm": 0.37938275933265686, "learning_rate": 1.995986343168971e-05, "loss": 0.3484, "step": 1580 }, { "epoch": 0.02861858022952643, "grad_norm": 0.3348734676837921, "learning_rate": 1.9959609172247835e-05, "loss": 0.2864, "step": 1585 }, { "epoch": 0.028708859662427144, "grad_norm": 0.4705940783023834, "learning_rate": 1.9959354111627817e-05, "loss": 0.3737, "step": 1590 }, { "epoch": 0.028799139095327858, "grad_norm": 0.819482684135437, "learning_rate": 1.9959098249850174e-05, "loss": 0.3681, "step": 1595 }, { "epoch": 0.028889418528228572, "grad_norm": 0.826997697353363, "learning_rate": 1.9958841586935484e-05, "loss": 0.3256, "step": 1600 }, { "epoch": 0.028979697961129286, "grad_norm": 0.9151831269264221, "learning_rate": 1.9958584122904398e-05, "loss": 0.3808, "step": 1605 }, { "epoch": 0.029069977394030003, "grad_norm": 0.39642009139060974, "learning_rate": 1.9958325857777625e-05, "loss": 0.3742, "step": 1610 }, { "epoch": 0.029160256826930717, "grad_norm": 0.5577180981636047, "learning_rate": 1.9958066791575943e-05, "loss": 0.2792, "step": 1615 }, { "epoch": 0.02925053625983143, "grad_norm": 0.4958969056606293, "learning_rate": 1.9957806924320188e-05, "loss": 0.4028, "step": 1620 }, { "epoch": 0.029340815692732145, "grad_norm": 0.34428122639656067, "learning_rate": 1.9957546256031266e-05, "loss": 0.4055, "step": 1625 }, { "epoch": 0.02943109512563286, "grad_norm": 0.48582723736763, "learning_rate": 1.9957284786730143e-05, "loss": 0.3664, "step": 1630 }, { "epoch": 0.029521374558533573, "grad_norm": 0.5791293978691101, "learning_rate": 1.995702251643786e-05, "loss": 0.2648, "step": 1635 }, { "epoch": 0.029611653991434287, "grad_norm": 0.4370667636394501, "learning_rate": 1.995675944517551e-05, "loss": 0.3348, "step": 1640 }, { "epoch": 0.029701933424335, "grad_norm": 0.4895092248916626, "learning_rate": 1.995649557296425e-05, "loss": 0.4108, "step": 1645 }, { "epoch": 0.029792212857235715, "grad_norm": 0.6840490102767944, "learning_rate": 1.995623089982532e-05, "loss": 0.3839, "step": 1650 }, { "epoch": 0.02988249229013643, "grad_norm": 0.683653712272644, "learning_rate": 1.9955965425779994e-05, "loss": 0.3478, "step": 1655 }, { "epoch": 0.029972771723037146, "grad_norm": 0.29711541533470154, "learning_rate": 1.9955699150849643e-05, "loss": 0.3065, "step": 1660 }, { "epoch": 0.03006305115593786, "grad_norm": 0.47336554527282715, "learning_rate": 1.9955432075055677e-05, "loss": 0.3156, "step": 1665 }, { "epoch": 0.030153330588838574, "grad_norm": 0.5812807083129883, "learning_rate": 1.9955164198419585e-05, "loss": 0.3848, "step": 1670 }, { "epoch": 0.030243610021739288, "grad_norm": 0.44053414463996887, "learning_rate": 1.9954895520962912e-05, "loss": 0.3991, "step": 1675 }, { "epoch": 0.030333889454640002, "grad_norm": 0.4269275367259979, "learning_rate": 1.9954626042707277e-05, "loss": 0.2343, "step": 1680 }, { "epoch": 0.030424168887540716, "grad_norm": 0.4034358263015747, "learning_rate": 1.9954355763674355e-05, "loss": 0.3132, "step": 1685 }, { "epoch": 0.03051444832044143, "grad_norm": 0.38301345705986023, "learning_rate": 1.9954084683885883e-05, "loss": 0.3286, "step": 1690 }, { "epoch": 0.030604727753342144, "grad_norm": 0.49178457260131836, "learning_rate": 1.995381280336367e-05, "loss": 0.3338, "step": 1695 }, { "epoch": 0.030695007186242858, "grad_norm": 0.5337426662445068, "learning_rate": 1.9953540122129592e-05, "loss": 0.3645, "step": 1700 }, { "epoch": 0.03078528661914357, "grad_norm": 0.4313508868217468, "learning_rate": 1.995326664020558e-05, "loss": 0.3102, "step": 1705 }, { "epoch": 0.03087556605204429, "grad_norm": 0.4087493419647217, "learning_rate": 1.9952992357613633e-05, "loss": 0.4064, "step": 1710 }, { "epoch": 0.030965845484945003, "grad_norm": 1.2327971458435059, "learning_rate": 1.9952717274375814e-05, "loss": 0.3662, "step": 1715 }, { "epoch": 0.031056124917845717, "grad_norm": 0.5065000057220459, "learning_rate": 1.9952441390514255e-05, "loss": 0.2931, "step": 1720 }, { "epoch": 0.03114640435074643, "grad_norm": 0.45024505257606506, "learning_rate": 1.995216470605115e-05, "loss": 0.4285, "step": 1725 }, { "epoch": 0.031236683783647145, "grad_norm": 0.3390149176120758, "learning_rate": 1.995188722100875e-05, "loss": 0.4599, "step": 1730 }, { "epoch": 0.03132696321654786, "grad_norm": 0.3781318962574005, "learning_rate": 1.9951608935409382e-05, "loss": 0.3843, "step": 1735 }, { "epoch": 0.031417242649448576, "grad_norm": 0.5293467044830322, "learning_rate": 1.9951329849275432e-05, "loss": 0.3104, "step": 1740 }, { "epoch": 0.03150752208234929, "grad_norm": 1.1434433460235596, "learning_rate": 1.9951049962629346e-05, "loss": 0.4619, "step": 1745 }, { "epoch": 0.031597801515250004, "grad_norm": 0.5122627019882202, "learning_rate": 1.9950769275493645e-05, "loss": 0.401, "step": 1750 }, { "epoch": 0.03168808094815072, "grad_norm": 0.9754158854484558, "learning_rate": 1.99504877878909e-05, "loss": 0.4172, "step": 1755 }, { "epoch": 0.03177836038105143, "grad_norm": 0.5180284976959229, "learning_rate": 1.9950205499843766e-05, "loss": 0.28, "step": 1760 }, { "epoch": 0.031868639813952146, "grad_norm": 0.5530410408973694, "learning_rate": 1.994992241137494e-05, "loss": 0.3866, "step": 1765 }, { "epoch": 0.03195891924685286, "grad_norm": 1.103765845298767, "learning_rate": 1.99496385225072e-05, "loss": 0.3099, "step": 1770 }, { "epoch": 0.032049198679753574, "grad_norm": 0.5604039430618286, "learning_rate": 1.9949353833263377e-05, "loss": 0.3052, "step": 1775 }, { "epoch": 0.03213947811265429, "grad_norm": 0.49411916732788086, "learning_rate": 1.9949068343666384e-05, "loss": 0.2955, "step": 1780 }, { "epoch": 0.032229757545555, "grad_norm": 1.085943579673767, "learning_rate": 1.9948782053739174e-05, "loss": 0.4603, "step": 1785 }, { "epoch": 0.032320036978455716, "grad_norm": 0.4952246844768524, "learning_rate": 1.9948494963504786e-05, "loss": 0.4334, "step": 1790 }, { "epoch": 0.03241031641135643, "grad_norm": 0.45003145933151245, "learning_rate": 1.994820707298631e-05, "loss": 0.4105, "step": 1795 }, { "epoch": 0.032500595844257144, "grad_norm": 0.525973916053772, "learning_rate": 1.9947918382206906e-05, "loss": 0.3267, "step": 1800 }, { "epoch": 0.03259087527715786, "grad_norm": 0.5335582494735718, "learning_rate": 1.9947628891189796e-05, "loss": 0.4224, "step": 1805 }, { "epoch": 0.03268115471005857, "grad_norm": 0.6103760004043579, "learning_rate": 1.9947338599958267e-05, "loss": 0.2956, "step": 1810 }, { "epoch": 0.032771434142959285, "grad_norm": 0.5102728009223938, "learning_rate": 1.9947047508535674e-05, "loss": 0.2478, "step": 1815 }, { "epoch": 0.03286171357586, "grad_norm": 0.42971786856651306, "learning_rate": 1.994675561694543e-05, "loss": 0.3884, "step": 1820 }, { "epoch": 0.03295199300876071, "grad_norm": 0.6633021831512451, "learning_rate": 1.9946462925211018e-05, "loss": 0.3489, "step": 1825 }, { "epoch": 0.03304227244166143, "grad_norm": 0.38056522607803345, "learning_rate": 1.994616943335598e-05, "loss": 0.4348, "step": 1830 }, { "epoch": 0.03313255187456215, "grad_norm": 0.6503346562385559, "learning_rate": 1.9945875141403924e-05, "loss": 0.3851, "step": 1835 }, { "epoch": 0.03322283130746286, "grad_norm": 0.3538060188293457, "learning_rate": 1.9945580049378527e-05, "loss": 0.3571, "step": 1840 }, { "epoch": 0.033313110740363576, "grad_norm": 0.5887143015861511, "learning_rate": 1.994528415730353e-05, "loss": 0.4758, "step": 1845 }, { "epoch": 0.03340339017326429, "grad_norm": 0.5996758937835693, "learning_rate": 1.994498746520273e-05, "loss": 0.3693, "step": 1850 }, { "epoch": 0.033493669606165004, "grad_norm": 1.738226056098938, "learning_rate": 1.9944689973099997e-05, "loss": 0.4114, "step": 1855 }, { "epoch": 0.03358394903906572, "grad_norm": 0.3167506754398346, "learning_rate": 1.9944391681019257e-05, "loss": 0.3131, "step": 1860 }, { "epoch": 0.03367422847196643, "grad_norm": 0.5447887778282166, "learning_rate": 1.9944092588984508e-05, "loss": 0.3885, "step": 1865 }, { "epoch": 0.033764507904867146, "grad_norm": 0.39655378460884094, "learning_rate": 1.9943792697019816e-05, "loss": 0.3641, "step": 1870 }, { "epoch": 0.03385478733776786, "grad_norm": 1.3213824033737183, "learning_rate": 1.9943492005149297e-05, "loss": 0.4111, "step": 1875 }, { "epoch": 0.033945066770668574, "grad_norm": 0.4718227982521057, "learning_rate": 1.994319051339714e-05, "loss": 0.3694, "step": 1880 }, { "epoch": 0.03403534620356929, "grad_norm": 0.5119079947471619, "learning_rate": 1.9942888221787604e-05, "loss": 0.3118, "step": 1885 }, { "epoch": 0.03412562563647, "grad_norm": 0.5912514328956604, "learning_rate": 1.9942585130345e-05, "loss": 0.3498, "step": 1890 }, { "epoch": 0.034215905069370715, "grad_norm": 1.2097115516662598, "learning_rate": 1.9942281239093716e-05, "loss": 0.3146, "step": 1895 }, { "epoch": 0.03430618450227143, "grad_norm": 0.5831785798072815, "learning_rate": 1.994197654805819e-05, "loss": 0.3613, "step": 1900 }, { "epoch": 0.03439646393517214, "grad_norm": 0.7582271099090576, "learning_rate": 1.9941671057262937e-05, "loss": 0.3127, "step": 1905 }, { "epoch": 0.03448674336807286, "grad_norm": 0.702990710735321, "learning_rate": 1.994136476673253e-05, "loss": 0.2743, "step": 1910 }, { "epoch": 0.03457702280097357, "grad_norm": 0.5540505647659302, "learning_rate": 1.9941057676491605e-05, "loss": 0.367, "step": 1915 }, { "epoch": 0.034667302233874285, "grad_norm": 0.39690011739730835, "learning_rate": 1.9940749786564876e-05, "loss": 0.2994, "step": 1920 }, { "epoch": 0.034757581666775, "grad_norm": 0.32354414463043213, "learning_rate": 1.9940441096977095e-05, "loss": 0.4053, "step": 1925 }, { "epoch": 0.03484786109967571, "grad_norm": 0.4380877912044525, "learning_rate": 1.994013160775311e-05, "loss": 0.5256, "step": 1930 }, { "epoch": 0.034938140532576434, "grad_norm": 0.5137713551521301, "learning_rate": 1.9939821318917804e-05, "loss": 0.3162, "step": 1935 }, { "epoch": 0.03502841996547715, "grad_norm": 0.4871731400489807, "learning_rate": 1.993951023049614e-05, "loss": 0.2895, "step": 1940 }, { "epoch": 0.03511869939837786, "grad_norm": 0.506823718547821, "learning_rate": 1.993919834251315e-05, "loss": 0.3808, "step": 1945 }, { "epoch": 0.035208978831278576, "grad_norm": 0.5929615497589111, "learning_rate": 1.9938885654993914e-05, "loss": 0.3662, "step": 1950 }, { "epoch": 0.03529925826417929, "grad_norm": 0.34095823764801025, "learning_rate": 1.993857216796359e-05, "loss": 0.3991, "step": 1955 }, { "epoch": 0.035389537697080004, "grad_norm": 0.46202680468559265, "learning_rate": 1.99382578814474e-05, "loss": 0.4335, "step": 1960 }, { "epoch": 0.03547981712998072, "grad_norm": 0.5477923154830933, "learning_rate": 1.993794279547062e-05, "loss": 0.4666, "step": 1965 }, { "epoch": 0.03557009656288143, "grad_norm": 1.0245890617370605, "learning_rate": 1.9937626910058597e-05, "loss": 0.3747, "step": 1970 }, { "epoch": 0.035660375995782145, "grad_norm": 1.0427522659301758, "learning_rate": 1.993731022523674e-05, "loss": 0.3872, "step": 1975 }, { "epoch": 0.03575065542868286, "grad_norm": 0.5643081665039062, "learning_rate": 1.993699274103053e-05, "loss": 0.2606, "step": 1980 }, { "epoch": 0.03584093486158357, "grad_norm": 0.4256623387336731, "learning_rate": 1.99366744574655e-05, "loss": 0.2413, "step": 1985 }, { "epoch": 0.03593121429448429, "grad_norm": 0.5165201425552368, "learning_rate": 1.9936355374567257e-05, "loss": 0.3004, "step": 1990 }, { "epoch": 0.036021493727385, "grad_norm": 0.3593834340572357, "learning_rate": 1.993603549236147e-05, "loss": 0.3883, "step": 1995 }, { "epoch": 0.036111773160285715, "grad_norm": 1.036332368850708, "learning_rate": 1.993571481087387e-05, "loss": 0.4099, "step": 2000 }, { "epoch": 0.03620205259318643, "grad_norm": 0.5267215371131897, "learning_rate": 1.9935393330130254e-05, "loss": 0.3242, "step": 2005 }, { "epoch": 0.03629233202608714, "grad_norm": 0.4293646216392517, "learning_rate": 1.993507105015648e-05, "loss": 0.3024, "step": 2010 }, { "epoch": 0.03638261145898786, "grad_norm": 0.9337967038154602, "learning_rate": 1.9934747970978475e-05, "loss": 0.3096, "step": 2015 }, { "epoch": 0.03647289089188857, "grad_norm": 0.48764199018478394, "learning_rate": 1.993442409262223e-05, "loss": 0.3644, "step": 2020 }, { "epoch": 0.036563170324789285, "grad_norm": 0.4670167565345764, "learning_rate": 1.9934099415113795e-05, "loss": 0.3789, "step": 2025 }, { "epoch": 0.03665344975769, "grad_norm": 0.4873073995113373, "learning_rate": 1.993377393847929e-05, "loss": 0.3646, "step": 2030 }, { "epoch": 0.03674372919059072, "grad_norm": 0.6277158260345459, "learning_rate": 1.9933447662744897e-05, "loss": 0.3741, "step": 2035 }, { "epoch": 0.036834008623491434, "grad_norm": 0.6023591756820679, "learning_rate": 1.9933120587936863e-05, "loss": 0.3908, "step": 2040 }, { "epoch": 0.03692428805639215, "grad_norm": 0.498462438583374, "learning_rate": 1.9932792714081502e-05, "loss": 0.3714, "step": 2045 }, { "epoch": 0.03701456748929286, "grad_norm": 0.3748553693294525, "learning_rate": 1.9932464041205182e-05, "loss": 0.4282, "step": 2050 }, { "epoch": 0.037104846922193575, "grad_norm": 0.5001702308654785, "learning_rate": 1.9932134569334346e-05, "loss": 0.3829, "step": 2055 }, { "epoch": 0.03719512635509429, "grad_norm": 0.4409927725791931, "learning_rate": 1.9931804298495497e-05, "loss": 0.3179, "step": 2060 }, { "epoch": 0.037285405787995, "grad_norm": 0.46347978711128235, "learning_rate": 1.9931473228715203e-05, "loss": 0.3888, "step": 2065 }, { "epoch": 0.03737568522089572, "grad_norm": 0.5000243782997131, "learning_rate": 1.9931141360020097e-05, "loss": 0.2796, "step": 2070 }, { "epoch": 0.03746596465379643, "grad_norm": 0.556724488735199, "learning_rate": 1.993080869243688e-05, "loss": 0.4378, "step": 2075 }, { "epoch": 0.037556244086697145, "grad_norm": 0.3234434723854065, "learning_rate": 1.99304752259923e-05, "loss": 0.3901, "step": 2080 }, { "epoch": 0.03764652351959786, "grad_norm": 0.42605525255203247, "learning_rate": 1.9930140960713195e-05, "loss": 0.378, "step": 2085 }, { "epoch": 0.03773680295249857, "grad_norm": 0.833888828754425, "learning_rate": 1.9929805896626446e-05, "loss": 0.386, "step": 2090 }, { "epoch": 0.03782708238539929, "grad_norm": 0.44782599806785583, "learning_rate": 1.992947003375901e-05, "loss": 0.3741, "step": 2095 }, { "epoch": 0.0379173618183, "grad_norm": 0.6248586773872375, "learning_rate": 1.9929133372137907e-05, "loss": 0.3939, "step": 2100 }, { "epoch": 0.038007641251200715, "grad_norm": 0.444442480802536, "learning_rate": 1.992879591179021e-05, "loss": 0.4162, "step": 2105 }, { "epoch": 0.03809792068410143, "grad_norm": 0.4670912027359009, "learning_rate": 1.9928457652743074e-05, "loss": 0.3047, "step": 2110 }, { "epoch": 0.03818820011700214, "grad_norm": 0.4275720715522766, "learning_rate": 1.9928118595023707e-05, "loss": 0.3561, "step": 2115 }, { "epoch": 0.03827847954990286, "grad_norm": 0.45719200372695923, "learning_rate": 1.9927778738659384e-05, "loss": 0.3426, "step": 2120 }, { "epoch": 0.03836875898280357, "grad_norm": 0.6436276435852051, "learning_rate": 1.9927438083677445e-05, "loss": 0.3548, "step": 2125 }, { "epoch": 0.03845903841570429, "grad_norm": 0.5185565948486328, "learning_rate": 1.9927096630105285e-05, "loss": 0.3818, "step": 2130 }, { "epoch": 0.038549317848605005, "grad_norm": 0.5306310057640076, "learning_rate": 1.9926754377970385e-05, "loss": 0.3823, "step": 2135 }, { "epoch": 0.03863959728150572, "grad_norm": 0.48837965726852417, "learning_rate": 1.9926411327300266e-05, "loss": 0.433, "step": 2140 }, { "epoch": 0.03872987671440643, "grad_norm": 0.46864208579063416, "learning_rate": 1.992606747812253e-05, "loss": 0.3655, "step": 2145 }, { "epoch": 0.03882015614730715, "grad_norm": 0.4254169464111328, "learning_rate": 1.9925722830464832e-05, "loss": 0.3287, "step": 2150 }, { "epoch": 0.03891043558020786, "grad_norm": 0.6717060208320618, "learning_rate": 1.9925377384354903e-05, "loss": 0.3643, "step": 2155 }, { "epoch": 0.039000715013108575, "grad_norm": 0.40732356905937195, "learning_rate": 1.9925031139820526e-05, "loss": 0.325, "step": 2160 }, { "epoch": 0.03909099444600929, "grad_norm": 0.48590540885925293, "learning_rate": 1.9924684096889556e-05, "loss": 0.3351, "step": 2165 }, { "epoch": 0.03918127387891, "grad_norm": 0.5301512479782104, "learning_rate": 1.992433625558991e-05, "loss": 0.3271, "step": 2170 }, { "epoch": 0.03927155331181072, "grad_norm": 0.48758119344711304, "learning_rate": 1.9923987615949568e-05, "loss": 0.3486, "step": 2175 }, { "epoch": 0.03936183274471143, "grad_norm": 0.4892788231372833, "learning_rate": 1.9923638177996575e-05, "loss": 0.4185, "step": 2180 }, { "epoch": 0.039452112177612145, "grad_norm": 0.4694100618362427, "learning_rate": 1.9923287941759048e-05, "loss": 0.374, "step": 2185 }, { "epoch": 0.03954239161051286, "grad_norm": 0.42079830169677734, "learning_rate": 1.9922936907265152e-05, "loss": 0.4314, "step": 2190 }, { "epoch": 0.03963267104341357, "grad_norm": 0.4797016978263855, "learning_rate": 1.9922585074543125e-05, "loss": 0.3264, "step": 2195 }, { "epoch": 0.03972295047631429, "grad_norm": 0.49687549471855164, "learning_rate": 1.992223244362128e-05, "loss": 0.3126, "step": 2200 }, { "epoch": 0.039813229909215, "grad_norm": 0.43118202686309814, "learning_rate": 1.9921879014527973e-05, "loss": 0.3377, "step": 2205 }, { "epoch": 0.039903509342115714, "grad_norm": 0.5688696503639221, "learning_rate": 1.9921524787291637e-05, "loss": 0.3186, "step": 2210 }, { "epoch": 0.03999378877501643, "grad_norm": 0.5296006798744202, "learning_rate": 1.992116976194077e-05, "loss": 0.3401, "step": 2215 }, { "epoch": 0.04008406820791714, "grad_norm": 0.5843896865844727, "learning_rate": 1.992081393850393e-05, "loss": 0.3502, "step": 2220 }, { "epoch": 0.040174347640817856, "grad_norm": 0.44793540239334106, "learning_rate": 1.9920457317009737e-05, "loss": 0.3152, "step": 2225 }, { "epoch": 0.04026462707371858, "grad_norm": 0.41331714391708374, "learning_rate": 1.9920099897486885e-05, "loss": 0.3549, "step": 2230 }, { "epoch": 0.04035490650661929, "grad_norm": 0.6284789443016052, "learning_rate": 1.9919741679964122e-05, "loss": 0.3694, "step": 2235 }, { "epoch": 0.040445185939520005, "grad_norm": 0.5507609844207764, "learning_rate": 1.9919382664470264e-05, "loss": 0.4005, "step": 2240 }, { "epoch": 0.04053546537242072, "grad_norm": 0.5600115656852722, "learning_rate": 1.991902285103419e-05, "loss": 0.3131, "step": 2245 }, { "epoch": 0.04062574480532143, "grad_norm": 0.42502927780151367, "learning_rate": 1.9918662239684848e-05, "loss": 0.2977, "step": 2250 }, { "epoch": 0.04071602423822215, "grad_norm": 0.5070359706878662, "learning_rate": 1.9918300830451243e-05, "loss": 0.4613, "step": 2255 }, { "epoch": 0.04080630367112286, "grad_norm": 1.3776845932006836, "learning_rate": 1.9917938623362447e-05, "loss": 0.4404, "step": 2260 }, { "epoch": 0.040896583104023575, "grad_norm": 0.45074161887168884, "learning_rate": 1.9917575618447604e-05, "loss": 0.2912, "step": 2265 }, { "epoch": 0.04098686253692429, "grad_norm": 0.41434893012046814, "learning_rate": 1.9917211815735906e-05, "loss": 0.3021, "step": 2270 }, { "epoch": 0.041077141969825, "grad_norm": 1.3090029954910278, "learning_rate": 1.9916847215256626e-05, "loss": 0.2964, "step": 2275 }, { "epoch": 0.04116742140272572, "grad_norm": 0.45356735587120056, "learning_rate": 1.991648181703909e-05, "loss": 0.286, "step": 2280 }, { "epoch": 0.04125770083562643, "grad_norm": 0.5527782440185547, "learning_rate": 1.9916115621112687e-05, "loss": 0.3823, "step": 2285 }, { "epoch": 0.041347980268527144, "grad_norm": 0.30888089537620544, "learning_rate": 1.9915748627506884e-05, "loss": 0.4878, "step": 2290 }, { "epoch": 0.04143825970142786, "grad_norm": 0.42406365275382996, "learning_rate": 1.9915380836251198e-05, "loss": 0.3513, "step": 2295 }, { "epoch": 0.04152853913432857, "grad_norm": 0.4733360707759857, "learning_rate": 1.991501224737521e-05, "loss": 0.3513, "step": 2300 }, { "epoch": 0.041618818567229286, "grad_norm": 0.38414448499679565, "learning_rate": 1.9914642860908583e-05, "loss": 0.4567, "step": 2305 }, { "epoch": 0.04170909800013, "grad_norm": 0.47997698187828064, "learning_rate": 1.991427267688102e-05, "loss": 0.309, "step": 2310 }, { "epoch": 0.041799377433030714, "grad_norm": 0.5346536636352539, "learning_rate": 1.9913901695322308e-05, "loss": 0.4237, "step": 2315 }, { "epoch": 0.04188965686593143, "grad_norm": 0.4030945897102356, "learning_rate": 1.9913529916262283e-05, "loss": 0.2438, "step": 2320 }, { "epoch": 0.04197993629883214, "grad_norm": 0.686960756778717, "learning_rate": 1.9913157339730854e-05, "loss": 0.4153, "step": 2325 }, { "epoch": 0.04207021573173286, "grad_norm": 0.4791950285434723, "learning_rate": 1.9912783965757995e-05, "loss": 0.3374, "step": 2330 }, { "epoch": 0.04216049516463358, "grad_norm": 0.4995405375957489, "learning_rate": 1.9912409794373736e-05, "loss": 0.3707, "step": 2335 }, { "epoch": 0.04225077459753429, "grad_norm": 0.35879209637641907, "learning_rate": 1.991203482560818e-05, "loss": 0.3106, "step": 2340 }, { "epoch": 0.042341054030435005, "grad_norm": 0.4058288335800171, "learning_rate": 1.9911659059491487e-05, "loss": 0.366, "step": 2345 }, { "epoch": 0.04243133346333572, "grad_norm": 0.40655022859573364, "learning_rate": 1.9911282496053893e-05, "loss": 0.3075, "step": 2350 }, { "epoch": 0.04252161289623643, "grad_norm": 0.5874953269958496, "learning_rate": 1.991090513532568e-05, "loss": 0.4062, "step": 2355 }, { "epoch": 0.042611892329137147, "grad_norm": 0.4730669856071472, "learning_rate": 1.991052697733721e-05, "loss": 0.3985, "step": 2360 }, { "epoch": 0.04270217176203786, "grad_norm": 0.4676113724708557, "learning_rate": 1.99101480221189e-05, "loss": 0.3341, "step": 2365 }, { "epoch": 0.042792451194938574, "grad_norm": 0.44648388028144836, "learning_rate": 1.9909768269701236e-05, "loss": 0.3104, "step": 2370 }, { "epoch": 0.04288273062783929, "grad_norm": 0.38011738657951355, "learning_rate": 1.9909387720114764e-05, "loss": 0.3112, "step": 2375 }, { "epoch": 0.04297301006074, "grad_norm": 0.31403690576553345, "learning_rate": 1.9909006373390102e-05, "loss": 0.2566, "step": 2380 }, { "epoch": 0.043063289493640716, "grad_norm": 0.4737754464149475, "learning_rate": 1.990862422955792e-05, "loss": 0.3378, "step": 2385 }, { "epoch": 0.04315356892654143, "grad_norm": 0.374519944190979, "learning_rate": 1.9908241288648963e-05, "loss": 0.4081, "step": 2390 }, { "epoch": 0.043243848359442144, "grad_norm": 0.42596983909606934, "learning_rate": 1.9907857550694034e-05, "loss": 0.4021, "step": 2395 }, { "epoch": 0.04333412779234286, "grad_norm": 0.490863561630249, "learning_rate": 1.9907473015724e-05, "loss": 0.372, "step": 2400 }, { "epoch": 0.04342440722524357, "grad_norm": 0.3635746240615845, "learning_rate": 1.9907087683769795e-05, "loss": 0.2196, "step": 2405 }, { "epoch": 0.043514686658144286, "grad_norm": 0.4454294443130493, "learning_rate": 1.9906701554862423e-05, "loss": 0.3902, "step": 2410 }, { "epoch": 0.043604966091045, "grad_norm": 0.325874388217926, "learning_rate": 1.9906314629032936e-05, "loss": 0.2661, "step": 2415 }, { "epoch": 0.043695245523945714, "grad_norm": 0.7062177062034607, "learning_rate": 1.9905926906312466e-05, "loss": 0.2619, "step": 2420 }, { "epoch": 0.04378552495684643, "grad_norm": 0.635353684425354, "learning_rate": 1.9905538386732196e-05, "loss": 0.3557, "step": 2425 }, { "epoch": 0.04387580438974715, "grad_norm": 0.8798350095748901, "learning_rate": 1.9905149070323383e-05, "loss": 0.3551, "step": 2430 }, { "epoch": 0.04396608382264786, "grad_norm": 0.7844729423522949, "learning_rate": 1.9904758957117345e-05, "loss": 0.364, "step": 2435 }, { "epoch": 0.044056363255548577, "grad_norm": 0.49181339144706726, "learning_rate": 1.9904368047145462e-05, "loss": 0.2548, "step": 2440 }, { "epoch": 0.04414664268844929, "grad_norm": 0.31904664635658264, "learning_rate": 1.9903976340439186e-05, "loss": 0.4465, "step": 2445 }, { "epoch": 0.044236922121350004, "grad_norm": 0.33480775356292725, "learning_rate": 1.990358383703002e-05, "loss": 0.3779, "step": 2450 }, { "epoch": 0.04432720155425072, "grad_norm": 0.541325032711029, "learning_rate": 1.990319053694954e-05, "loss": 0.393, "step": 2455 }, { "epoch": 0.04441748098715143, "grad_norm": 0.5086470246315002, "learning_rate": 1.9902796440229383e-05, "loss": 0.3949, "step": 2460 }, { "epoch": 0.044507760420052146, "grad_norm": 0.813579797744751, "learning_rate": 1.9902401546901253e-05, "loss": 0.3863, "step": 2465 }, { "epoch": 0.04459803985295286, "grad_norm": 0.42829373478889465, "learning_rate": 1.9902005856996918e-05, "loss": 0.3079, "step": 2470 }, { "epoch": 0.044688319285853574, "grad_norm": 0.6729962825775146, "learning_rate": 1.9901609370548204e-05, "loss": 0.4118, "step": 2475 }, { "epoch": 0.04477859871875429, "grad_norm": 0.3326593041419983, "learning_rate": 1.990121208758701e-05, "loss": 0.3975, "step": 2480 }, { "epoch": 0.044868878151655, "grad_norm": 0.4417182505130768, "learning_rate": 1.990081400814529e-05, "loss": 0.3568, "step": 2485 }, { "epoch": 0.044959157584555716, "grad_norm": 0.38405391573905945, "learning_rate": 1.990041513225507e-05, "loss": 0.3257, "step": 2490 }, { "epoch": 0.04504943701745643, "grad_norm": 0.48008227348327637, "learning_rate": 1.990001545994844e-05, "loss": 0.4107, "step": 2495 }, { "epoch": 0.045139716450357144, "grad_norm": 0.4501303732395172, "learning_rate": 1.9899614991257542e-05, "loss": 0.2903, "step": 2500 }, { "epoch": 0.04522999588325786, "grad_norm": 0.51317298412323, "learning_rate": 1.9899213726214593e-05, "loss": 0.3633, "step": 2505 }, { "epoch": 0.04532027531615857, "grad_norm": 0.4251177906990051, "learning_rate": 1.9898811664851878e-05, "loss": 0.3627, "step": 2510 }, { "epoch": 0.045410554749059286, "grad_norm": 0.45249614119529724, "learning_rate": 1.9898408807201736e-05, "loss": 0.3179, "step": 2515 }, { "epoch": 0.04550083418196, "grad_norm": 0.42034104466438293, "learning_rate": 1.989800515329657e-05, "loss": 0.3904, "step": 2520 }, { "epoch": 0.045591113614860714, "grad_norm": 0.6970406770706177, "learning_rate": 1.9897600703168857e-05, "loss": 0.4268, "step": 2525 }, { "epoch": 0.045681393047761434, "grad_norm": 0.4760364592075348, "learning_rate": 1.989719545685113e-05, "loss": 0.3631, "step": 2530 }, { "epoch": 0.04577167248066215, "grad_norm": 0.5618122220039368, "learning_rate": 1.9896789414375988e-05, "loss": 0.2708, "step": 2535 }, { "epoch": 0.04586195191356286, "grad_norm": 0.481415718793869, "learning_rate": 1.9896382575776093e-05, "loss": 0.3261, "step": 2540 }, { "epoch": 0.045952231346463576, "grad_norm": 0.5187461972236633, "learning_rate": 1.989597494108418e-05, "loss": 0.3711, "step": 2545 }, { "epoch": 0.04604251077936429, "grad_norm": 0.4853019714355469, "learning_rate": 1.9895566510333024e-05, "loss": 0.3891, "step": 2550 }, { "epoch": 0.046132790212265004, "grad_norm": 0.7737001776695251, "learning_rate": 1.9895157283555495e-05, "loss": 0.2272, "step": 2555 }, { "epoch": 0.04622306964516572, "grad_norm": 0.5728014707565308, "learning_rate": 1.9894747260784507e-05, "loss": 0.3846, "step": 2560 }, { "epoch": 0.04631334907806643, "grad_norm": 0.3807612657546997, "learning_rate": 1.9894336442053043e-05, "loss": 0.3339, "step": 2565 }, { "epoch": 0.046403628510967146, "grad_norm": 0.405606746673584, "learning_rate": 1.989392482739415e-05, "loss": 0.3682, "step": 2570 }, { "epoch": 0.04649390794386786, "grad_norm": 0.4326905906200409, "learning_rate": 1.9893512416840944e-05, "loss": 0.3738, "step": 2575 }, { "epoch": 0.046584187376768574, "grad_norm": 0.46612468361854553, "learning_rate": 1.989309921042659e-05, "loss": 0.2992, "step": 2580 }, { "epoch": 0.04667446680966929, "grad_norm": 0.4508918821811676, "learning_rate": 1.9892685208184343e-05, "loss": 0.3831, "step": 2585 }, { "epoch": 0.04676474624257, "grad_norm": 0.5471922159194946, "learning_rate": 1.9892270410147492e-05, "loss": 0.3873, "step": 2590 }, { "epoch": 0.046855025675470716, "grad_norm": 0.4468011260032654, "learning_rate": 1.9891854816349415e-05, "loss": 0.3566, "step": 2595 }, { "epoch": 0.04694530510837143, "grad_norm": 0.5099403262138367, "learning_rate": 1.9891438426823532e-05, "loss": 0.3611, "step": 2600 }, { "epoch": 0.047035584541272143, "grad_norm": 0.5567428469657898, "learning_rate": 1.989102124160335e-05, "loss": 0.3352, "step": 2605 }, { "epoch": 0.04712586397417286, "grad_norm": 0.4418875277042389, "learning_rate": 1.9890603260722424e-05, "loss": 0.3167, "step": 2610 }, { "epoch": 0.04721614340707357, "grad_norm": 0.47677722573280334, "learning_rate": 1.9890184484214375e-05, "loss": 0.3716, "step": 2615 }, { "epoch": 0.047306422839974285, "grad_norm": 0.5190219879150391, "learning_rate": 1.9889764912112893e-05, "loss": 0.3514, "step": 2620 }, { "epoch": 0.047396702272875, "grad_norm": 0.3794596195220947, "learning_rate": 1.988934454445173e-05, "loss": 0.3697, "step": 2625 }, { "epoch": 0.04748698170577572, "grad_norm": 0.375012069940567, "learning_rate": 1.9888923381264703e-05, "loss": 0.2698, "step": 2630 }, { "epoch": 0.047577261138676434, "grad_norm": 0.5114025473594666, "learning_rate": 1.9888501422585687e-05, "loss": 0.3313, "step": 2635 }, { "epoch": 0.04766754057157715, "grad_norm": 0.5759722590446472, "learning_rate": 1.9888078668448627e-05, "loss": 0.4041, "step": 2640 }, { "epoch": 0.04775782000447786, "grad_norm": 0.6198399662971497, "learning_rate": 1.988765511888753e-05, "loss": 0.2688, "step": 2645 }, { "epoch": 0.047848099437378576, "grad_norm": 0.4790531098842621, "learning_rate": 1.9887230773936474e-05, "loss": 0.3142, "step": 2650 }, { "epoch": 0.04793837887027929, "grad_norm": 0.4564208388328552, "learning_rate": 1.9886805633629587e-05, "loss": 0.3542, "step": 2655 }, { "epoch": 0.048028658303180004, "grad_norm": 0.3777180016040802, "learning_rate": 1.9886379698001074e-05, "loss": 0.3278, "step": 2660 }, { "epoch": 0.04811893773608072, "grad_norm": 0.34479618072509766, "learning_rate": 1.9885952967085188e-05, "loss": 0.3324, "step": 2665 }, { "epoch": 0.04820921716898143, "grad_norm": 0.5915319323539734, "learning_rate": 1.9885525440916268e-05, "loss": 0.2877, "step": 2670 }, { "epoch": 0.048299496601882146, "grad_norm": 0.4588766396045685, "learning_rate": 1.98850971195287e-05, "loss": 0.4066, "step": 2675 }, { "epoch": 0.04838977603478286, "grad_norm": 0.4266802966594696, "learning_rate": 1.9884668002956943e-05, "loss": 0.3398, "step": 2680 }, { "epoch": 0.048480055467683573, "grad_norm": 0.3396337032318115, "learning_rate": 1.988423809123551e-05, "loss": 0.3159, "step": 2685 }, { "epoch": 0.04857033490058429, "grad_norm": 1.1436978578567505, "learning_rate": 1.988380738439899e-05, "loss": 0.4754, "step": 2690 }, { "epoch": 0.048660614333485, "grad_norm": 0.47760069370269775, "learning_rate": 1.9883375882482027e-05, "loss": 0.2486, "step": 2695 }, { "epoch": 0.048750893766385715, "grad_norm": 0.5436611175537109, "learning_rate": 1.9882943585519338e-05, "loss": 0.3299, "step": 2700 }, { "epoch": 0.04884117319928643, "grad_norm": 0.482835054397583, "learning_rate": 1.9882510493545687e-05, "loss": 0.4476, "step": 2705 }, { "epoch": 0.04893145263218714, "grad_norm": 0.5951783657073975, "learning_rate": 1.988207660659592e-05, "loss": 0.3985, "step": 2710 }, { "epoch": 0.04902173206508786, "grad_norm": 1.275805115699768, "learning_rate": 1.9881641924704945e-05, "loss": 0.3928, "step": 2715 }, { "epoch": 0.04911201149798857, "grad_norm": 0.49138695001602173, "learning_rate": 1.988120644790772e-05, "loss": 0.318, "step": 2720 }, { "epoch": 0.04920229093088929, "grad_norm": 0.3850145936012268, "learning_rate": 1.9880770176239278e-05, "loss": 0.3826, "step": 2725 }, { "epoch": 0.049292570363790006, "grad_norm": 0.455759197473526, "learning_rate": 1.988033310973472e-05, "loss": 0.22, "step": 2730 }, { "epoch": 0.04938284979669072, "grad_norm": 0.39483642578125, "learning_rate": 1.98798952484292e-05, "loss": 0.3783, "step": 2735 }, { "epoch": 0.049473129229591434, "grad_norm": 0.3339937925338745, "learning_rate": 1.9879456592357934e-05, "loss": 0.3165, "step": 2740 }, { "epoch": 0.04956340866249215, "grad_norm": 0.40515002608299255, "learning_rate": 1.987901714155622e-05, "loss": 0.3021, "step": 2745 }, { "epoch": 0.04965368809539286, "grad_norm": 0.5087426900863647, "learning_rate": 1.9878576896059408e-05, "loss": 0.3483, "step": 2750 }, { "epoch": 0.049743967528293576, "grad_norm": 0.4974749982357025, "learning_rate": 1.9878135855902908e-05, "loss": 0.302, "step": 2755 }, { "epoch": 0.04983424696119429, "grad_norm": 0.48196542263031006, "learning_rate": 1.9877694021122194e-05, "loss": 0.4315, "step": 2760 }, { "epoch": 0.049924526394095003, "grad_norm": 0.511130154132843, "learning_rate": 1.987725139175282e-05, "loss": 0.35, "step": 2765 }, { "epoch": 0.05001480582699572, "grad_norm": 0.48580029606819153, "learning_rate": 1.987680796783038e-05, "loss": 0.3647, "step": 2770 }, { "epoch": 0.05010508525989643, "grad_norm": 0.538997232913971, "learning_rate": 1.9876363749390554e-05, "loss": 0.3305, "step": 2775 }, { "epoch": 0.050195364692797145, "grad_norm": 0.38143017888069153, "learning_rate": 1.9875918736469072e-05, "loss": 0.2437, "step": 2780 }, { "epoch": 0.05028564412569786, "grad_norm": 0.44034236669540405, "learning_rate": 1.9875472929101732e-05, "loss": 0.2925, "step": 2785 }, { "epoch": 0.05037592355859857, "grad_norm": 0.4986114799976349, "learning_rate": 1.9875026327324396e-05, "loss": 0.2893, "step": 2790 }, { "epoch": 0.05046620299149929, "grad_norm": 0.6320555806159973, "learning_rate": 1.9874578931172993e-05, "loss": 0.2763, "step": 2795 }, { "epoch": 0.0505564824244, "grad_norm": 0.5865118503570557, "learning_rate": 1.9874130740683507e-05, "loss": 0.4108, "step": 2800 }, { "epoch": 0.050646761857300715, "grad_norm": 0.541379988193512, "learning_rate": 1.9873681755891996e-05, "loss": 0.4155, "step": 2805 }, { "epoch": 0.05073704129020143, "grad_norm": 0.49120455980300903, "learning_rate": 1.9873231976834576e-05, "loss": 0.3032, "step": 2810 }, { "epoch": 0.05082732072310214, "grad_norm": 0.5522422194480896, "learning_rate": 1.987278140354743e-05, "loss": 0.4814, "step": 2815 }, { "epoch": 0.05091760015600286, "grad_norm": 0.34580957889556885, "learning_rate": 1.9872330036066803e-05, "loss": 0.3633, "step": 2820 }, { "epoch": 0.05100787958890358, "grad_norm": 0.5104445219039917, "learning_rate": 1.9871877874429e-05, "loss": 0.3664, "step": 2825 }, { "epoch": 0.05109815902180429, "grad_norm": 0.5858346223831177, "learning_rate": 1.98714249186704e-05, "loss": 0.3519, "step": 2830 }, { "epoch": 0.051188438454705006, "grad_norm": 0.43192100524902344, "learning_rate": 1.9870971168827436e-05, "loss": 0.3973, "step": 2835 }, { "epoch": 0.05127871788760572, "grad_norm": 0.380086213350296, "learning_rate": 1.987051662493661e-05, "loss": 0.2811, "step": 2840 }, { "epoch": 0.05136899732050643, "grad_norm": 0.7647920250892639, "learning_rate": 1.987006128703449e-05, "loss": 0.4462, "step": 2845 }, { "epoch": 0.05145927675340715, "grad_norm": 0.4594999849796295, "learning_rate": 1.98696051551577e-05, "loss": 0.2968, "step": 2850 }, { "epoch": 0.05154955618630786, "grad_norm": 0.43784573674201965, "learning_rate": 1.9869148229342933e-05, "loss": 0.3771, "step": 2855 }, { "epoch": 0.051639835619208575, "grad_norm": 0.454778254032135, "learning_rate": 1.9868690509626945e-05, "loss": 0.3421, "step": 2860 }, { "epoch": 0.05173011505210929, "grad_norm": 0.5888170599937439, "learning_rate": 1.9868231996046563e-05, "loss": 0.3305, "step": 2865 }, { "epoch": 0.05182039448501, "grad_norm": 0.3069855868816376, "learning_rate": 1.9867772688638663e-05, "loss": 0.2406, "step": 2870 }, { "epoch": 0.05191067391791072, "grad_norm": 0.3524327874183655, "learning_rate": 1.9867312587440197e-05, "loss": 0.2714, "step": 2875 }, { "epoch": 0.05200095335081143, "grad_norm": 0.574265718460083, "learning_rate": 1.9866851692488175e-05, "loss": 0.4171, "step": 2880 }, { "epoch": 0.052091232783712145, "grad_norm": 0.593289315700531, "learning_rate": 1.9866390003819674e-05, "loss": 0.4074, "step": 2885 }, { "epoch": 0.05218151221661286, "grad_norm": 0.4223598837852478, "learning_rate": 1.9865927521471832e-05, "loss": 0.2159, "step": 2890 }, { "epoch": 0.05227179164951357, "grad_norm": 0.6746003031730652, "learning_rate": 1.9865464245481854e-05, "loss": 0.4203, "step": 2895 }, { "epoch": 0.05236207108241429, "grad_norm": 0.3579443395137787, "learning_rate": 1.9865000175887005e-05, "loss": 0.2457, "step": 2900 }, { "epoch": 0.052452350515315, "grad_norm": 0.4429759979248047, "learning_rate": 1.9864535312724617e-05, "loss": 0.3532, "step": 2905 }, { "epoch": 0.052542629948215715, "grad_norm": 0.4187673032283783, "learning_rate": 1.9864069656032087e-05, "loss": 0.3091, "step": 2910 }, { "epoch": 0.05263290938111643, "grad_norm": 0.48153671622276306, "learning_rate": 1.9863603205846867e-05, "loss": 0.3176, "step": 2915 }, { "epoch": 0.05272318881401714, "grad_norm": 0.5366928577423096, "learning_rate": 1.986313596220649e-05, "loss": 0.3418, "step": 2920 }, { "epoch": 0.05281346824691786, "grad_norm": 0.3011767268180847, "learning_rate": 1.9862667925148537e-05, "loss": 0.3572, "step": 2925 }, { "epoch": 0.05290374767981858, "grad_norm": 0.41139376163482666, "learning_rate": 1.9862199094710655e-05, "loss": 0.3261, "step": 2930 }, { "epoch": 0.05299402711271929, "grad_norm": 1.1922186613082886, "learning_rate": 1.9861729470930562e-05, "loss": 0.2384, "step": 2935 }, { "epoch": 0.053084306545620005, "grad_norm": 0.5575213432312012, "learning_rate": 1.9861259053846034e-05, "loss": 0.2874, "step": 2940 }, { "epoch": 0.05317458597852072, "grad_norm": 0.6539455652236938, "learning_rate": 1.9860787843494917e-05, "loss": 0.3129, "step": 2945 }, { "epoch": 0.05326486541142143, "grad_norm": 0.5355772972106934, "learning_rate": 1.9860315839915107e-05, "loss": 0.3979, "step": 2950 }, { "epoch": 0.05335514484432215, "grad_norm": 0.3928343653678894, "learning_rate": 1.9859843043144586e-05, "loss": 0.3473, "step": 2955 }, { "epoch": 0.05344542427722286, "grad_norm": 0.33358311653137207, "learning_rate": 1.9859369453221374e-05, "loss": 0.4318, "step": 2960 }, { "epoch": 0.053535703710123575, "grad_norm": 0.4088621437549591, "learning_rate": 1.9858895070183575e-05, "loss": 0.4127, "step": 2965 }, { "epoch": 0.05362598314302429, "grad_norm": 0.419476717710495, "learning_rate": 1.985841989406935e-05, "loss": 0.2679, "step": 2970 }, { "epoch": 0.053716262575925, "grad_norm": 0.5436922311782837, "learning_rate": 1.985794392491692e-05, "loss": 0.351, "step": 2975 }, { "epoch": 0.05380654200882572, "grad_norm": 0.34839963912963867, "learning_rate": 1.985746716276458e-05, "loss": 0.4118, "step": 2980 }, { "epoch": 0.05389682144172643, "grad_norm": 0.5277071595191956, "learning_rate": 1.9856989607650675e-05, "loss": 0.3752, "step": 2985 }, { "epoch": 0.053987100874627145, "grad_norm": 0.5830458998680115, "learning_rate": 1.9856511259613622e-05, "loss": 0.3538, "step": 2990 }, { "epoch": 0.05407738030752786, "grad_norm": 0.3557010889053345, "learning_rate": 1.9856032118691906e-05, "loss": 0.2958, "step": 2995 }, { "epoch": 0.05416765974042857, "grad_norm": 0.5906873941421509, "learning_rate": 1.9855552184924062e-05, "loss": 0.3612, "step": 3000 }, { "epoch": 0.054257939173329287, "grad_norm": 0.569794237613678, "learning_rate": 1.98550714583487e-05, "loss": 0.364, "step": 3005 }, { "epoch": 0.05434821860623, "grad_norm": 0.3360729515552521, "learning_rate": 1.9854589939004497e-05, "loss": 0.3981, "step": 3010 }, { "epoch": 0.054438498039130714, "grad_norm": 0.4309172034263611, "learning_rate": 1.9854107626930185e-05, "loss": 0.3597, "step": 3015 }, { "epoch": 0.05452877747203143, "grad_norm": 0.31440845131874084, "learning_rate": 1.9853624522164558e-05, "loss": 0.4754, "step": 3020 }, { "epoch": 0.05461905690493215, "grad_norm": 0.3023746609687805, "learning_rate": 1.9853140624746478e-05, "loss": 0.3672, "step": 3025 }, { "epoch": 0.05470933633783286, "grad_norm": 0.36276915669441223, "learning_rate": 1.9852655934714878e-05, "loss": 0.3996, "step": 3030 }, { "epoch": 0.05479961577073358, "grad_norm": 0.493784636259079, "learning_rate": 1.9852170452108742e-05, "loss": 0.3379, "step": 3035 }, { "epoch": 0.05488989520363429, "grad_norm": 0.5143473148345947, "learning_rate": 1.9851684176967122e-05, "loss": 0.2968, "step": 3040 }, { "epoch": 0.054980174636535005, "grad_norm": 0.47927024960517883, "learning_rate": 1.9851197109329147e-05, "loss": 0.3886, "step": 3045 }, { "epoch": 0.05507045406943572, "grad_norm": 0.35105928778648376, "learning_rate": 1.9850709249233984e-05, "loss": 0.3185, "step": 3050 }, { "epoch": 0.05516073350233643, "grad_norm": 1.5786962509155273, "learning_rate": 1.985022059672088e-05, "loss": 0.4056, "step": 3055 }, { "epoch": 0.05525101293523715, "grad_norm": 0.4125722050666809, "learning_rate": 1.9849731151829154e-05, "loss": 0.3661, "step": 3060 }, { "epoch": 0.05534129236813786, "grad_norm": 0.4464266002178192, "learning_rate": 1.984924091459817e-05, "loss": 0.4217, "step": 3065 }, { "epoch": 0.055431571801038575, "grad_norm": 0.3512585759162903, "learning_rate": 1.9848749885067362e-05, "loss": 0.354, "step": 3070 }, { "epoch": 0.05552185123393929, "grad_norm": 0.7285350561141968, "learning_rate": 1.9848258063276237e-05, "loss": 0.3269, "step": 3075 }, { "epoch": 0.05561213066684, "grad_norm": 0.5315104126930237, "learning_rate": 1.984776544926435e-05, "loss": 0.2996, "step": 3080 }, { "epoch": 0.055702410099740716, "grad_norm": 0.40287673473358154, "learning_rate": 1.9847272043071336e-05, "loss": 0.3824, "step": 3085 }, { "epoch": 0.05579268953264143, "grad_norm": 0.47473129630088806, "learning_rate": 1.9846777844736883e-05, "loss": 0.3019, "step": 3090 }, { "epoch": 0.055882968965542144, "grad_norm": 0.30406618118286133, "learning_rate": 1.984628285430074e-05, "loss": 0.3529, "step": 3095 }, { "epoch": 0.05597324839844286, "grad_norm": 0.5010662078857422, "learning_rate": 1.984578707180274e-05, "loss": 0.3797, "step": 3100 }, { "epoch": 0.05606352783134357, "grad_norm": 0.4138856828212738, "learning_rate": 1.984529049728275e-05, "loss": 0.2762, "step": 3105 }, { "epoch": 0.056153807264244286, "grad_norm": 0.3830830454826355, "learning_rate": 1.9844793130780723e-05, "loss": 0.417, "step": 3110 }, { "epoch": 0.056244086697145, "grad_norm": 0.5126296877861023, "learning_rate": 1.984429497233667e-05, "loss": 0.3476, "step": 3115 }, { "epoch": 0.056334366130045714, "grad_norm": 0.49585217237472534, "learning_rate": 1.9843796021990656e-05, "loss": 0.3795, "step": 3120 }, { "epoch": 0.056424645562946435, "grad_norm": 0.4351014792919159, "learning_rate": 1.9843296279782825e-05, "loss": 0.3296, "step": 3125 }, { "epoch": 0.05651492499584715, "grad_norm": 0.37983760237693787, "learning_rate": 1.9842795745753376e-05, "loss": 0.351, "step": 3130 }, { "epoch": 0.05660520442874786, "grad_norm": 0.7745433449745178, "learning_rate": 1.9842294419942572e-05, "loss": 0.3034, "step": 3135 }, { "epoch": 0.05669548386164858, "grad_norm": 0.553109884262085, "learning_rate": 1.9841792302390747e-05, "loss": 0.3261, "step": 3140 }, { "epoch": 0.05678576329454929, "grad_norm": 0.6224424242973328, "learning_rate": 1.9841289393138287e-05, "loss": 0.2753, "step": 3145 }, { "epoch": 0.056876042727450005, "grad_norm": 0.6656163334846497, "learning_rate": 1.9840785692225645e-05, "loss": 0.3733, "step": 3150 }, { "epoch": 0.05696632216035072, "grad_norm": 0.4527309834957123, "learning_rate": 1.9840281199693345e-05, "loss": 0.4109, "step": 3155 }, { "epoch": 0.05705660159325143, "grad_norm": 0.41423696279525757, "learning_rate": 1.983977591558197e-05, "loss": 0.357, "step": 3160 }, { "epoch": 0.057146881026152146, "grad_norm": 0.5543522238731384, "learning_rate": 1.9839269839932162e-05, "loss": 0.393, "step": 3165 }, { "epoch": 0.05723716045905286, "grad_norm": 0.6002402305603027, "learning_rate": 1.9838762972784634e-05, "loss": 0.3379, "step": 3170 }, { "epoch": 0.057327439891953574, "grad_norm": 0.38091033697128296, "learning_rate": 1.9838255314180163e-05, "loss": 0.3197, "step": 3175 }, { "epoch": 0.05741771932485429, "grad_norm": 0.34828096628189087, "learning_rate": 1.9837746864159578e-05, "loss": 0.3275, "step": 3180 }, { "epoch": 0.057507998757755, "grad_norm": 0.5166292190551758, "learning_rate": 1.9837237622763788e-05, "loss": 0.345, "step": 3185 }, { "epoch": 0.057598278190655716, "grad_norm": 0.37860623002052307, "learning_rate": 1.9836727590033756e-05, "loss": 0.2667, "step": 3190 }, { "epoch": 0.05768855762355643, "grad_norm": 0.48768389225006104, "learning_rate": 1.9836216766010507e-05, "loss": 0.4015, "step": 3195 }, { "epoch": 0.057778837056457144, "grad_norm": 0.44820886850357056, "learning_rate": 1.9835705150735135e-05, "loss": 0.3085, "step": 3200 }, { "epoch": 0.05786911648935786, "grad_norm": 0.9818539023399353, "learning_rate": 1.98351927442488e-05, "loss": 0.353, "step": 3205 }, { "epoch": 0.05795939592225857, "grad_norm": 0.42301586270332336, "learning_rate": 1.9834679546592713e-05, "loss": 0.3963, "step": 3210 }, { "epoch": 0.058049675355159286, "grad_norm": 0.537584125995636, "learning_rate": 1.9834165557808162e-05, "loss": 0.3601, "step": 3215 }, { "epoch": 0.05813995478806001, "grad_norm": 0.5891759395599365, "learning_rate": 1.9833650777936495e-05, "loss": 0.4072, "step": 3220 }, { "epoch": 0.05823023422096072, "grad_norm": 0.5722532272338867, "learning_rate": 1.9833135207019122e-05, "loss": 0.376, "step": 3225 }, { "epoch": 0.058320513653861435, "grad_norm": 0.45028820633888245, "learning_rate": 1.983261884509751e-05, "loss": 0.287, "step": 3230 }, { "epoch": 0.05841079308676215, "grad_norm": 0.4146955609321594, "learning_rate": 1.983210169221321e-05, "loss": 0.428, "step": 3235 }, { "epoch": 0.05850107251966286, "grad_norm": 0.48254334926605225, "learning_rate": 1.9831583748407807e-05, "loss": 0.2921, "step": 3240 }, { "epoch": 0.058591351952563576, "grad_norm": 0.5707606077194214, "learning_rate": 1.983106501372298e-05, "loss": 0.326, "step": 3245 }, { "epoch": 0.05868163138546429, "grad_norm": 0.43480393290519714, "learning_rate": 1.983054548820045e-05, "loss": 0.3006, "step": 3250 }, { "epoch": 0.058771910818365004, "grad_norm": 0.29172283411026, "learning_rate": 1.983002517188201e-05, "loss": 0.3144, "step": 3255 }, { "epoch": 0.05886219025126572, "grad_norm": 0.6074573993682861, "learning_rate": 1.982950406480951e-05, "loss": 0.3375, "step": 3260 }, { "epoch": 0.05895246968416643, "grad_norm": 0.42056509852409363, "learning_rate": 1.9828982167024882e-05, "loss": 0.3143, "step": 3265 }, { "epoch": 0.059042749117067146, "grad_norm": 0.6018386483192444, "learning_rate": 1.98284594785701e-05, "loss": 0.4923, "step": 3270 }, { "epoch": 0.05913302854996786, "grad_norm": 0.32788190245628357, "learning_rate": 1.9827935999487215e-05, "loss": 0.3249, "step": 3275 }, { "epoch": 0.059223307982868574, "grad_norm": 0.31291717290878296, "learning_rate": 1.9827411729818332e-05, "loss": 0.2892, "step": 3280 }, { "epoch": 0.05931358741576929, "grad_norm": 0.5440911650657654, "learning_rate": 1.9826886669605633e-05, "loss": 0.2986, "step": 3285 }, { "epoch": 0.05940386684867, "grad_norm": 0.2649855315685272, "learning_rate": 1.9826360818891342e-05, "loss": 0.287, "step": 3290 }, { "epoch": 0.059494146281570716, "grad_norm": 0.6282209753990173, "learning_rate": 1.9825834177717774e-05, "loss": 0.3779, "step": 3295 }, { "epoch": 0.05958442571447143, "grad_norm": 0.37331846356391907, "learning_rate": 1.9825306746127286e-05, "loss": 0.3176, "step": 3300 }, { "epoch": 0.059674705147372144, "grad_norm": 0.43291202187538147, "learning_rate": 1.9824778524162304e-05, "loss": 0.4459, "step": 3305 }, { "epoch": 0.05976498458027286, "grad_norm": 1.268259882926941, "learning_rate": 1.9824249511865327e-05, "loss": 0.2672, "step": 3310 }, { "epoch": 0.05985526401317357, "grad_norm": 0.6148844957351685, "learning_rate": 1.98237197092789e-05, "loss": 0.3542, "step": 3315 }, { "epoch": 0.05994554344607429, "grad_norm": 0.43036696314811707, "learning_rate": 1.9823189116445655e-05, "loss": 0.3425, "step": 3320 }, { "epoch": 0.060035822878975006, "grad_norm": 0.45163917541503906, "learning_rate": 1.9822657733408263e-05, "loss": 0.3049, "step": 3325 }, { "epoch": 0.06012610231187572, "grad_norm": 0.32905444502830505, "learning_rate": 1.9822125560209475e-05, "loss": 0.3319, "step": 3330 }, { "epoch": 0.060216381744776434, "grad_norm": 0.44667723774909973, "learning_rate": 1.9821592596892095e-05, "loss": 0.4263, "step": 3335 }, { "epoch": 0.06030666117767715, "grad_norm": 0.36804917454719543, "learning_rate": 1.9821058843499005e-05, "loss": 0.3912, "step": 3340 }, { "epoch": 0.06039694061057786, "grad_norm": 0.4349153935909271, "learning_rate": 1.9820524300073138e-05, "loss": 0.3845, "step": 3345 }, { "epoch": 0.060487220043478576, "grad_norm": 0.32116422057151794, "learning_rate": 1.981998896665749e-05, "loss": 0.386, "step": 3350 }, { "epoch": 0.06057749947637929, "grad_norm": 0.4326367974281311, "learning_rate": 1.981945284329513e-05, "loss": 0.3779, "step": 3355 }, { "epoch": 0.060667778909280004, "grad_norm": 0.49139365553855896, "learning_rate": 1.981891593002918e-05, "loss": 0.3826, "step": 3360 }, { "epoch": 0.06075805834218072, "grad_norm": 0.27795523405075073, "learning_rate": 1.981837822690284e-05, "loss": 0.2998, "step": 3365 }, { "epoch": 0.06084833777508143, "grad_norm": 0.4365173578262329, "learning_rate": 1.9817839733959353e-05, "loss": 0.3052, "step": 3370 }, { "epoch": 0.060938617207982146, "grad_norm": 0.4508330523967743, "learning_rate": 1.981730045124204e-05, "loss": 0.3769, "step": 3375 }, { "epoch": 0.06102889664088286, "grad_norm": 0.2785418629646301, "learning_rate": 1.9816760378794293e-05, "loss": 0.3254, "step": 3380 }, { "epoch": 0.061119176073783574, "grad_norm": 0.4931766390800476, "learning_rate": 1.9816219516659544e-05, "loss": 0.4362, "step": 3385 }, { "epoch": 0.06120945550668429, "grad_norm": 0.3930407166481018, "learning_rate": 1.98156778648813e-05, "loss": 0.3377, "step": 3390 }, { "epoch": 0.061299734939585, "grad_norm": 0.542591392993927, "learning_rate": 1.9815135423503147e-05, "loss": 0.3106, "step": 3395 }, { "epoch": 0.061390014372485716, "grad_norm": 0.5310285091400146, "learning_rate": 1.9814592192568705e-05, "loss": 0.3922, "step": 3400 }, { "epoch": 0.06148029380538643, "grad_norm": 0.4742412865161896, "learning_rate": 1.9814048172121687e-05, "loss": 0.3593, "step": 3405 }, { "epoch": 0.06157057323828714, "grad_norm": 0.4899846315383911, "learning_rate": 1.9813503362205844e-05, "loss": 0.4096, "step": 3410 }, { "epoch": 0.06166085267118786, "grad_norm": 0.3952639400959015, "learning_rate": 1.981295776286501e-05, "loss": 0.3031, "step": 3415 }, { "epoch": 0.06175113210408858, "grad_norm": 0.2628712058067322, "learning_rate": 1.9812411374143066e-05, "loss": 0.39, "step": 3420 }, { "epoch": 0.06184141153698929, "grad_norm": 0.44826173782348633, "learning_rate": 1.9811864196083974e-05, "loss": 0.431, "step": 3425 }, { "epoch": 0.061931690969890006, "grad_norm": 0.3274790048599243, "learning_rate": 1.9811316228731745e-05, "loss": 0.3531, "step": 3430 }, { "epoch": 0.06202197040279072, "grad_norm": 0.4401794672012329, "learning_rate": 1.9810767472130462e-05, "loss": 0.4226, "step": 3435 }, { "epoch": 0.062112249835691434, "grad_norm": 0.3578872084617615, "learning_rate": 1.9810217926324266e-05, "loss": 0.3608, "step": 3440 }, { "epoch": 0.06220252926859215, "grad_norm": 0.5258836150169373, "learning_rate": 1.9809667591357364e-05, "loss": 0.3388, "step": 3445 }, { "epoch": 0.06229280870149286, "grad_norm": 0.4620509743690491, "learning_rate": 1.9809116467274027e-05, "loss": 0.302, "step": 3450 }, { "epoch": 0.062383088134393576, "grad_norm": 0.7103094458580017, "learning_rate": 1.9808564554118593e-05, "loss": 0.4503, "step": 3455 }, { "epoch": 0.06247336756729429, "grad_norm": 0.4419422447681427, "learning_rate": 1.9808011851935453e-05, "loss": 0.3287, "step": 3460 }, { "epoch": 0.062563647000195, "grad_norm": 0.34888213872909546, "learning_rate": 1.980745836076907e-05, "loss": 0.3216, "step": 3465 }, { "epoch": 0.06265392643309572, "grad_norm": 0.4253569543361664, "learning_rate": 1.9806904080663973e-05, "loss": 0.2493, "step": 3470 }, { "epoch": 0.06274420586599644, "grad_norm": 0.32277703285217285, "learning_rate": 1.9806349011664743e-05, "loss": 0.2501, "step": 3475 }, { "epoch": 0.06283448529889715, "grad_norm": 0.6440743803977966, "learning_rate": 1.9805793153816033e-05, "loss": 0.4372, "step": 3480 }, { "epoch": 0.06292476473179787, "grad_norm": 0.34801673889160156, "learning_rate": 1.980523650716256e-05, "loss": 0.3797, "step": 3485 }, { "epoch": 0.06301504416469858, "grad_norm": 0.789560079574585, "learning_rate": 1.98046790717491e-05, "loss": 0.3145, "step": 3490 }, { "epoch": 0.0631053235975993, "grad_norm": 0.33960312604904175, "learning_rate": 1.9804120847620497e-05, "loss": 0.3943, "step": 3495 }, { "epoch": 0.06319560303050001, "grad_norm": 0.5149698853492737, "learning_rate": 1.9803561834821657e-05, "loss": 0.2716, "step": 3500 }, { "epoch": 0.06328588246340072, "grad_norm": 0.5456732511520386, "learning_rate": 1.9803002033397542e-05, "loss": 0.4073, "step": 3505 }, { "epoch": 0.06337616189630144, "grad_norm": 0.4857495129108429, "learning_rate": 1.9802441443393187e-05, "loss": 0.3824, "step": 3510 }, { "epoch": 0.06346644132920215, "grad_norm": 0.458262175321579, "learning_rate": 1.9801880064853694e-05, "loss": 0.356, "step": 3515 }, { "epoch": 0.06355672076210286, "grad_norm": 0.4099235534667969, "learning_rate": 1.9801317897824214e-05, "loss": 0.3158, "step": 3520 }, { "epoch": 0.06364700019500358, "grad_norm": 0.45231691002845764, "learning_rate": 1.9800754942349967e-05, "loss": 0.393, "step": 3525 }, { "epoch": 0.06373727962790429, "grad_norm": 0.3657291829586029, "learning_rate": 1.9800191198476248e-05, "loss": 0.3686, "step": 3530 }, { "epoch": 0.063827559060805, "grad_norm": 0.4763554036617279, "learning_rate": 1.9799626666248405e-05, "loss": 0.3492, "step": 3535 }, { "epoch": 0.06391783849370572, "grad_norm": 0.4591434597969055, "learning_rate": 1.9799061345711837e-05, "loss": 0.2811, "step": 3540 }, { "epoch": 0.06400811792660643, "grad_norm": 0.3183038830757141, "learning_rate": 1.9798495236912035e-05, "loss": 0.3524, "step": 3545 }, { "epoch": 0.06409839735950715, "grad_norm": 0.2819090187549591, "learning_rate": 1.9797928339894537e-05, "loss": 0.313, "step": 3550 }, { "epoch": 0.06418867679240786, "grad_norm": 0.4335143268108368, "learning_rate": 1.9797360654704937e-05, "loss": 0.3756, "step": 3555 }, { "epoch": 0.06427895622530858, "grad_norm": 0.6104137301445007, "learning_rate": 1.9796792181388905e-05, "loss": 0.3316, "step": 3560 }, { "epoch": 0.06436923565820929, "grad_norm": 1.3540409803390503, "learning_rate": 1.9796222919992176e-05, "loss": 0.309, "step": 3565 }, { "epoch": 0.06445951509111, "grad_norm": 0.599658191204071, "learning_rate": 1.9795652870560537e-05, "loss": 0.452, "step": 3570 }, { "epoch": 0.06454979452401072, "grad_norm": 0.5024389028549194, "learning_rate": 1.9795082033139846e-05, "loss": 0.4017, "step": 3575 }, { "epoch": 0.06464007395691143, "grad_norm": 0.6276569962501526, "learning_rate": 1.979451040777602e-05, "loss": 0.3668, "step": 3580 }, { "epoch": 0.06473035338981215, "grad_norm": 0.4447115659713745, "learning_rate": 1.9793937994515046e-05, "loss": 0.3121, "step": 3585 }, { "epoch": 0.06482063282271286, "grad_norm": 0.36672645807266235, "learning_rate": 1.979336479340297e-05, "loss": 0.3228, "step": 3590 }, { "epoch": 0.06491091225561357, "grad_norm": 0.5025267004966736, "learning_rate": 1.97927908044859e-05, "loss": 0.3724, "step": 3595 }, { "epoch": 0.06500119168851429, "grad_norm": 0.6396102905273438, "learning_rate": 1.9792216027810013e-05, "loss": 0.3236, "step": 3600 }, { "epoch": 0.065091471121415, "grad_norm": 0.36236289143562317, "learning_rate": 1.979164046342154e-05, "loss": 0.3841, "step": 3605 }, { "epoch": 0.06518175055431571, "grad_norm": 0.45055314898490906, "learning_rate": 1.9791064111366783e-05, "loss": 0.3967, "step": 3610 }, { "epoch": 0.06527202998721643, "grad_norm": 0.4135984778404236, "learning_rate": 1.979048697169211e-05, "loss": 0.2799, "step": 3615 }, { "epoch": 0.06536230942011714, "grad_norm": 0.617309033870697, "learning_rate": 1.9789909044443944e-05, "loss": 0.2757, "step": 3620 }, { "epoch": 0.06545258885301786, "grad_norm": 0.489812970161438, "learning_rate": 1.978933032966877e-05, "loss": 0.367, "step": 3625 }, { "epoch": 0.06554286828591857, "grad_norm": 0.4110862612724304, "learning_rate": 1.978875082741315e-05, "loss": 0.3243, "step": 3630 }, { "epoch": 0.06563314771881928, "grad_norm": 0.39427581429481506, "learning_rate": 1.9788170537723694e-05, "loss": 0.3962, "step": 3635 }, { "epoch": 0.06572342715172, "grad_norm": 0.44661369919776917, "learning_rate": 1.9787589460647084e-05, "loss": 0.3356, "step": 3640 }, { "epoch": 0.06581370658462071, "grad_norm": 0.9452258944511414, "learning_rate": 1.978700759623007e-05, "loss": 0.3642, "step": 3645 }, { "epoch": 0.06590398601752143, "grad_norm": 0.38086244463920593, "learning_rate": 1.978642494451945e-05, "loss": 0.3563, "step": 3650 }, { "epoch": 0.06599426545042214, "grad_norm": 0.6124266386032104, "learning_rate": 1.97858415055621e-05, "loss": 0.3346, "step": 3655 }, { "epoch": 0.06608454488332285, "grad_norm": 0.5609608888626099, "learning_rate": 1.978525727940495e-05, "loss": 0.401, "step": 3660 }, { "epoch": 0.06617482431622357, "grad_norm": 0.46799227595329285, "learning_rate": 1.9784672266094994e-05, "loss": 0.3334, "step": 3665 }, { "epoch": 0.0662651037491243, "grad_norm": 0.41846963763237, "learning_rate": 1.97840864656793e-05, "loss": 0.3084, "step": 3670 }, { "epoch": 0.06635538318202501, "grad_norm": 0.5600510239601135, "learning_rate": 1.9783499878204985e-05, "loss": 0.3431, "step": 3675 }, { "epoch": 0.06644566261492572, "grad_norm": 1.7698122262954712, "learning_rate": 1.9782912503719234e-05, "loss": 0.3264, "step": 3680 }, { "epoch": 0.06653594204782644, "grad_norm": 0.43976593017578125, "learning_rate": 1.9782324342269308e-05, "loss": 0.3047, "step": 3685 }, { "epoch": 0.06662622148072715, "grad_norm": 0.3489255905151367, "learning_rate": 1.9781735393902507e-05, "loss": 0.3868, "step": 3690 }, { "epoch": 0.06671650091362787, "grad_norm": 0.4278669059276581, "learning_rate": 1.978114565866622e-05, "loss": 0.4227, "step": 3695 }, { "epoch": 0.06680678034652858, "grad_norm": 0.37567952275276184, "learning_rate": 1.9780555136607873e-05, "loss": 0.3084, "step": 3700 }, { "epoch": 0.0668970597794293, "grad_norm": 0.651716411113739, "learning_rate": 1.977996382777498e-05, "loss": 0.2765, "step": 3705 }, { "epoch": 0.06698733921233001, "grad_norm": 0.4037342667579651, "learning_rate": 1.9779371732215107e-05, "loss": 0.3601, "step": 3710 }, { "epoch": 0.06707761864523072, "grad_norm": 0.31218260526657104, "learning_rate": 1.977877884997588e-05, "loss": 0.3889, "step": 3715 }, { "epoch": 0.06716789807813144, "grad_norm": 0.37937313318252563, "learning_rate": 1.977818518110499e-05, "loss": 0.4928, "step": 3720 }, { "epoch": 0.06725817751103215, "grad_norm": 0.33725881576538086, "learning_rate": 1.97775907256502e-05, "loss": 0.3793, "step": 3725 }, { "epoch": 0.06734845694393286, "grad_norm": 0.387472927570343, "learning_rate": 1.9776995483659323e-05, "loss": 0.4042, "step": 3730 }, { "epoch": 0.06743873637683358, "grad_norm": 0.37021878361701965, "learning_rate": 1.977639945518025e-05, "loss": 0.2947, "step": 3735 }, { "epoch": 0.06752901580973429, "grad_norm": 0.4049207270145416, "learning_rate": 1.9775802640260916e-05, "loss": 0.317, "step": 3740 }, { "epoch": 0.067619295242635, "grad_norm": 0.4391418397426605, "learning_rate": 1.977520503894934e-05, "loss": 0.276, "step": 3745 }, { "epoch": 0.06770957467553572, "grad_norm": 0.396352618932724, "learning_rate": 1.977460665129359e-05, "loss": 0.3707, "step": 3750 }, { "epoch": 0.06779985410843643, "grad_norm": 1.2072421312332153, "learning_rate": 1.9774007477341802e-05, "loss": 0.3005, "step": 3755 }, { "epoch": 0.06789013354133715, "grad_norm": 0.5573527216911316, "learning_rate": 1.977340751714218e-05, "loss": 0.4486, "step": 3760 }, { "epoch": 0.06798041297423786, "grad_norm": 0.41838520765304565, "learning_rate": 1.977280677074298e-05, "loss": 0.3391, "step": 3765 }, { "epoch": 0.06807069240713857, "grad_norm": 0.32235589623451233, "learning_rate": 1.977220523819253e-05, "loss": 0.3726, "step": 3770 }, { "epoch": 0.06816097184003929, "grad_norm": 0.37072479724884033, "learning_rate": 1.9771602919539223e-05, "loss": 0.323, "step": 3775 }, { "epoch": 0.06825125127294, "grad_norm": 0.4548185169696808, "learning_rate": 1.9770999814831504e-05, "loss": 0.3868, "step": 3780 }, { "epoch": 0.06834153070584072, "grad_norm": 0.4475475549697876, "learning_rate": 1.9770395924117895e-05, "loss": 0.2801, "step": 3785 }, { "epoch": 0.06843181013874143, "grad_norm": 0.501424252986908, "learning_rate": 1.9769791247446968e-05, "loss": 0.3281, "step": 3790 }, { "epoch": 0.06852208957164214, "grad_norm": 0.41786015033721924, "learning_rate": 1.976918578486737e-05, "loss": 0.4034, "step": 3795 }, { "epoch": 0.06861236900454286, "grad_norm": 0.45296943187713623, "learning_rate": 1.9768579536427804e-05, "loss": 0.4663, "step": 3800 }, { "epoch": 0.06870264843744357, "grad_norm": 0.5736542344093323, "learning_rate": 1.976797250217704e-05, "loss": 0.3618, "step": 3805 }, { "epoch": 0.06879292787034429, "grad_norm": 0.3980604112148285, "learning_rate": 1.9767364682163905e-05, "loss": 0.3069, "step": 3810 }, { "epoch": 0.068883207303245, "grad_norm": 0.5466489195823669, "learning_rate": 1.97667560764373e-05, "loss": 0.3198, "step": 3815 }, { "epoch": 0.06897348673614571, "grad_norm": 0.7333075404167175, "learning_rate": 1.9766146685046183e-05, "loss": 0.3152, "step": 3820 }, { "epoch": 0.06906376616904643, "grad_norm": 0.4144575893878937, "learning_rate": 1.9765536508039568e-05, "loss": 0.3104, "step": 3825 }, { "epoch": 0.06915404560194714, "grad_norm": 0.6920064091682434, "learning_rate": 1.976492554546654e-05, "loss": 0.3001, "step": 3830 }, { "epoch": 0.06924432503484786, "grad_norm": 0.39211317896842957, "learning_rate": 1.9764313797376253e-05, "loss": 0.3743, "step": 3835 }, { "epoch": 0.06933460446774857, "grad_norm": 0.510002613067627, "learning_rate": 1.9763701263817916e-05, "loss": 0.3874, "step": 3840 }, { "epoch": 0.06942488390064928, "grad_norm": 0.5999088883399963, "learning_rate": 1.97630879448408e-05, "loss": 0.3603, "step": 3845 }, { "epoch": 0.06951516333355, "grad_norm": 0.5141642093658447, "learning_rate": 1.976247384049424e-05, "loss": 0.3159, "step": 3850 }, { "epoch": 0.06960544276645071, "grad_norm": 0.43448373675346375, "learning_rate": 1.9761858950827646e-05, "loss": 0.3758, "step": 3855 }, { "epoch": 0.06969572219935143, "grad_norm": 0.4024636447429657, "learning_rate": 1.9761243275890473e-05, "loss": 0.2689, "step": 3860 }, { "epoch": 0.06978600163225215, "grad_norm": 0.3778102695941925, "learning_rate": 1.9760626815732244e-05, "loss": 0.2949, "step": 3865 }, { "epoch": 0.06987628106515287, "grad_norm": 0.5055009126663208, "learning_rate": 1.976000957040256e-05, "loss": 0.412, "step": 3870 }, { "epoch": 0.06996656049805358, "grad_norm": 0.5268850326538086, "learning_rate": 1.9759391539951067e-05, "loss": 0.3701, "step": 3875 }, { "epoch": 0.0700568399309543, "grad_norm": 0.4896146357059479, "learning_rate": 1.975877272442748e-05, "loss": 0.2631, "step": 3880 }, { "epoch": 0.07014711936385501, "grad_norm": 0.5230270624160767, "learning_rate": 1.9758153123881585e-05, "loss": 0.3436, "step": 3885 }, { "epoch": 0.07023739879675572, "grad_norm": 0.4784453809261322, "learning_rate": 1.9757532738363217e-05, "loss": 0.4115, "step": 3890 }, { "epoch": 0.07032767822965644, "grad_norm": 0.369460791349411, "learning_rate": 1.9756911567922287e-05, "loss": 0.2817, "step": 3895 }, { "epoch": 0.07041795766255715, "grad_norm": 0.40652188658714294, "learning_rate": 1.9756289612608755e-05, "loss": 0.4009, "step": 3900 }, { "epoch": 0.07050823709545787, "grad_norm": 0.6725921630859375, "learning_rate": 1.9755666872472662e-05, "loss": 0.3781, "step": 3905 }, { "epoch": 0.07059851652835858, "grad_norm": 0.4345565140247345, "learning_rate": 1.97550433475641e-05, "loss": 0.3592, "step": 3910 }, { "epoch": 0.0706887959612593, "grad_norm": 0.46316981315612793, "learning_rate": 1.9754419037933226e-05, "loss": 0.2884, "step": 3915 }, { "epoch": 0.07077907539416001, "grad_norm": 0.42469629645347595, "learning_rate": 1.9753793943630263e-05, "loss": 0.279, "step": 3920 }, { "epoch": 0.07086935482706072, "grad_norm": 0.4613531529903412, "learning_rate": 1.9753168064705496e-05, "loss": 0.3594, "step": 3925 }, { "epoch": 0.07095963425996143, "grad_norm": 0.5785402655601501, "learning_rate": 1.9752541401209268e-05, "loss": 0.355, "step": 3930 }, { "epoch": 0.07104991369286215, "grad_norm": 0.4152916371822357, "learning_rate": 1.9751913953191992e-05, "loss": 0.3728, "step": 3935 }, { "epoch": 0.07114019312576286, "grad_norm": 0.6485270261764526, "learning_rate": 1.975128572070414e-05, "loss": 0.3343, "step": 3940 }, { "epoch": 0.07123047255866358, "grad_norm": 0.47957125306129456, "learning_rate": 1.9750656703796254e-05, "loss": 0.3742, "step": 3945 }, { "epoch": 0.07132075199156429, "grad_norm": 0.5528354644775391, "learning_rate": 1.9750026902518928e-05, "loss": 0.409, "step": 3950 }, { "epoch": 0.071411031424465, "grad_norm": 0.48165422677993774, "learning_rate": 1.974939631692283e-05, "loss": 0.3406, "step": 3955 }, { "epoch": 0.07150131085736572, "grad_norm": 0.8650749325752258, "learning_rate": 1.974876494705868e-05, "loss": 0.4059, "step": 3960 }, { "epoch": 0.07159159029026643, "grad_norm": 0.5957344770431519, "learning_rate": 1.974813279297727e-05, "loss": 0.379, "step": 3965 }, { "epoch": 0.07168186972316715, "grad_norm": 0.4109193980693817, "learning_rate": 1.974749985472946e-05, "loss": 0.3744, "step": 3970 }, { "epoch": 0.07177214915606786, "grad_norm": 0.3410075902938843, "learning_rate": 1.9746866132366146e-05, "loss": 0.352, "step": 3975 }, { "epoch": 0.07186242858896857, "grad_norm": 0.35367947816848755, "learning_rate": 1.9746231625938326e-05, "loss": 0.3135, "step": 3980 }, { "epoch": 0.07195270802186929, "grad_norm": 0.8415671586990356, "learning_rate": 1.9745596335497033e-05, "loss": 0.3666, "step": 3985 }, { "epoch": 0.07204298745477, "grad_norm": 0.4690043032169342, "learning_rate": 1.974496026109337e-05, "loss": 0.3948, "step": 3990 }, { "epoch": 0.07213326688767072, "grad_norm": 0.5731785893440247, "learning_rate": 1.9744323402778508e-05, "loss": 0.3948, "step": 3995 }, { "epoch": 0.07222354632057143, "grad_norm": 0.4461507797241211, "learning_rate": 1.9743685760603678e-05, "loss": 0.3688, "step": 4000 }, { "epoch": 0.07231382575347214, "grad_norm": 0.5973614454269409, "learning_rate": 1.974304733462017e-05, "loss": 0.3063, "step": 4005 }, { "epoch": 0.07240410518637286, "grad_norm": 0.6062004566192627, "learning_rate": 1.9742408124879342e-05, "loss": 0.3435, "step": 4010 }, { "epoch": 0.07249438461927357, "grad_norm": 0.3516961932182312, "learning_rate": 1.9741768131432615e-05, "loss": 0.3467, "step": 4015 }, { "epoch": 0.07258466405217429, "grad_norm": 0.4388677179813385, "learning_rate": 1.9741127354331473e-05, "loss": 0.4482, "step": 4020 }, { "epoch": 0.072674943485075, "grad_norm": 0.5486716628074646, "learning_rate": 1.9740485793627456e-05, "loss": 0.3167, "step": 4025 }, { "epoch": 0.07276522291797571, "grad_norm": 0.533378005027771, "learning_rate": 1.9739843449372183e-05, "loss": 0.321, "step": 4030 }, { "epoch": 0.07285550235087643, "grad_norm": 0.4790610373020172, "learning_rate": 1.9739200321617318e-05, "loss": 0.3641, "step": 4035 }, { "epoch": 0.07294578178377714, "grad_norm": 0.42860275506973267, "learning_rate": 1.9738556410414596e-05, "loss": 0.3189, "step": 4040 }, { "epoch": 0.07303606121667786, "grad_norm": 0.7600460648536682, "learning_rate": 1.9737911715815817e-05, "loss": 0.4523, "step": 4045 }, { "epoch": 0.07312634064957857, "grad_norm": 0.43815359473228455, "learning_rate": 1.9737266237872844e-05, "loss": 0.2613, "step": 4050 }, { "epoch": 0.07321662008247928, "grad_norm": 0.43807390332221985, "learning_rate": 1.9736619976637596e-05, "loss": 0.2835, "step": 4055 }, { "epoch": 0.07330689951538, "grad_norm": 0.30521079897880554, "learning_rate": 1.9735972932162068e-05, "loss": 0.3206, "step": 4060 }, { "epoch": 0.07339717894828073, "grad_norm": 0.3289305567741394, "learning_rate": 1.97353251044983e-05, "loss": 0.3101, "step": 4065 }, { "epoch": 0.07348745838118144, "grad_norm": 0.39569854736328125, "learning_rate": 1.9734676493698412e-05, "loss": 0.4014, "step": 4070 }, { "epoch": 0.07357773781408215, "grad_norm": 0.33709490299224854, "learning_rate": 1.9734027099814578e-05, "loss": 0.3654, "step": 4075 }, { "epoch": 0.07366801724698287, "grad_norm": 0.39169785380363464, "learning_rate": 1.9733376922899036e-05, "loss": 0.358, "step": 4080 }, { "epoch": 0.07375829667988358, "grad_norm": 0.5747213959693909, "learning_rate": 1.973272596300409e-05, "loss": 0.3499, "step": 4085 }, { "epoch": 0.0738485761127843, "grad_norm": 0.3984346091747284, "learning_rate": 1.9732074220182104e-05, "loss": 0.3424, "step": 4090 }, { "epoch": 0.07393885554568501, "grad_norm": 0.48990488052368164, "learning_rate": 1.9731421694485505e-05, "loss": 0.3655, "step": 4095 }, { "epoch": 0.07402913497858572, "grad_norm": 0.4573669135570526, "learning_rate": 1.9730768385966788e-05, "loss": 0.4599, "step": 4100 }, { "epoch": 0.07411941441148644, "grad_norm": 0.3597734868526459, "learning_rate": 1.97301142946785e-05, "loss": 0.3179, "step": 4105 }, { "epoch": 0.07420969384438715, "grad_norm": 0.5145891308784485, "learning_rate": 1.972945942067326e-05, "loss": 0.2926, "step": 4110 }, { "epoch": 0.07429997327728786, "grad_norm": 0.33695390820503235, "learning_rate": 1.9728803764003757e-05, "loss": 0.3327, "step": 4115 }, { "epoch": 0.07439025271018858, "grad_norm": 0.5866163372993469, "learning_rate": 1.9728147324722722e-05, "loss": 0.3584, "step": 4120 }, { "epoch": 0.07448053214308929, "grad_norm": 0.3432018458843231, "learning_rate": 1.9727490102882966e-05, "loss": 0.3631, "step": 4125 }, { "epoch": 0.07457081157599, "grad_norm": 0.33699434995651245, "learning_rate": 1.9726832098537355e-05, "loss": 0.3481, "step": 4130 }, { "epoch": 0.07466109100889072, "grad_norm": 0.3236297070980072, "learning_rate": 1.9726173311738825e-05, "loss": 0.3764, "step": 4135 }, { "epoch": 0.07475137044179143, "grad_norm": 0.44569236040115356, "learning_rate": 1.9725513742540366e-05, "loss": 0.3007, "step": 4140 }, { "epoch": 0.07484164987469215, "grad_norm": 0.48883622884750366, "learning_rate": 1.972485339099504e-05, "loss": 0.336, "step": 4145 }, { "epoch": 0.07493192930759286, "grad_norm": 0.514825701713562, "learning_rate": 1.9724192257155965e-05, "loss": 0.3236, "step": 4150 }, { "epoch": 0.07502220874049358, "grad_norm": 0.7090126872062683, "learning_rate": 1.9723530341076326e-05, "loss": 0.3345, "step": 4155 }, { "epoch": 0.07511248817339429, "grad_norm": 0.3429259657859802, "learning_rate": 1.9722867642809364e-05, "loss": 0.2971, "step": 4160 }, { "epoch": 0.075202767606295, "grad_norm": 0.4875260293483734, "learning_rate": 1.9722204162408393e-05, "loss": 0.3013, "step": 4165 }, { "epoch": 0.07529304703919572, "grad_norm": 0.49795055389404297, "learning_rate": 1.9721539899926788e-05, "loss": 0.2305, "step": 4170 }, { "epoch": 0.07538332647209643, "grad_norm": 0.7367872595787048, "learning_rate": 1.972087485541798e-05, "loss": 0.3618, "step": 4175 }, { "epoch": 0.07547360590499715, "grad_norm": 0.3421260118484497, "learning_rate": 1.972020902893546e-05, "loss": 0.3283, "step": 4180 }, { "epoch": 0.07556388533789786, "grad_norm": 0.7015025019645691, "learning_rate": 1.9719542420532805e-05, "loss": 0.2494, "step": 4185 }, { "epoch": 0.07565416477079857, "grad_norm": 0.7375475168228149, "learning_rate": 1.9718875030263627e-05, "loss": 0.3091, "step": 4190 }, { "epoch": 0.07574444420369929, "grad_norm": 0.3345068097114563, "learning_rate": 1.9718206858181615e-05, "loss": 0.428, "step": 4195 }, { "epoch": 0.0758347236366, "grad_norm": 0.6164200305938721, "learning_rate": 1.971753790434052e-05, "loss": 0.4313, "step": 4200 }, { "epoch": 0.07592500306950072, "grad_norm": 0.518981397151947, "learning_rate": 1.9716868168794154e-05, "loss": 0.309, "step": 4205 }, { "epoch": 0.07601528250240143, "grad_norm": 0.44345569610595703, "learning_rate": 1.9716197651596393e-05, "loss": 0.3035, "step": 4210 }, { "epoch": 0.07610556193530214, "grad_norm": 0.5354166626930237, "learning_rate": 1.9715526352801175e-05, "loss": 0.2773, "step": 4215 }, { "epoch": 0.07619584136820286, "grad_norm": 0.5086137652397156, "learning_rate": 1.9714854272462497e-05, "loss": 0.4168, "step": 4220 }, { "epoch": 0.07628612080110357, "grad_norm": 0.631428599357605, "learning_rate": 1.971418141063443e-05, "loss": 0.2984, "step": 4225 }, { "epoch": 0.07637640023400429, "grad_norm": 0.4155879020690918, "learning_rate": 1.9713507767371097e-05, "loss": 0.3106, "step": 4230 }, { "epoch": 0.076466679666905, "grad_norm": 0.4156801998615265, "learning_rate": 1.971283334272669e-05, "loss": 0.3847, "step": 4235 }, { "epoch": 0.07655695909980571, "grad_norm": 0.4328644275665283, "learning_rate": 1.9712158136755457e-05, "loss": 0.3131, "step": 4240 }, { "epoch": 0.07664723853270643, "grad_norm": 0.7468674182891846, "learning_rate": 1.9711482149511717e-05, "loss": 0.3608, "step": 4245 }, { "epoch": 0.07673751796560714, "grad_norm": 0.41286343336105347, "learning_rate": 1.9710805381049845e-05, "loss": 0.3158, "step": 4250 }, { "epoch": 0.07682779739850785, "grad_norm": 0.4288932681083679, "learning_rate": 1.971012783142429e-05, "loss": 0.2644, "step": 4255 }, { "epoch": 0.07691807683140858, "grad_norm": 0.7052945494651794, "learning_rate": 1.9709449500689543e-05, "loss": 0.312, "step": 4260 }, { "epoch": 0.0770083562643093, "grad_norm": 0.36367496848106384, "learning_rate": 1.9708770388900183e-05, "loss": 0.2539, "step": 4265 }, { "epoch": 0.07709863569721001, "grad_norm": 0.3144047260284424, "learning_rate": 1.9708090496110834e-05, "loss": 0.2911, "step": 4270 }, { "epoch": 0.07718891513011072, "grad_norm": 0.48131924867630005, "learning_rate": 1.970740982237619e-05, "loss": 0.2972, "step": 4275 }, { "epoch": 0.07727919456301144, "grad_norm": 0.6672709584236145, "learning_rate": 1.9706728367751005e-05, "loss": 0.3804, "step": 4280 }, { "epoch": 0.07736947399591215, "grad_norm": 0.4266987144947052, "learning_rate": 1.9706046132290097e-05, "loss": 0.3536, "step": 4285 }, { "epoch": 0.07745975342881287, "grad_norm": 0.6607972979545593, "learning_rate": 1.9705363116048347e-05, "loss": 0.3393, "step": 4290 }, { "epoch": 0.07755003286171358, "grad_norm": 0.47216519713401794, "learning_rate": 1.9704679319080696e-05, "loss": 0.3604, "step": 4295 }, { "epoch": 0.0776403122946143, "grad_norm": 0.603981614112854, "learning_rate": 1.9703994741442162e-05, "loss": 0.3582, "step": 4300 }, { "epoch": 0.07773059172751501, "grad_norm": 0.5392507314682007, "learning_rate": 1.9703309383187798e-05, "loss": 0.3195, "step": 4305 }, { "epoch": 0.07782087116041572, "grad_norm": 0.6059638261795044, "learning_rate": 1.970262324437275e-05, "loss": 0.3596, "step": 4310 }, { "epoch": 0.07791115059331644, "grad_norm": 0.45418858528137207, "learning_rate": 1.97019363250522e-05, "loss": 0.3069, "step": 4315 }, { "epoch": 0.07800143002621715, "grad_norm": 0.677732527256012, "learning_rate": 1.970124862528142e-05, "loss": 0.3266, "step": 4320 }, { "epoch": 0.07809170945911786, "grad_norm": 0.4499494135379791, "learning_rate": 1.970056014511572e-05, "loss": 0.3828, "step": 4325 }, { "epoch": 0.07818198889201858, "grad_norm": 0.41509875655174255, "learning_rate": 1.9699870884610485e-05, "loss": 0.3849, "step": 4330 }, { "epoch": 0.07827226832491929, "grad_norm": 0.4929969310760498, "learning_rate": 1.9699180843821166e-05, "loss": 0.3862, "step": 4335 }, { "epoch": 0.07836254775782, "grad_norm": 0.6417096257209778, "learning_rate": 1.9698490022803262e-05, "loss": 0.3864, "step": 4340 }, { "epoch": 0.07845282719072072, "grad_norm": 0.5347675681114197, "learning_rate": 1.9697798421612355e-05, "loss": 0.3556, "step": 4345 }, { "epoch": 0.07854310662362143, "grad_norm": 0.3889930546283722, "learning_rate": 1.9697106040304072e-05, "loss": 0.3758, "step": 4350 }, { "epoch": 0.07863338605652215, "grad_norm": 0.5241528749465942, "learning_rate": 1.969641287893411e-05, "loss": 0.3058, "step": 4355 }, { "epoch": 0.07872366548942286, "grad_norm": 0.5995569229125977, "learning_rate": 1.969571893755824e-05, "loss": 0.3353, "step": 4360 }, { "epoch": 0.07881394492232358, "grad_norm": 0.44480106234550476, "learning_rate": 1.969502421623227e-05, "loss": 0.2931, "step": 4365 }, { "epoch": 0.07890422435522429, "grad_norm": 0.39942800998687744, "learning_rate": 1.9694328715012094e-05, "loss": 0.3321, "step": 4370 }, { "epoch": 0.078994503788125, "grad_norm": 0.42548105120658875, "learning_rate": 1.969363243395366e-05, "loss": 0.3378, "step": 4375 }, { "epoch": 0.07908478322102572, "grad_norm": 0.3198913335800171, "learning_rate": 1.969293537311297e-05, "loss": 0.3251, "step": 4380 }, { "epoch": 0.07917506265392643, "grad_norm": 0.4743817150592804, "learning_rate": 1.969223753254611e-05, "loss": 0.2683, "step": 4385 }, { "epoch": 0.07926534208682715, "grad_norm": 0.3459571599960327, "learning_rate": 1.9691538912309203e-05, "loss": 0.371, "step": 4390 }, { "epoch": 0.07935562151972786, "grad_norm": 0.27832481265068054, "learning_rate": 1.9690839512458458e-05, "loss": 0.306, "step": 4395 }, { "epoch": 0.07944590095262857, "grad_norm": 0.5058069825172424, "learning_rate": 1.9690139333050136e-05, "loss": 0.3165, "step": 4400 }, { "epoch": 0.07953618038552929, "grad_norm": 0.6491962671279907, "learning_rate": 1.9689438374140554e-05, "loss": 0.392, "step": 4405 }, { "epoch": 0.07962645981843, "grad_norm": 0.6899421215057373, "learning_rate": 1.9688736635786107e-05, "loss": 0.4649, "step": 4410 }, { "epoch": 0.07971673925133071, "grad_norm": 0.3906645178794861, "learning_rate": 1.968803411804324e-05, "loss": 0.2287, "step": 4415 }, { "epoch": 0.07980701868423143, "grad_norm": 0.6027063131332397, "learning_rate": 1.9687330820968472e-05, "loss": 0.2839, "step": 4420 }, { "epoch": 0.07989729811713214, "grad_norm": 0.5959811806678772, "learning_rate": 1.968662674461837e-05, "loss": 0.3659, "step": 4425 }, { "epoch": 0.07998757755003286, "grad_norm": 0.5573135614395142, "learning_rate": 1.9685921889049574e-05, "loss": 0.4069, "step": 4430 }, { "epoch": 0.08007785698293357, "grad_norm": 0.5714733600616455, "learning_rate": 1.9685216254318786e-05, "loss": 0.2656, "step": 4435 }, { "epoch": 0.08016813641583428, "grad_norm": 0.5382790565490723, "learning_rate": 1.9684509840482766e-05, "loss": 0.312, "step": 4440 }, { "epoch": 0.080258415848735, "grad_norm": 0.5374647974967957, "learning_rate": 1.9683802647598344e-05, "loss": 0.2594, "step": 4445 }, { "epoch": 0.08034869528163571, "grad_norm": 0.6212499141693115, "learning_rate": 1.968309467572241e-05, "loss": 0.4128, "step": 4450 }, { "epoch": 0.08043897471453643, "grad_norm": 0.4466805160045624, "learning_rate": 1.968238592491191e-05, "loss": 0.3326, "step": 4455 }, { "epoch": 0.08052925414743715, "grad_norm": 0.5234296917915344, "learning_rate": 1.9681676395223858e-05, "loss": 0.4153, "step": 4460 }, { "epoch": 0.08061953358033787, "grad_norm": 0.3909855782985687, "learning_rate": 1.9680966086715337e-05, "loss": 0.3775, "step": 4465 }, { "epoch": 0.08070981301323858, "grad_norm": 0.3779798746109009, "learning_rate": 1.9680254999443478e-05, "loss": 0.3641, "step": 4470 }, { "epoch": 0.0808000924461393, "grad_norm": 0.37800610065460205, "learning_rate": 1.967954313346549e-05, "loss": 0.2642, "step": 4475 }, { "epoch": 0.08089037187904001, "grad_norm": 0.3323556184768677, "learning_rate": 1.967883048883863e-05, "loss": 0.2069, "step": 4480 }, { "epoch": 0.08098065131194072, "grad_norm": 0.40910011529922485, "learning_rate": 1.967811706562023e-05, "loss": 0.3267, "step": 4485 }, { "epoch": 0.08107093074484144, "grad_norm": 0.5210789442062378, "learning_rate": 1.9677402863867684e-05, "loss": 0.3588, "step": 4490 }, { "epoch": 0.08116121017774215, "grad_norm": 0.5192407965660095, "learning_rate": 1.9676687883638433e-05, "loss": 0.3813, "step": 4495 }, { "epoch": 0.08125148961064287, "grad_norm": 0.42497411370277405, "learning_rate": 1.967597212499e-05, "loss": 0.3897, "step": 4500 }, { "epoch": 0.08134176904354358, "grad_norm": 0.4315917193889618, "learning_rate": 1.9675255587979963e-05, "loss": 0.3845, "step": 4505 }, { "epoch": 0.0814320484764443, "grad_norm": 1.1516555547714233, "learning_rate": 1.9674538272665957e-05, "loss": 0.3392, "step": 4510 }, { "epoch": 0.08152232790934501, "grad_norm": 0.44052472710609436, "learning_rate": 1.967382017910569e-05, "loss": 0.2897, "step": 4515 }, { "epoch": 0.08161260734224572, "grad_norm": 0.5504406690597534, "learning_rate": 1.9673101307356922e-05, "loss": 0.3192, "step": 4520 }, { "epoch": 0.08170288677514644, "grad_norm": 0.7369511127471924, "learning_rate": 1.9672381657477483e-05, "loss": 0.3242, "step": 4525 }, { "epoch": 0.08179316620804715, "grad_norm": 0.35009142756462097, "learning_rate": 1.9671661229525268e-05, "loss": 0.3182, "step": 4530 }, { "epoch": 0.08188344564094786, "grad_norm": 0.6602281928062439, "learning_rate": 1.9670940023558227e-05, "loss": 0.3433, "step": 4535 }, { "epoch": 0.08197372507384858, "grad_norm": 0.4609468877315521, "learning_rate": 1.9670218039634372e-05, "loss": 0.4052, "step": 4540 }, { "epoch": 0.08206400450674929, "grad_norm": 0.36140522360801697, "learning_rate": 1.966949527781179e-05, "loss": 0.2799, "step": 4545 }, { "epoch": 0.08215428393965, "grad_norm": 0.4902835488319397, "learning_rate": 1.9668771738148615e-05, "loss": 0.3481, "step": 4550 }, { "epoch": 0.08224456337255072, "grad_norm": 0.4186972975730896, "learning_rate": 1.966804742070305e-05, "loss": 0.2783, "step": 4555 }, { "epoch": 0.08233484280545143, "grad_norm": 0.394040048122406, "learning_rate": 1.9667322325533367e-05, "loss": 0.3048, "step": 4560 }, { "epoch": 0.08242512223835215, "grad_norm": 0.40318575501441956, "learning_rate": 1.966659645269789e-05, "loss": 0.3221, "step": 4565 }, { "epoch": 0.08251540167125286, "grad_norm": 0.3136727511882782, "learning_rate": 1.9665869802255007e-05, "loss": 0.3529, "step": 4570 }, { "epoch": 0.08260568110415357, "grad_norm": 0.5056544542312622, "learning_rate": 1.9665142374263186e-05, "loss": 0.3468, "step": 4575 }, { "epoch": 0.08269596053705429, "grad_norm": 0.42035847902297974, "learning_rate": 1.9664414168780923e-05, "loss": 0.3593, "step": 4580 }, { "epoch": 0.082786239969955, "grad_norm": 0.4517004191875458, "learning_rate": 1.9663685185866813e-05, "loss": 0.3332, "step": 4585 }, { "epoch": 0.08287651940285572, "grad_norm": 0.3949716091156006, "learning_rate": 1.966295542557949e-05, "loss": 0.3173, "step": 4590 }, { "epoch": 0.08296679883575643, "grad_norm": 0.575838565826416, "learning_rate": 1.966222488797766e-05, "loss": 0.3473, "step": 4595 }, { "epoch": 0.08305707826865714, "grad_norm": 0.49253588914871216, "learning_rate": 1.966149357312009e-05, "loss": 0.2891, "step": 4600 }, { "epoch": 0.08314735770155786, "grad_norm": 0.5246073603630066, "learning_rate": 1.9660761481065606e-05, "loss": 0.4548, "step": 4605 }, { "epoch": 0.08323763713445857, "grad_norm": 0.4757160246372223, "learning_rate": 1.9660028611873102e-05, "loss": 0.3069, "step": 4610 }, { "epoch": 0.08332791656735929, "grad_norm": 0.37857431173324585, "learning_rate": 1.965929496560153e-05, "loss": 0.3433, "step": 4615 }, { "epoch": 0.08341819600026, "grad_norm": 0.44293367862701416, "learning_rate": 1.965856054230991e-05, "loss": 0.3435, "step": 4620 }, { "epoch": 0.08350847543316071, "grad_norm": 0.48219558596611023, "learning_rate": 1.9657825342057317e-05, "loss": 0.2491, "step": 4625 }, { "epoch": 0.08359875486606143, "grad_norm": 0.3333645462989807, "learning_rate": 1.9657089364902897e-05, "loss": 0.2522, "step": 4630 }, { "epoch": 0.08368903429896214, "grad_norm": 0.3774479329586029, "learning_rate": 1.965635261090585e-05, "loss": 0.2893, "step": 4635 }, { "epoch": 0.08377931373186286, "grad_norm": 0.33994877338409424, "learning_rate": 1.9655615080125444e-05, "loss": 0.2458, "step": 4640 }, { "epoch": 0.08386959316476357, "grad_norm": 0.5428542494773865, "learning_rate": 1.9654876772621006e-05, "loss": 0.3831, "step": 4645 }, { "epoch": 0.08395987259766428, "grad_norm": 0.4807756841182709, "learning_rate": 1.965413768845193e-05, "loss": 0.2955, "step": 4650 }, { "epoch": 0.084050152030565, "grad_norm": 0.33526355028152466, "learning_rate": 1.965339782767767e-05, "loss": 0.3172, "step": 4655 }, { "epoch": 0.08414043146346573, "grad_norm": 0.30477970838546753, "learning_rate": 1.965265719035774e-05, "loss": 0.3401, "step": 4660 }, { "epoch": 0.08423071089636644, "grad_norm": 0.47036126255989075, "learning_rate": 1.9651915776551727e-05, "loss": 0.3484, "step": 4665 }, { "epoch": 0.08432099032926715, "grad_norm": 0.570608913898468, "learning_rate": 1.965117358631926e-05, "loss": 0.3407, "step": 4670 }, { "epoch": 0.08441126976216787, "grad_norm": 0.4351683259010315, "learning_rate": 1.965043061972005e-05, "loss": 0.4023, "step": 4675 }, { "epoch": 0.08450154919506858, "grad_norm": 0.4088486135005951, "learning_rate": 1.9649686876813864e-05, "loss": 0.3387, "step": 4680 }, { "epoch": 0.0845918286279693, "grad_norm": 0.3737155497074127, "learning_rate": 1.964894235766053e-05, "loss": 0.3863, "step": 4685 }, { "epoch": 0.08468210806087001, "grad_norm": 0.3476511836051941, "learning_rate": 1.9648197062319935e-05, "loss": 0.3141, "step": 4690 }, { "epoch": 0.08477238749377072, "grad_norm": 0.5036412477493286, "learning_rate": 1.9647450990852034e-05, "loss": 0.2858, "step": 4695 }, { "epoch": 0.08486266692667144, "grad_norm": 0.3802044093608856, "learning_rate": 1.9646704143316847e-05, "loss": 0.3502, "step": 4700 }, { "epoch": 0.08495294635957215, "grad_norm": 0.4903644621372223, "learning_rate": 1.9645956519774447e-05, "loss": 0.2952, "step": 4705 }, { "epoch": 0.08504322579247287, "grad_norm": 0.32201284170150757, "learning_rate": 1.9645208120284982e-05, "loss": 0.3125, "step": 4710 }, { "epoch": 0.08513350522537358, "grad_norm": 0.36066821217536926, "learning_rate": 1.9644458944908648e-05, "loss": 0.3653, "step": 4715 }, { "epoch": 0.08522378465827429, "grad_norm": 0.5003206133842468, "learning_rate": 1.9643708993705716e-05, "loss": 0.3442, "step": 4720 }, { "epoch": 0.08531406409117501, "grad_norm": 0.3410700857639313, "learning_rate": 1.9642958266736513e-05, "loss": 0.3535, "step": 4725 }, { "epoch": 0.08540434352407572, "grad_norm": 0.43336379528045654, "learning_rate": 1.9642206764061424e-05, "loss": 0.3435, "step": 4730 }, { "epoch": 0.08549462295697643, "grad_norm": 0.5813950300216675, "learning_rate": 1.9641454485740912e-05, "loss": 0.2742, "step": 4735 }, { "epoch": 0.08558490238987715, "grad_norm": 0.5004847645759583, "learning_rate": 1.964070143183548e-05, "loss": 0.3111, "step": 4740 }, { "epoch": 0.08567518182277786, "grad_norm": 0.3869590759277344, "learning_rate": 1.963994760240572e-05, "loss": 0.3283, "step": 4745 }, { "epoch": 0.08576546125567858, "grad_norm": 0.41948580741882324, "learning_rate": 1.9639192997512258e-05, "loss": 0.2875, "step": 4750 }, { "epoch": 0.08585574068857929, "grad_norm": 0.4879941940307617, "learning_rate": 1.9638437617215808e-05, "loss": 0.3663, "step": 4755 }, { "epoch": 0.08594602012148, "grad_norm": 0.4754737317562103, "learning_rate": 1.9637681461577128e-05, "loss": 0.3635, "step": 4760 }, { "epoch": 0.08603629955438072, "grad_norm": 0.37280508875846863, "learning_rate": 1.9636924530657045e-05, "loss": 0.3096, "step": 4765 }, { "epoch": 0.08612657898728143, "grad_norm": 0.4379524290561676, "learning_rate": 1.9636166824516457e-05, "loss": 0.351, "step": 4770 }, { "epoch": 0.08621685842018215, "grad_norm": 0.41434362530708313, "learning_rate": 1.9635408343216304e-05, "loss": 0.3746, "step": 4775 }, { "epoch": 0.08630713785308286, "grad_norm": 0.4297976493835449, "learning_rate": 1.9634649086817608e-05, "loss": 0.4458, "step": 4780 }, { "epoch": 0.08639741728598357, "grad_norm": 0.47540801763534546, "learning_rate": 1.9633889055381442e-05, "loss": 0.3872, "step": 4785 }, { "epoch": 0.08648769671888429, "grad_norm": 0.7144162058830261, "learning_rate": 1.9633128248968946e-05, "loss": 0.408, "step": 4790 }, { "epoch": 0.086577976151785, "grad_norm": 0.4906654357910156, "learning_rate": 1.9632366667641326e-05, "loss": 0.3667, "step": 4795 }, { "epoch": 0.08666825558468572, "grad_norm": 0.57340008020401, "learning_rate": 1.9631604311459837e-05, "loss": 0.3551, "step": 4800 }, { "epoch": 0.08675853501758643, "grad_norm": 0.4374023377895355, "learning_rate": 1.9630841180485815e-05, "loss": 0.2925, "step": 4805 }, { "epoch": 0.08684881445048714, "grad_norm": 0.4600036144256592, "learning_rate": 1.963007727478064e-05, "loss": 0.4128, "step": 4810 }, { "epoch": 0.08693909388338786, "grad_norm": 0.3051811158657074, "learning_rate": 1.9629312594405765e-05, "loss": 0.3569, "step": 4815 }, { "epoch": 0.08702937331628857, "grad_norm": 0.4193210303783417, "learning_rate": 1.9628547139422706e-05, "loss": 0.3374, "step": 4820 }, { "epoch": 0.08711965274918929, "grad_norm": 0.3596745729446411, "learning_rate": 1.962778090989303e-05, "loss": 0.3388, "step": 4825 }, { "epoch": 0.08720993218209, "grad_norm": 0.5314683318138123, "learning_rate": 1.962701390587839e-05, "loss": 0.3421, "step": 4830 }, { "epoch": 0.08730021161499071, "grad_norm": 0.37680259346961975, "learning_rate": 1.962624612744047e-05, "loss": 0.2718, "step": 4835 }, { "epoch": 0.08739049104789143, "grad_norm": 0.5087005496025085, "learning_rate": 1.962547757464104e-05, "loss": 0.3556, "step": 4840 }, { "epoch": 0.08748077048079214, "grad_norm": 0.38424932956695557, "learning_rate": 1.962470824754192e-05, "loss": 0.3122, "step": 4845 }, { "epoch": 0.08757104991369286, "grad_norm": 0.3894083797931671, "learning_rate": 1.9623938146205006e-05, "loss": 0.2732, "step": 4850 }, { "epoch": 0.08766132934659358, "grad_norm": 0.34054046869277954, "learning_rate": 1.9623167270692236e-05, "loss": 0.3351, "step": 4855 }, { "epoch": 0.0877516087794943, "grad_norm": 0.5789301991462708, "learning_rate": 1.9622395621065628e-05, "loss": 0.3519, "step": 4860 }, { "epoch": 0.08784188821239501, "grad_norm": 0.4679858386516571, "learning_rate": 1.9621623197387254e-05, "loss": 0.4429, "step": 4865 }, { "epoch": 0.08793216764529573, "grad_norm": 0.5168954730033875, "learning_rate": 1.962084999971925e-05, "loss": 0.3585, "step": 4870 }, { "epoch": 0.08802244707819644, "grad_norm": 0.5853481888771057, "learning_rate": 1.9620076028123814e-05, "loss": 0.3833, "step": 4875 }, { "epoch": 0.08811272651109715, "grad_norm": 0.49064958095550537, "learning_rate": 1.9619301282663205e-05, "loss": 0.3219, "step": 4880 }, { "epoch": 0.08820300594399787, "grad_norm": 0.8201891779899597, "learning_rate": 1.961852576339975e-05, "loss": 0.338, "step": 4885 }, { "epoch": 0.08829328537689858, "grad_norm": 0.30209094285964966, "learning_rate": 1.9617749470395825e-05, "loss": 0.3283, "step": 4890 }, { "epoch": 0.0883835648097993, "grad_norm": 0.5721350312232971, "learning_rate": 1.961697240371389e-05, "loss": 0.3774, "step": 4895 }, { "epoch": 0.08847384424270001, "grad_norm": 0.6147782802581787, "learning_rate": 1.961619456341644e-05, "loss": 0.2906, "step": 4900 }, { "epoch": 0.08856412367560072, "grad_norm": 0.5066529512405396, "learning_rate": 1.9615415949566058e-05, "loss": 0.4231, "step": 4905 }, { "epoch": 0.08865440310850144, "grad_norm": 0.33306726813316345, "learning_rate": 1.9614636562225375e-05, "loss": 0.252, "step": 4910 }, { "epoch": 0.08874468254140215, "grad_norm": 0.42946088314056396, "learning_rate": 1.9613856401457084e-05, "loss": 0.3567, "step": 4915 }, { "epoch": 0.08883496197430286, "grad_norm": 0.5795415043830872, "learning_rate": 1.9613075467323942e-05, "loss": 0.2967, "step": 4920 }, { "epoch": 0.08892524140720358, "grad_norm": 0.3875221610069275, "learning_rate": 1.9612293759888777e-05, "loss": 0.2638, "step": 4925 }, { "epoch": 0.08901552084010429, "grad_norm": 0.514550507068634, "learning_rate": 1.9611511279214467e-05, "loss": 0.2981, "step": 4930 }, { "epoch": 0.089105800273005, "grad_norm": 0.4188728332519531, "learning_rate": 1.9610728025363953e-05, "loss": 0.299, "step": 4935 }, { "epoch": 0.08919607970590572, "grad_norm": 0.4870274066925049, "learning_rate": 1.960994399840025e-05, "loss": 0.341, "step": 4940 }, { "epoch": 0.08928635913880643, "grad_norm": 0.5231569409370422, "learning_rate": 1.9609159198386422e-05, "loss": 0.3235, "step": 4945 }, { "epoch": 0.08937663857170715, "grad_norm": 0.2766699492931366, "learning_rate": 1.9608373625385604e-05, "loss": 0.3831, "step": 4950 }, { "epoch": 0.08946691800460786, "grad_norm": 0.38355782628059387, "learning_rate": 1.9607587279460984e-05, "loss": 0.306, "step": 4955 }, { "epoch": 0.08955719743750858, "grad_norm": 0.572722852230072, "learning_rate": 1.960680016067582e-05, "loss": 0.3822, "step": 4960 }, { "epoch": 0.08964747687040929, "grad_norm": 0.46345752477645874, "learning_rate": 1.9606012269093434e-05, "loss": 0.4394, "step": 4965 }, { "epoch": 0.08973775630331, "grad_norm": 0.4448091983795166, "learning_rate": 1.9605223604777207e-05, "loss": 0.3849, "step": 4970 }, { "epoch": 0.08982803573621072, "grad_norm": 0.5819882154464722, "learning_rate": 1.9604434167790574e-05, "loss": 0.3321, "step": 4975 }, { "epoch": 0.08991831516911143, "grad_norm": 0.2463798224925995, "learning_rate": 1.960364395819704e-05, "loss": 0.3517, "step": 4980 }, { "epoch": 0.09000859460201215, "grad_norm": 0.4587620198726654, "learning_rate": 1.960285297606018e-05, "loss": 0.3617, "step": 4985 }, { "epoch": 0.09009887403491286, "grad_norm": 0.5834743976593018, "learning_rate": 1.9602061221443615e-05, "loss": 0.3336, "step": 4990 }, { "epoch": 0.09018915346781357, "grad_norm": 0.42822515964508057, "learning_rate": 1.960126869441104e-05, "loss": 0.2742, "step": 4995 }, { "epoch": 0.09027943290071429, "grad_norm": 0.44845589995384216, "learning_rate": 1.9600475395026203e-05, "loss": 0.3435, "step": 5000 }, { "epoch": 0.090369712333615, "grad_norm": 0.33463209867477417, "learning_rate": 1.9599681323352926e-05, "loss": 0.3159, "step": 5005 }, { "epoch": 0.09045999176651572, "grad_norm": 0.49585360288619995, "learning_rate": 1.9598886479455083e-05, "loss": 0.308, "step": 5010 }, { "epoch": 0.09055027119941643, "grad_norm": 0.5451337695121765, "learning_rate": 1.959809086339661e-05, "loss": 0.2762, "step": 5015 }, { "epoch": 0.09064055063231714, "grad_norm": 0.5406076312065125, "learning_rate": 1.9597294475241516e-05, "loss": 0.2591, "step": 5020 }, { "epoch": 0.09073083006521786, "grad_norm": 0.44968345761299133, "learning_rate": 1.959649731505386e-05, "loss": 0.3669, "step": 5025 }, { "epoch": 0.09082110949811857, "grad_norm": 0.4464029371738434, "learning_rate": 1.9595699382897768e-05, "loss": 0.306, "step": 5030 }, { "epoch": 0.09091138893101929, "grad_norm": 0.4215846061706543, "learning_rate": 1.9594900678837426e-05, "loss": 0.3202, "step": 5035 }, { "epoch": 0.09100166836392, "grad_norm": 0.5329155325889587, "learning_rate": 1.959410120293709e-05, "loss": 0.3137, "step": 5040 }, { "epoch": 0.09109194779682071, "grad_norm": 0.519690990447998, "learning_rate": 1.9593300955261062e-05, "loss": 0.2903, "step": 5045 }, { "epoch": 0.09118222722972143, "grad_norm": 0.9205771088600159, "learning_rate": 1.9592499935873727e-05, "loss": 0.3304, "step": 5050 }, { "epoch": 0.09127250666262215, "grad_norm": 0.46820029616355896, "learning_rate": 1.9591698144839516e-05, "loss": 0.4189, "step": 5055 }, { "epoch": 0.09136278609552287, "grad_norm": 0.7149403095245361, "learning_rate": 1.9590895582222925e-05, "loss": 0.336, "step": 5060 }, { "epoch": 0.09145306552842358, "grad_norm": 0.23928998410701752, "learning_rate": 1.9590092248088522e-05, "loss": 0.2935, "step": 5065 }, { "epoch": 0.0915433449613243, "grad_norm": 0.293186753988266, "learning_rate": 1.958928814250092e-05, "loss": 0.3895, "step": 5070 }, { "epoch": 0.09163362439422501, "grad_norm": 0.7533143758773804, "learning_rate": 1.958848326552481e-05, "loss": 0.3255, "step": 5075 }, { "epoch": 0.09172390382712572, "grad_norm": 0.22842998802661896, "learning_rate": 1.9587677617224932e-05, "loss": 0.2246, "step": 5080 }, { "epoch": 0.09181418326002644, "grad_norm": 0.38793644309043884, "learning_rate": 1.9586871197666103e-05, "loss": 0.3714, "step": 5085 }, { "epoch": 0.09190446269292715, "grad_norm": 0.5378062725067139, "learning_rate": 1.9586064006913192e-05, "loss": 0.3374, "step": 5090 }, { "epoch": 0.09199474212582787, "grad_norm": 0.6650637984275818, "learning_rate": 1.958525604503112e-05, "loss": 0.3621, "step": 5095 }, { "epoch": 0.09208502155872858, "grad_norm": 0.6383867263793945, "learning_rate": 1.95844473120849e-05, "loss": 0.3501, "step": 5100 }, { "epoch": 0.0921753009916293, "grad_norm": 0.5531526207923889, "learning_rate": 1.9583637808139574e-05, "loss": 0.3532, "step": 5105 }, { "epoch": 0.09226558042453001, "grad_norm": 0.44341447949409485, "learning_rate": 1.9582827533260267e-05, "loss": 0.2861, "step": 5110 }, { "epoch": 0.09235585985743072, "grad_norm": 0.26350006461143494, "learning_rate": 1.958201648751216e-05, "loss": 0.1926, "step": 5115 }, { "epoch": 0.09244613929033144, "grad_norm": 0.41844233870506287, "learning_rate": 1.9581204670960496e-05, "loss": 0.2989, "step": 5120 }, { "epoch": 0.09253641872323215, "grad_norm": 0.5063326954841614, "learning_rate": 1.9580392083670573e-05, "loss": 0.3132, "step": 5125 }, { "epoch": 0.09262669815613286, "grad_norm": 0.6237820386886597, "learning_rate": 1.9579578725707768e-05, "loss": 0.3054, "step": 5130 }, { "epoch": 0.09271697758903358, "grad_norm": 0.4341614842414856, "learning_rate": 1.9578764597137504e-05, "loss": 0.3877, "step": 5135 }, { "epoch": 0.09280725702193429, "grad_norm": 0.4137227535247803, "learning_rate": 1.957794969802527e-05, "loss": 0.4141, "step": 5140 }, { "epoch": 0.092897536454835, "grad_norm": 0.4514862596988678, "learning_rate": 1.9577134028436626e-05, "loss": 0.3094, "step": 5145 }, { "epoch": 0.09298781588773572, "grad_norm": 0.4362461566925049, "learning_rate": 1.957631758843718e-05, "loss": 0.311, "step": 5150 }, { "epoch": 0.09307809532063643, "grad_norm": 0.422057181596756, "learning_rate": 1.9575500378092606e-05, "loss": 0.2631, "step": 5155 }, { "epoch": 0.09316837475353715, "grad_norm": 0.37970930337905884, "learning_rate": 1.957468239746865e-05, "loss": 0.2921, "step": 5160 }, { "epoch": 0.09325865418643786, "grad_norm": 0.4683941900730133, "learning_rate": 1.9573863646631113e-05, "loss": 0.3497, "step": 5165 }, { "epoch": 0.09334893361933858, "grad_norm": 0.38652756810188293, "learning_rate": 1.957304412564585e-05, "loss": 0.3171, "step": 5170 }, { "epoch": 0.09343921305223929, "grad_norm": 0.36577558517456055, "learning_rate": 1.9572223834578793e-05, "loss": 0.3247, "step": 5175 }, { "epoch": 0.09352949248514, "grad_norm": 0.35349130630493164, "learning_rate": 1.9571402773495924e-05, "loss": 0.4201, "step": 5180 }, { "epoch": 0.09361977191804072, "grad_norm": 0.4183078408241272, "learning_rate": 1.957058094246329e-05, "loss": 0.3452, "step": 5185 }, { "epoch": 0.09371005135094143, "grad_norm": 0.37030965089797974, "learning_rate": 1.956975834154701e-05, "loss": 0.397, "step": 5190 }, { "epoch": 0.09380033078384215, "grad_norm": 0.4157424569129944, "learning_rate": 1.9568934970813248e-05, "loss": 0.256, "step": 5195 }, { "epoch": 0.09389061021674286, "grad_norm": 0.720859169960022, "learning_rate": 1.956811083032824e-05, "loss": 0.2821, "step": 5200 }, { "epoch": 0.09398088964964357, "grad_norm": 0.5038657784461975, "learning_rate": 1.956728592015828e-05, "loss": 0.2821, "step": 5205 }, { "epoch": 0.09407116908254429, "grad_norm": 0.5624401569366455, "learning_rate": 1.9566460240369733e-05, "loss": 0.3153, "step": 5210 }, { "epoch": 0.094161448515445, "grad_norm": 0.5510204434394836, "learning_rate": 1.956563379102901e-05, "loss": 0.3937, "step": 5215 }, { "epoch": 0.09425172794834571, "grad_norm": 0.5782774090766907, "learning_rate": 1.95648065722026e-05, "loss": 0.2752, "step": 5220 }, { "epoch": 0.09434200738124643, "grad_norm": 0.5185222029685974, "learning_rate": 1.9563978583957044e-05, "loss": 0.3778, "step": 5225 }, { "epoch": 0.09443228681414714, "grad_norm": 0.3531385660171509, "learning_rate": 1.9563149826358946e-05, "loss": 0.3069, "step": 5230 }, { "epoch": 0.09452256624704786, "grad_norm": 0.4458499848842621, "learning_rate": 1.956232029947498e-05, "loss": 0.2716, "step": 5235 }, { "epoch": 0.09461284567994857, "grad_norm": 0.5337215065956116, "learning_rate": 1.9561490003371867e-05, "loss": 0.2823, "step": 5240 }, { "epoch": 0.09470312511284928, "grad_norm": 0.601732075214386, "learning_rate": 1.95606589381164e-05, "loss": 0.2908, "step": 5245 }, { "epoch": 0.09479340454575, "grad_norm": 0.38707998394966125, "learning_rate": 1.9559827103775437e-05, "loss": 0.2424, "step": 5250 }, { "epoch": 0.09488368397865073, "grad_norm": 0.3815915286540985, "learning_rate": 1.9558994500415892e-05, "loss": 0.2925, "step": 5255 }, { "epoch": 0.09497396341155144, "grad_norm": 0.4531812071800232, "learning_rate": 1.9558161128104736e-05, "loss": 0.3384, "step": 5260 }, { "epoch": 0.09506424284445215, "grad_norm": 0.5576108694076538, "learning_rate": 1.9557326986909015e-05, "loss": 0.3105, "step": 5265 }, { "epoch": 0.09515452227735287, "grad_norm": 0.3936028480529785, "learning_rate": 1.9556492076895822e-05, "loss": 0.3366, "step": 5270 }, { "epoch": 0.09524480171025358, "grad_norm": 0.4257822632789612, "learning_rate": 1.955565639813233e-05, "loss": 0.2439, "step": 5275 }, { "epoch": 0.0953350811431543, "grad_norm": 0.47362229228019714, "learning_rate": 1.955481995068575e-05, "loss": 0.342, "step": 5280 }, { "epoch": 0.09542536057605501, "grad_norm": 0.4994874894618988, "learning_rate": 1.955398273462338e-05, "loss": 0.4446, "step": 5285 }, { "epoch": 0.09551564000895572, "grad_norm": 0.42771607637405396, "learning_rate": 1.9553144750012565e-05, "loss": 0.4141, "step": 5290 }, { "epoch": 0.09560591944185644, "grad_norm": 0.2980763018131256, "learning_rate": 1.9552305996920712e-05, "loss": 0.4213, "step": 5295 }, { "epoch": 0.09569619887475715, "grad_norm": 0.5155059695243835, "learning_rate": 1.955146647541529e-05, "loss": 0.2334, "step": 5300 }, { "epoch": 0.09578647830765787, "grad_norm": 0.3601856529712677, "learning_rate": 1.9550626185563838e-05, "loss": 0.3939, "step": 5305 }, { "epoch": 0.09587675774055858, "grad_norm": 0.29154184460639954, "learning_rate": 1.9549785127433948e-05, "loss": 0.3869, "step": 5310 }, { "epoch": 0.0959670371734593, "grad_norm": 0.49999797344207764, "learning_rate": 1.954894330109328e-05, "loss": 0.3985, "step": 5315 }, { "epoch": 0.09605731660636001, "grad_norm": 0.6494349241256714, "learning_rate": 1.954810070660955e-05, "loss": 0.3517, "step": 5320 }, { "epoch": 0.09614759603926072, "grad_norm": 0.33707407116889954, "learning_rate": 1.954725734405054e-05, "loss": 0.3102, "step": 5325 }, { "epoch": 0.09623787547216144, "grad_norm": 0.4518429636955261, "learning_rate": 1.954641321348409e-05, "loss": 0.3393, "step": 5330 }, { "epoch": 0.09632815490506215, "grad_norm": 0.511194109916687, "learning_rate": 1.9545568314978106e-05, "loss": 0.2309, "step": 5335 }, { "epoch": 0.09641843433796286, "grad_norm": 0.3844567537307739, "learning_rate": 1.9544722648600558e-05, "loss": 0.377, "step": 5340 }, { "epoch": 0.09650871377086358, "grad_norm": 0.42884543538093567, "learning_rate": 1.9543876214419467e-05, "loss": 0.3377, "step": 5345 }, { "epoch": 0.09659899320376429, "grad_norm": 0.6516596674919128, "learning_rate": 1.9543029012502926e-05, "loss": 0.2655, "step": 5350 }, { "epoch": 0.096689272636665, "grad_norm": 0.3786361515522003, "learning_rate": 1.9542181042919084e-05, "loss": 0.2778, "step": 5355 }, { "epoch": 0.09677955206956572, "grad_norm": 0.3247559368610382, "learning_rate": 1.9541332305736156e-05, "loss": 0.3328, "step": 5360 }, { "epoch": 0.09686983150246643, "grad_norm": 0.5098878741264343, "learning_rate": 1.9540482801022417e-05, "loss": 0.213, "step": 5365 }, { "epoch": 0.09696011093536715, "grad_norm": 0.4012088179588318, "learning_rate": 1.9539632528846206e-05, "loss": 0.4207, "step": 5370 }, { "epoch": 0.09705039036826786, "grad_norm": 0.36352911591529846, "learning_rate": 1.9538781489275912e-05, "loss": 0.3404, "step": 5375 }, { "epoch": 0.09714066980116857, "grad_norm": 0.2575864791870117, "learning_rate": 1.9537929682380002e-05, "loss": 0.3838, "step": 5380 }, { "epoch": 0.09723094923406929, "grad_norm": 0.3757660388946533, "learning_rate": 1.9537077108227e-05, "loss": 0.3248, "step": 5385 }, { "epoch": 0.09732122866697, "grad_norm": 0.552474319934845, "learning_rate": 1.9536223766885484e-05, "loss": 0.3292, "step": 5390 }, { "epoch": 0.09741150809987072, "grad_norm": 0.596549391746521, "learning_rate": 1.95353696584241e-05, "loss": 0.2818, "step": 5395 }, { "epoch": 0.09750178753277143, "grad_norm": 0.3117164671421051, "learning_rate": 1.9534514782911556e-05, "loss": 0.2427, "step": 5400 }, { "epoch": 0.09759206696567214, "grad_norm": 0.3582278788089752, "learning_rate": 1.953365914041662e-05, "loss": 0.2953, "step": 5405 }, { "epoch": 0.09768234639857286, "grad_norm": 0.5204953551292419, "learning_rate": 1.953280273100812e-05, "loss": 0.304, "step": 5410 }, { "epoch": 0.09777262583147357, "grad_norm": 0.5520836710929871, "learning_rate": 1.9531945554754957e-05, "loss": 0.332, "step": 5415 }, { "epoch": 0.09786290526437429, "grad_norm": 0.426142156124115, "learning_rate": 1.9531087611726074e-05, "loss": 0.3299, "step": 5420 }, { "epoch": 0.097953184697275, "grad_norm": 0.43062669038772583, "learning_rate": 1.9530228901990493e-05, "loss": 0.3585, "step": 5425 }, { "epoch": 0.09804346413017571, "grad_norm": 0.570884644985199, "learning_rate": 1.9529369425617286e-05, "loss": 0.3148, "step": 5430 }, { "epoch": 0.09813374356307643, "grad_norm": 0.4310964345932007, "learning_rate": 1.9528509182675593e-05, "loss": 0.3101, "step": 5435 }, { "epoch": 0.09822402299597714, "grad_norm": 0.631697416305542, "learning_rate": 1.9527648173234616e-05, "loss": 0.2834, "step": 5440 }, { "epoch": 0.09831430242887786, "grad_norm": 0.481629341840744, "learning_rate": 1.952678639736362e-05, "loss": 0.3348, "step": 5445 }, { "epoch": 0.09840458186177858, "grad_norm": 0.3552111089229584, "learning_rate": 1.952592385513192e-05, "loss": 0.2887, "step": 5450 }, { "epoch": 0.0984948612946793, "grad_norm": 0.3959607183933258, "learning_rate": 1.952506054660891e-05, "loss": 0.3694, "step": 5455 }, { "epoch": 0.09858514072758001, "grad_norm": 0.6186146140098572, "learning_rate": 1.9524196471864028e-05, "loss": 0.3707, "step": 5460 }, { "epoch": 0.09867542016048073, "grad_norm": 0.27796289324760437, "learning_rate": 1.9523331630966787e-05, "loss": 0.3947, "step": 5465 }, { "epoch": 0.09876569959338144, "grad_norm": 0.4462321102619171, "learning_rate": 1.9522466023986762e-05, "loss": 0.276, "step": 5470 }, { "epoch": 0.09885597902628215, "grad_norm": 0.40568867325782776, "learning_rate": 1.952159965099358e-05, "loss": 0.323, "step": 5475 }, { "epoch": 0.09894625845918287, "grad_norm": 0.658328652381897, "learning_rate": 1.952073251205693e-05, "loss": 0.3548, "step": 5480 }, { "epoch": 0.09903653789208358, "grad_norm": 0.2920197546482086, "learning_rate": 1.951986460724658e-05, "loss": 0.2942, "step": 5485 }, { "epoch": 0.0991268173249843, "grad_norm": 0.5438705086708069, "learning_rate": 1.951899593663233e-05, "loss": 0.3396, "step": 5490 }, { "epoch": 0.09921709675788501, "grad_norm": 0.5157396793365479, "learning_rate": 1.9518126500284072e-05, "loss": 0.4084, "step": 5495 }, { "epoch": 0.09930737619078572, "grad_norm": 0.4184419810771942, "learning_rate": 1.951725629827174e-05, "loss": 0.3242, "step": 5500 }, { "epoch": 0.09939765562368644, "grad_norm": 0.329950749874115, "learning_rate": 1.9516385330665332e-05, "loss": 0.411, "step": 5505 }, { "epoch": 0.09948793505658715, "grad_norm": 0.5271588563919067, "learning_rate": 1.951551359753492e-05, "loss": 0.3677, "step": 5510 }, { "epoch": 0.09957821448948787, "grad_norm": 0.40672919154167175, "learning_rate": 1.9514641098950623e-05, "loss": 0.2817, "step": 5515 }, { "epoch": 0.09966849392238858, "grad_norm": 0.4978141784667969, "learning_rate": 1.9513767834982625e-05, "loss": 0.309, "step": 5520 }, { "epoch": 0.09975877335528929, "grad_norm": 0.42941680550575256, "learning_rate": 1.951289380570118e-05, "loss": 0.2661, "step": 5525 }, { "epoch": 0.09984905278819001, "grad_norm": 0.3147401809692383, "learning_rate": 1.951201901117659e-05, "loss": 0.3315, "step": 5530 }, { "epoch": 0.09993933222109072, "grad_norm": 0.43563732504844666, "learning_rate": 1.9511143451479228e-05, "loss": 0.3314, "step": 5535 }, { "epoch": 0.10002961165399143, "grad_norm": 0.556304395198822, "learning_rate": 1.9510267126679535e-05, "loss": 0.3468, "step": 5540 }, { "epoch": 0.10011989108689215, "grad_norm": 0.3554510474205017, "learning_rate": 1.9509390036847992e-05, "loss": 0.3373, "step": 5545 }, { "epoch": 0.10021017051979286, "grad_norm": 0.4136534333229065, "learning_rate": 1.950851218205516e-05, "loss": 0.2907, "step": 5550 }, { "epoch": 0.10030044995269358, "grad_norm": 0.41162267327308655, "learning_rate": 1.9507633562371662e-05, "loss": 0.3779, "step": 5555 }, { "epoch": 0.10039072938559429, "grad_norm": 0.6030270457267761, "learning_rate": 1.950675417786817e-05, "loss": 0.3726, "step": 5560 }, { "epoch": 0.100481008818495, "grad_norm": 0.45536723732948303, "learning_rate": 1.950587402861542e-05, "loss": 0.2769, "step": 5565 }, { "epoch": 0.10057128825139572, "grad_norm": 0.3023357689380646, "learning_rate": 1.9504993114684224e-05, "loss": 0.2517, "step": 5570 }, { "epoch": 0.10066156768429643, "grad_norm": 0.3225719928741455, "learning_rate": 1.950411143614544e-05, "loss": 0.2659, "step": 5575 }, { "epoch": 0.10075184711719715, "grad_norm": 0.787666916847229, "learning_rate": 1.9503228993069986e-05, "loss": 0.2757, "step": 5580 }, { "epoch": 0.10084212655009786, "grad_norm": 0.47917357087135315, "learning_rate": 1.9502345785528863e-05, "loss": 0.3791, "step": 5585 }, { "epoch": 0.10093240598299857, "grad_norm": 0.3646545708179474, "learning_rate": 1.9501461813593106e-05, "loss": 0.3377, "step": 5590 }, { "epoch": 0.10102268541589929, "grad_norm": 0.42772263288497925, "learning_rate": 1.9500577077333828e-05, "loss": 0.3481, "step": 5595 }, { "epoch": 0.1011129648488, "grad_norm": 0.3945727050304413, "learning_rate": 1.94996915768222e-05, "loss": 0.2901, "step": 5600 }, { "epoch": 0.10120324428170072, "grad_norm": 0.3048461377620697, "learning_rate": 1.9498805312129456e-05, "loss": 0.2429, "step": 5605 }, { "epoch": 0.10129352371460143, "grad_norm": 0.43724963068962097, "learning_rate": 1.9497918283326887e-05, "loss": 0.2989, "step": 5610 }, { "epoch": 0.10138380314750214, "grad_norm": 0.38626641035079956, "learning_rate": 1.9497030490485848e-05, "loss": 0.3071, "step": 5615 }, { "epoch": 0.10147408258040286, "grad_norm": 0.4728689193725586, "learning_rate": 1.9496141933677757e-05, "loss": 0.3376, "step": 5620 }, { "epoch": 0.10156436201330357, "grad_norm": 0.7466672658920288, "learning_rate": 1.949525261297409e-05, "loss": 0.2819, "step": 5625 }, { "epoch": 0.10165464144620429, "grad_norm": 0.6607660055160522, "learning_rate": 1.9494362528446387e-05, "loss": 0.3414, "step": 5630 }, { "epoch": 0.101744920879105, "grad_norm": 0.4114571213722229, "learning_rate": 1.949347168016625e-05, "loss": 0.4251, "step": 5635 }, { "epoch": 0.10183520031200571, "grad_norm": 0.4187646508216858, "learning_rate": 1.949258006820534e-05, "loss": 0.3238, "step": 5640 }, { "epoch": 0.10192547974490643, "grad_norm": 0.3874375820159912, "learning_rate": 1.9491687692635385e-05, "loss": 0.3899, "step": 5645 }, { "epoch": 0.10201575917780716, "grad_norm": 0.24559728801250458, "learning_rate": 1.9490794553528164e-05, "loss": 0.3407, "step": 5650 }, { "epoch": 0.10210603861070787, "grad_norm": 0.4480688273906708, "learning_rate": 1.9489900650955524e-05, "loss": 0.3278, "step": 5655 }, { "epoch": 0.10219631804360858, "grad_norm": 0.6497907042503357, "learning_rate": 1.9489005984989376e-05, "loss": 0.36, "step": 5660 }, { "epoch": 0.1022865974765093, "grad_norm": 0.37227800488471985, "learning_rate": 1.9488110555701686e-05, "loss": 0.3999, "step": 5665 }, { "epoch": 0.10237687690941001, "grad_norm": 0.35861340165138245, "learning_rate": 1.9487214363164492e-05, "loss": 0.2449, "step": 5670 }, { "epoch": 0.10246715634231073, "grad_norm": 0.29419082403182983, "learning_rate": 1.9486317407449878e-05, "loss": 0.3671, "step": 5675 }, { "epoch": 0.10255743577521144, "grad_norm": 0.4491279721260071, "learning_rate": 1.948541968863e-05, "loss": 0.3232, "step": 5680 }, { "epoch": 0.10264771520811215, "grad_norm": 0.4294513463973999, "learning_rate": 1.9484521206777077e-05, "loss": 0.3929, "step": 5685 }, { "epoch": 0.10273799464101287, "grad_norm": 0.43613892793655396, "learning_rate": 1.948362196196338e-05, "loss": 0.3833, "step": 5690 }, { "epoch": 0.10282827407391358, "grad_norm": 0.3967756927013397, "learning_rate": 1.9482721954261247e-05, "loss": 0.344, "step": 5695 }, { "epoch": 0.1029185535068143, "grad_norm": 0.29174667596817017, "learning_rate": 1.948182118374308e-05, "loss": 0.3903, "step": 5700 }, { "epoch": 0.10300883293971501, "grad_norm": 0.42230311036109924, "learning_rate": 1.948091965048134e-05, "loss": 0.2445, "step": 5705 }, { "epoch": 0.10309911237261572, "grad_norm": 0.502432107925415, "learning_rate": 1.9480017354548547e-05, "loss": 0.2777, "step": 5710 }, { "epoch": 0.10318939180551644, "grad_norm": 0.40217089653015137, "learning_rate": 1.9479114296017283e-05, "loss": 0.3711, "step": 5715 }, { "epoch": 0.10327967123841715, "grad_norm": 0.6035251021385193, "learning_rate": 1.9478210474960193e-05, "loss": 0.3647, "step": 5720 }, { "epoch": 0.10336995067131786, "grad_norm": 0.3311302065849304, "learning_rate": 1.9477305891449987e-05, "loss": 0.2223, "step": 5725 }, { "epoch": 0.10346023010421858, "grad_norm": 0.3849733769893646, "learning_rate": 1.9476400545559426e-05, "loss": 0.3043, "step": 5730 }, { "epoch": 0.10355050953711929, "grad_norm": 2.3564114570617676, "learning_rate": 1.947549443736134e-05, "loss": 0.3054, "step": 5735 }, { "epoch": 0.10364078897002, "grad_norm": 0.3749711215496063, "learning_rate": 1.947458756692862e-05, "loss": 0.2085, "step": 5740 }, { "epoch": 0.10373106840292072, "grad_norm": 0.40689751505851746, "learning_rate": 1.947367993433422e-05, "loss": 0.4027, "step": 5745 }, { "epoch": 0.10382134783582143, "grad_norm": 0.434182733297348, "learning_rate": 1.9472771539651147e-05, "loss": 0.337, "step": 5750 }, { "epoch": 0.10391162726872215, "grad_norm": 0.4543397128582001, "learning_rate": 1.947186238295248e-05, "loss": 0.273, "step": 5755 }, { "epoch": 0.10400190670162286, "grad_norm": 0.5350884795188904, "learning_rate": 1.9470952464311348e-05, "loss": 0.3684, "step": 5760 }, { "epoch": 0.10409218613452358, "grad_norm": 0.5613234639167786, "learning_rate": 1.9470041783800956e-05, "loss": 0.3381, "step": 5765 }, { "epoch": 0.10418246556742429, "grad_norm": 0.5730423331260681, "learning_rate": 1.946913034149455e-05, "loss": 0.2829, "step": 5770 }, { "epoch": 0.104272745000325, "grad_norm": 0.524937093257904, "learning_rate": 1.946821813746546e-05, "loss": 0.3181, "step": 5775 }, { "epoch": 0.10436302443322572, "grad_norm": 0.3746713697910309, "learning_rate": 1.946730517178706e-05, "loss": 0.3931, "step": 5780 }, { "epoch": 0.10445330386612643, "grad_norm": 0.6415619850158691, "learning_rate": 1.946639144453279e-05, "loss": 0.3812, "step": 5785 }, { "epoch": 0.10454358329902715, "grad_norm": 0.4309931993484497, "learning_rate": 1.9465476955776158e-05, "loss": 0.2501, "step": 5790 }, { "epoch": 0.10463386273192786, "grad_norm": 0.5179385542869568, "learning_rate": 1.9464561705590727e-05, "loss": 0.3465, "step": 5795 }, { "epoch": 0.10472414216482857, "grad_norm": 0.6764097213745117, "learning_rate": 1.946364569405012e-05, "loss": 0.4371, "step": 5800 }, { "epoch": 0.10481442159772929, "grad_norm": 0.4080183207988739, "learning_rate": 1.9462728921228025e-05, "loss": 0.3404, "step": 5805 }, { "epoch": 0.10490470103063, "grad_norm": 0.45554742217063904, "learning_rate": 1.9461811387198185e-05, "loss": 0.4058, "step": 5810 }, { "epoch": 0.10499498046353072, "grad_norm": 0.4290477931499481, "learning_rate": 1.946089309203442e-05, "loss": 0.3682, "step": 5815 }, { "epoch": 0.10508525989643143, "grad_norm": 0.42258429527282715, "learning_rate": 1.9459974035810587e-05, "loss": 0.3636, "step": 5820 }, { "epoch": 0.10517553932933214, "grad_norm": 0.30179986357688904, "learning_rate": 1.9459054218600622e-05, "loss": 0.404, "step": 5825 }, { "epoch": 0.10526581876223286, "grad_norm": 0.3837675154209137, "learning_rate": 1.945813364047852e-05, "loss": 0.3033, "step": 5830 }, { "epoch": 0.10535609819513357, "grad_norm": 0.4009370505809784, "learning_rate": 1.945721230151834e-05, "loss": 0.2867, "step": 5835 }, { "epoch": 0.10544637762803429, "grad_norm": 0.5982357263565063, "learning_rate": 1.9456290201794186e-05, "loss": 0.3247, "step": 5840 }, { "epoch": 0.105536657060935, "grad_norm": 0.44127723574638367, "learning_rate": 1.945536734138024e-05, "loss": 0.3373, "step": 5845 }, { "epoch": 0.10562693649383573, "grad_norm": 0.42917925119400024, "learning_rate": 1.9454443720350743e-05, "loss": 0.3158, "step": 5850 }, { "epoch": 0.10571721592673644, "grad_norm": 0.39992135763168335, "learning_rate": 1.9453519338779983e-05, "loss": 0.3198, "step": 5855 }, { "epoch": 0.10580749535963715, "grad_norm": 0.31888100504875183, "learning_rate": 1.945259419674233e-05, "loss": 0.4444, "step": 5860 }, { "epoch": 0.10589777479253787, "grad_norm": 0.31540992856025696, "learning_rate": 1.9451668294312197e-05, "loss": 0.3988, "step": 5865 }, { "epoch": 0.10598805422543858, "grad_norm": 0.7145553827285767, "learning_rate": 1.945074163156407e-05, "loss": 0.3445, "step": 5870 }, { "epoch": 0.1060783336583393, "grad_norm": 0.38850831985473633, "learning_rate": 1.9449814208572495e-05, "loss": 0.3442, "step": 5875 }, { "epoch": 0.10616861309124001, "grad_norm": 0.511945903301239, "learning_rate": 1.9448886025412074e-05, "loss": 0.2274, "step": 5880 }, { "epoch": 0.10625889252414072, "grad_norm": 0.4505237638950348, "learning_rate": 1.9447957082157473e-05, "loss": 0.3868, "step": 5885 }, { "epoch": 0.10634917195704144, "grad_norm": 0.36377763748168945, "learning_rate": 1.9447027378883413e-05, "loss": 0.2994, "step": 5890 }, { "epoch": 0.10643945138994215, "grad_norm": 0.4371629059314728, "learning_rate": 1.944609691566469e-05, "loss": 0.3161, "step": 5895 }, { "epoch": 0.10652973082284287, "grad_norm": 0.32444894313812256, "learning_rate": 1.944516569257615e-05, "loss": 0.2538, "step": 5900 }, { "epoch": 0.10662001025574358, "grad_norm": 0.4282093048095703, "learning_rate": 1.9444233709692704e-05, "loss": 0.4371, "step": 5905 }, { "epoch": 0.1067102896886443, "grad_norm": 0.4953030049800873, "learning_rate": 1.944330096708932e-05, "loss": 0.3598, "step": 5910 }, { "epoch": 0.10680056912154501, "grad_norm": 0.5108286142349243, "learning_rate": 1.9442367464841034e-05, "loss": 0.3352, "step": 5915 }, { "epoch": 0.10689084855444572, "grad_norm": 0.44767871499061584, "learning_rate": 1.9441433203022935e-05, "loss": 0.2807, "step": 5920 }, { "epoch": 0.10698112798734644, "grad_norm": 0.4025225341320038, "learning_rate": 1.944049818171019e-05, "loss": 0.3045, "step": 5925 }, { "epoch": 0.10707140742024715, "grad_norm": 0.5402584671974182, "learning_rate": 1.9439562400977996e-05, "loss": 0.3899, "step": 5930 }, { "epoch": 0.10716168685314786, "grad_norm": 0.37331920862197876, "learning_rate": 1.9438625860901644e-05, "loss": 0.3665, "step": 5935 }, { "epoch": 0.10725196628604858, "grad_norm": 0.592233419418335, "learning_rate": 1.9437688561556463e-05, "loss": 0.284, "step": 5940 }, { "epoch": 0.10734224571894929, "grad_norm": 0.5333718657493591, "learning_rate": 1.9436750503017858e-05, "loss": 0.2664, "step": 5945 }, { "epoch": 0.10743252515185, "grad_norm": 0.5840323567390442, "learning_rate": 1.9435811685361284e-05, "loss": 0.348, "step": 5950 }, { "epoch": 0.10752280458475072, "grad_norm": 0.43222957849502563, "learning_rate": 1.943487210866227e-05, "loss": 0.3971, "step": 5955 }, { "epoch": 0.10761308401765143, "grad_norm": 0.2581470012664795, "learning_rate": 1.943393177299639e-05, "loss": 0.3098, "step": 5960 }, { "epoch": 0.10770336345055215, "grad_norm": 0.3755950927734375, "learning_rate": 1.943299067843929e-05, "loss": 0.3571, "step": 5965 }, { "epoch": 0.10779364288345286, "grad_norm": 0.468671590089798, "learning_rate": 1.9432048825066677e-05, "loss": 0.3714, "step": 5970 }, { "epoch": 0.10788392231635358, "grad_norm": 0.7344030141830444, "learning_rate": 1.9431106212954312e-05, "loss": 0.3145, "step": 5975 }, { "epoch": 0.10797420174925429, "grad_norm": 0.36533960700035095, "learning_rate": 1.943016284217802e-05, "loss": 0.2636, "step": 5980 }, { "epoch": 0.108064481182155, "grad_norm": 0.545117199420929, "learning_rate": 1.9429218712813695e-05, "loss": 0.4378, "step": 5985 }, { "epoch": 0.10815476061505572, "grad_norm": 0.3776969611644745, "learning_rate": 1.9428273824937284e-05, "loss": 0.3666, "step": 5990 }, { "epoch": 0.10824504004795643, "grad_norm": 0.36028388142585754, "learning_rate": 1.9427328178624787e-05, "loss": 0.4163, "step": 5995 }, { "epoch": 0.10833531948085715, "grad_norm": 0.44015151262283325, "learning_rate": 1.9426381773952285e-05, "loss": 0.3104, "step": 6000 }, { "epoch": 0.10842559891375786, "grad_norm": 0.4195164442062378, "learning_rate": 1.9425434610995905e-05, "loss": 0.317, "step": 6005 }, { "epoch": 0.10851587834665857, "grad_norm": 0.42691734433174133, "learning_rate": 1.9424486689831844e-05, "loss": 0.2489, "step": 6010 }, { "epoch": 0.10860615777955929, "grad_norm": 0.6212683916091919, "learning_rate": 1.9423538010536343e-05, "loss": 0.3868, "step": 6015 }, { "epoch": 0.10869643721246, "grad_norm": 0.5525115132331848, "learning_rate": 1.942258857318573e-05, "loss": 0.2956, "step": 6020 }, { "epoch": 0.10878671664536071, "grad_norm": 0.28082871437072754, "learning_rate": 1.9421638377856378e-05, "loss": 0.2908, "step": 6025 }, { "epoch": 0.10887699607826143, "grad_norm": 0.46432995796203613, "learning_rate": 1.9420687424624714e-05, "loss": 0.3011, "step": 6030 }, { "epoch": 0.10896727551116214, "grad_norm": 0.3892320990562439, "learning_rate": 1.941973571356725e-05, "loss": 0.289, "step": 6035 }, { "epoch": 0.10905755494406286, "grad_norm": 0.4740193784236908, "learning_rate": 1.941878324476053e-05, "loss": 0.3497, "step": 6040 }, { "epoch": 0.10914783437696358, "grad_norm": 0.4852452576160431, "learning_rate": 1.9417830018281178e-05, "loss": 0.3478, "step": 6045 }, { "epoch": 0.1092381138098643, "grad_norm": 0.44724610447883606, "learning_rate": 1.941687603420588e-05, "loss": 0.3867, "step": 6050 }, { "epoch": 0.10932839324276501, "grad_norm": 0.45649048686027527, "learning_rate": 1.941592129261137e-05, "loss": 0.3203, "step": 6055 }, { "epoch": 0.10941867267566573, "grad_norm": 0.5838635563850403, "learning_rate": 1.9414965793574452e-05, "loss": 0.3297, "step": 6060 }, { "epoch": 0.10950895210856644, "grad_norm": 0.3139464855194092, "learning_rate": 1.941400953717199e-05, "loss": 0.3547, "step": 6065 }, { "epoch": 0.10959923154146715, "grad_norm": 0.3852726221084595, "learning_rate": 1.9413052523480904e-05, "loss": 0.417, "step": 6070 }, { "epoch": 0.10968951097436787, "grad_norm": 0.35865509510040283, "learning_rate": 1.9412094752578186e-05, "loss": 0.3131, "step": 6075 }, { "epoch": 0.10977979040726858, "grad_norm": 0.6715008020401001, "learning_rate": 1.9411136224540875e-05, "loss": 0.3649, "step": 6080 }, { "epoch": 0.1098700698401693, "grad_norm": 0.5033209919929504, "learning_rate": 1.9410176939446082e-05, "loss": 0.375, "step": 6085 }, { "epoch": 0.10996034927307001, "grad_norm": 0.4022418260574341, "learning_rate": 1.940921689737097e-05, "loss": 0.2064, "step": 6090 }, { "epoch": 0.11005062870597072, "grad_norm": 0.39626574516296387, "learning_rate": 1.940825609839278e-05, "loss": 0.3444, "step": 6095 }, { "epoch": 0.11014090813887144, "grad_norm": 0.33554548025131226, "learning_rate": 1.940729454258878e-05, "loss": 0.3455, "step": 6100 }, { "epoch": 0.11023118757177215, "grad_norm": 0.5526272654533386, "learning_rate": 1.9406332230036335e-05, "loss": 0.3254, "step": 6105 }, { "epoch": 0.11032146700467287, "grad_norm": 0.3943984806537628, "learning_rate": 1.9405369160812855e-05, "loss": 0.312, "step": 6110 }, { "epoch": 0.11041174643757358, "grad_norm": 0.4909297823905945, "learning_rate": 1.940440533499581e-05, "loss": 0.272, "step": 6115 }, { "epoch": 0.1105020258704743, "grad_norm": 0.33837640285491943, "learning_rate": 1.940344075266273e-05, "loss": 0.3183, "step": 6120 }, { "epoch": 0.11059230530337501, "grad_norm": 0.3912183940410614, "learning_rate": 1.9402475413891212e-05, "loss": 0.2911, "step": 6125 }, { "epoch": 0.11068258473627572, "grad_norm": 0.5623686909675598, "learning_rate": 1.940150931875891e-05, "loss": 0.4202, "step": 6130 }, { "epoch": 0.11077286416917644, "grad_norm": 0.4066951274871826, "learning_rate": 1.9400542467343537e-05, "loss": 0.3526, "step": 6135 }, { "epoch": 0.11086314360207715, "grad_norm": 0.43217983841896057, "learning_rate": 1.939957485972287e-05, "loss": 0.2482, "step": 6140 }, { "epoch": 0.11095342303497786, "grad_norm": 0.5239768028259277, "learning_rate": 1.9398606495974752e-05, "loss": 0.3912, "step": 6145 }, { "epoch": 0.11104370246787858, "grad_norm": 0.356083482503891, "learning_rate": 1.9397637376177073e-05, "loss": 0.3193, "step": 6150 }, { "epoch": 0.11113398190077929, "grad_norm": 0.4758167564868927, "learning_rate": 1.9396667500407793e-05, "loss": 0.3422, "step": 6155 }, { "epoch": 0.11122426133368, "grad_norm": 0.37192463874816895, "learning_rate": 1.9395696868744936e-05, "loss": 0.3409, "step": 6160 }, { "epoch": 0.11131454076658072, "grad_norm": 0.7285003066062927, "learning_rate": 1.9394725481266578e-05, "loss": 0.3798, "step": 6165 }, { "epoch": 0.11140482019948143, "grad_norm": 0.47070422768592834, "learning_rate": 1.939375333805086e-05, "loss": 0.4061, "step": 6170 }, { "epoch": 0.11149509963238215, "grad_norm": 0.35136669874191284, "learning_rate": 1.939278043917599e-05, "loss": 0.2757, "step": 6175 }, { "epoch": 0.11158537906528286, "grad_norm": 0.5866798758506775, "learning_rate": 1.9391806784720222e-05, "loss": 0.3323, "step": 6180 }, { "epoch": 0.11167565849818357, "grad_norm": 0.41596898436546326, "learning_rate": 1.9390832374761882e-05, "loss": 0.2752, "step": 6185 }, { "epoch": 0.11176593793108429, "grad_norm": 0.21890419721603394, "learning_rate": 1.9389857209379356e-05, "loss": 0.2227, "step": 6190 }, { "epoch": 0.111856217363985, "grad_norm": 0.491132527589798, "learning_rate": 1.938888128865109e-05, "loss": 0.383, "step": 6195 }, { "epoch": 0.11194649679688572, "grad_norm": 0.4853299856185913, "learning_rate": 1.938790461265559e-05, "loss": 0.342, "step": 6200 }, { "epoch": 0.11203677622978643, "grad_norm": 0.49977195262908936, "learning_rate": 1.9386927181471415e-05, "loss": 0.3409, "step": 6205 }, { "epoch": 0.11212705566268714, "grad_norm": 0.37122440338134766, "learning_rate": 1.9385948995177202e-05, "loss": 0.2683, "step": 6210 }, { "epoch": 0.11221733509558786, "grad_norm": 1.641432523727417, "learning_rate": 1.938497005385163e-05, "loss": 0.3658, "step": 6215 }, { "epoch": 0.11230761452848857, "grad_norm": 0.32605621218681335, "learning_rate": 1.9383990357573456e-05, "loss": 0.3598, "step": 6220 }, { "epoch": 0.11239789396138929, "grad_norm": 0.6883606314659119, "learning_rate": 1.938300990642149e-05, "loss": 0.3504, "step": 6225 }, { "epoch": 0.11248817339429, "grad_norm": 0.426334023475647, "learning_rate": 1.9382028700474592e-05, "loss": 0.3739, "step": 6230 }, { "epoch": 0.11257845282719071, "grad_norm": 0.3779202401638031, "learning_rate": 1.93810467398117e-05, "loss": 0.2548, "step": 6235 }, { "epoch": 0.11266873226009143, "grad_norm": 0.8037693500518799, "learning_rate": 1.9380064024511808e-05, "loss": 0.3067, "step": 6240 }, { "epoch": 0.11275901169299216, "grad_norm": 0.5130690932273865, "learning_rate": 1.9379080554653963e-05, "loss": 0.2785, "step": 6245 }, { "epoch": 0.11284929112589287, "grad_norm": 1.2105435132980347, "learning_rate": 1.9378096330317278e-05, "loss": 0.2803, "step": 6250 }, { "epoch": 0.11293957055879358, "grad_norm": 0.36144623160362244, "learning_rate": 1.937711135158093e-05, "loss": 0.375, "step": 6255 }, { "epoch": 0.1130298499916943, "grad_norm": 0.46951165795326233, "learning_rate": 1.937612561852415e-05, "loss": 0.3502, "step": 6260 }, { "epoch": 0.11312012942459501, "grad_norm": 0.37856414914131165, "learning_rate": 1.9375139131226237e-05, "loss": 0.3715, "step": 6265 }, { "epoch": 0.11321040885749573, "grad_norm": 0.5409042835235596, "learning_rate": 1.9374151889766546e-05, "loss": 0.3416, "step": 6270 }, { "epoch": 0.11330068829039644, "grad_norm": 1.3376145362854004, "learning_rate": 1.937316389422449e-05, "loss": 0.3036, "step": 6275 }, { "epoch": 0.11339096772329715, "grad_norm": 0.49774277210235596, "learning_rate": 1.9372175144679552e-05, "loss": 0.2924, "step": 6280 }, { "epoch": 0.11348124715619787, "grad_norm": 0.5464069843292236, "learning_rate": 1.937118564121126e-05, "loss": 0.3233, "step": 6285 }, { "epoch": 0.11357152658909858, "grad_norm": 0.48345452547073364, "learning_rate": 1.937019538389922e-05, "loss": 0.308, "step": 6290 }, { "epoch": 0.1136618060219993, "grad_norm": 0.335178941488266, "learning_rate": 1.9369204372823092e-05, "loss": 0.3727, "step": 6295 }, { "epoch": 0.11375208545490001, "grad_norm": 0.5843726992607117, "learning_rate": 1.936821260806259e-05, "loss": 0.3487, "step": 6300 }, { "epoch": 0.11384236488780072, "grad_norm": 0.48966479301452637, "learning_rate": 1.93672200896975e-05, "loss": 0.4023, "step": 6305 }, { "epoch": 0.11393264432070144, "grad_norm": 0.4158240258693695, "learning_rate": 1.9366226817807656e-05, "loss": 0.2914, "step": 6310 }, { "epoch": 0.11402292375360215, "grad_norm": 0.45833754539489746, "learning_rate": 1.9365232792472966e-05, "loss": 0.2859, "step": 6315 }, { "epoch": 0.11411320318650287, "grad_norm": 0.43124616146087646, "learning_rate": 1.936423801377339e-05, "loss": 0.3819, "step": 6320 }, { "epoch": 0.11420348261940358, "grad_norm": 0.47638633847236633, "learning_rate": 1.936324248178895e-05, "loss": 0.3102, "step": 6325 }, { "epoch": 0.11429376205230429, "grad_norm": 0.47278913855552673, "learning_rate": 1.936224619659973e-05, "loss": 0.2665, "step": 6330 }, { "epoch": 0.114384041485205, "grad_norm": 0.3465459942817688, "learning_rate": 1.9361249158285875e-05, "loss": 0.3992, "step": 6335 }, { "epoch": 0.11447432091810572, "grad_norm": 0.5552494525909424, "learning_rate": 1.9360251366927586e-05, "loss": 0.3767, "step": 6340 }, { "epoch": 0.11456460035100643, "grad_norm": 0.5205835700035095, "learning_rate": 1.9359252822605132e-05, "loss": 0.2317, "step": 6345 }, { "epoch": 0.11465487978390715, "grad_norm": 0.5803459882736206, "learning_rate": 1.9358253525398835e-05, "loss": 0.2835, "step": 6350 }, { "epoch": 0.11474515921680786, "grad_norm": 0.36763742566108704, "learning_rate": 1.9357253475389083e-05, "loss": 0.3426, "step": 6355 }, { "epoch": 0.11483543864970858, "grad_norm": 0.31475019454956055, "learning_rate": 1.9356252672656323e-05, "loss": 0.285, "step": 6360 }, { "epoch": 0.11492571808260929, "grad_norm": 0.4656864106655121, "learning_rate": 1.9355251117281063e-05, "loss": 0.3868, "step": 6365 }, { "epoch": 0.11501599751551, "grad_norm": 0.41800594329833984, "learning_rate": 1.9354248809343867e-05, "loss": 0.2822, "step": 6370 }, { "epoch": 0.11510627694841072, "grad_norm": 0.8545289635658264, "learning_rate": 1.935324574892537e-05, "loss": 0.3509, "step": 6375 }, { "epoch": 0.11519655638131143, "grad_norm": 0.526145875453949, "learning_rate": 1.9352241936106254e-05, "loss": 0.323, "step": 6380 }, { "epoch": 0.11528683581421215, "grad_norm": 0.445852130651474, "learning_rate": 1.9351237370967275e-05, "loss": 0.3624, "step": 6385 }, { "epoch": 0.11537711524711286, "grad_norm": 0.329623818397522, "learning_rate": 1.9350232053589234e-05, "loss": 0.2851, "step": 6390 }, { "epoch": 0.11546739468001357, "grad_norm": 0.5335118174552917, "learning_rate": 1.934922598405301e-05, "loss": 0.37, "step": 6395 }, { "epoch": 0.11555767411291429, "grad_norm": 0.3303772807121277, "learning_rate": 1.9348219162439534e-05, "loss": 0.3581, "step": 6400 }, { "epoch": 0.115647953545815, "grad_norm": 0.4297807514667511, "learning_rate": 1.934721158882979e-05, "loss": 0.2859, "step": 6405 }, { "epoch": 0.11573823297871572, "grad_norm": 0.48351696133613586, "learning_rate": 1.934620326330484e-05, "loss": 0.2975, "step": 6410 }, { "epoch": 0.11582851241161643, "grad_norm": 0.36849284172058105, "learning_rate": 1.9345194185945785e-05, "loss": 0.2877, "step": 6415 }, { "epoch": 0.11591879184451714, "grad_norm": 0.3167666792869568, "learning_rate": 1.9344184356833805e-05, "loss": 0.3535, "step": 6420 }, { "epoch": 0.11600907127741786, "grad_norm": 0.6065095663070679, "learning_rate": 1.9343173776050137e-05, "loss": 0.3139, "step": 6425 }, { "epoch": 0.11609935071031857, "grad_norm": 0.5970287919044495, "learning_rate": 1.9342162443676063e-05, "loss": 0.2853, "step": 6430 }, { "epoch": 0.11618963014321929, "grad_norm": 0.7117475271224976, "learning_rate": 1.9341150359792952e-05, "loss": 0.458, "step": 6435 }, { "epoch": 0.11627990957612001, "grad_norm": 0.42741113901138306, "learning_rate": 1.9340137524482204e-05, "loss": 0.2426, "step": 6440 }, { "epoch": 0.11637018900902073, "grad_norm": 0.5931331515312195, "learning_rate": 1.9339123937825307e-05, "loss": 0.3373, "step": 6445 }, { "epoch": 0.11646046844192144, "grad_norm": 0.3439541459083557, "learning_rate": 1.9338109599903792e-05, "loss": 0.4163, "step": 6450 }, { "epoch": 0.11655074787482216, "grad_norm": 0.42862388491630554, "learning_rate": 1.933709451079925e-05, "loss": 0.3298, "step": 6455 }, { "epoch": 0.11664102730772287, "grad_norm": 0.7009595036506653, "learning_rate": 1.9336078670593344e-05, "loss": 0.3634, "step": 6460 }, { "epoch": 0.11673130674062358, "grad_norm": 0.5495311617851257, "learning_rate": 1.9335062079367787e-05, "loss": 0.2933, "step": 6465 }, { "epoch": 0.1168215861735243, "grad_norm": 0.3590311110019684, "learning_rate": 1.933404473720436e-05, "loss": 0.3476, "step": 6470 }, { "epoch": 0.11691186560642501, "grad_norm": 0.6501250863075256, "learning_rate": 1.9333026644184898e-05, "loss": 0.2524, "step": 6475 }, { "epoch": 0.11700214503932573, "grad_norm": 0.37842854857444763, "learning_rate": 1.93320078003913e-05, "loss": 0.3653, "step": 6480 }, { "epoch": 0.11709242447222644, "grad_norm": 0.43740513920783997, "learning_rate": 1.9330988205905526e-05, "loss": 0.3841, "step": 6485 }, { "epoch": 0.11718270390512715, "grad_norm": 0.2887119650840759, "learning_rate": 1.9329967860809592e-05, "loss": 0.3, "step": 6490 }, { "epoch": 0.11727298333802787, "grad_norm": 0.4963028132915497, "learning_rate": 1.932894676518558e-05, "loss": 0.3703, "step": 6495 }, { "epoch": 0.11736326277092858, "grad_norm": 0.6175485849380493, "learning_rate": 1.9327924919115628e-05, "loss": 0.3587, "step": 6500 }, { "epoch": 0.1174535422038293, "grad_norm": 0.4199815094470978, "learning_rate": 1.932690232268194e-05, "loss": 0.2776, "step": 6505 }, { "epoch": 0.11754382163673001, "grad_norm": 0.3766605257987976, "learning_rate": 1.9325878975966768e-05, "loss": 0.247, "step": 6510 }, { "epoch": 0.11763410106963072, "grad_norm": 0.48902565240859985, "learning_rate": 1.9324854879052442e-05, "loss": 0.2583, "step": 6515 }, { "epoch": 0.11772438050253144, "grad_norm": 0.49462440609931946, "learning_rate": 1.932383003202134e-05, "loss": 0.3483, "step": 6520 }, { "epoch": 0.11781465993543215, "grad_norm": 0.44759970903396606, "learning_rate": 1.93228044349559e-05, "loss": 0.3257, "step": 6525 }, { "epoch": 0.11790493936833286, "grad_norm": 0.503629207611084, "learning_rate": 1.9321778087938627e-05, "loss": 0.2709, "step": 6530 }, { "epoch": 0.11799521880123358, "grad_norm": 0.4044511914253235, "learning_rate": 1.9320750991052085e-05, "loss": 0.3875, "step": 6535 }, { "epoch": 0.11808549823413429, "grad_norm": 0.4957432150840759, "learning_rate": 1.931972314437889e-05, "loss": 0.3982, "step": 6540 }, { "epoch": 0.118175777667035, "grad_norm": 1.0312913656234741, "learning_rate": 1.931869454800173e-05, "loss": 0.3709, "step": 6545 }, { "epoch": 0.11826605709993572, "grad_norm": 0.5594607591629028, "learning_rate": 1.9317665202003352e-05, "loss": 0.4227, "step": 6550 }, { "epoch": 0.11835633653283643, "grad_norm": 0.3540436923503876, "learning_rate": 1.931663510646655e-05, "loss": 0.191, "step": 6555 }, { "epoch": 0.11844661596573715, "grad_norm": 0.36071327328681946, "learning_rate": 1.931560426147419e-05, "loss": 0.4147, "step": 6560 }, { "epoch": 0.11853689539863786, "grad_norm": 0.37677809596061707, "learning_rate": 1.93145726671092e-05, "loss": 0.2762, "step": 6565 }, { "epoch": 0.11862717483153858, "grad_norm": 0.6227388978004456, "learning_rate": 1.9313540323454564e-05, "loss": 0.3244, "step": 6570 }, { "epoch": 0.11871745426443929, "grad_norm": 0.4217793643474579, "learning_rate": 1.9312507230593327e-05, "loss": 0.3036, "step": 6575 }, { "epoch": 0.11880773369734, "grad_norm": 0.6171652674674988, "learning_rate": 1.9311473388608588e-05, "loss": 0.3221, "step": 6580 }, { "epoch": 0.11889801313024072, "grad_norm": 0.6095660924911499, "learning_rate": 1.931043879758352e-05, "loss": 0.239, "step": 6585 }, { "epoch": 0.11898829256314143, "grad_norm": 0.34578561782836914, "learning_rate": 1.930940345760134e-05, "loss": 0.3711, "step": 6590 }, { "epoch": 0.11907857199604215, "grad_norm": 0.4061829149723053, "learning_rate": 1.9308367368745337e-05, "loss": 0.4484, "step": 6595 }, { "epoch": 0.11916885142894286, "grad_norm": 0.6296597719192505, "learning_rate": 1.930733053109886e-05, "loss": 0.2457, "step": 6600 }, { "epoch": 0.11925913086184357, "grad_norm": 0.7155535817146301, "learning_rate": 1.9306292944745313e-05, "loss": 0.3223, "step": 6605 }, { "epoch": 0.11934941029474429, "grad_norm": 0.4064818322658539, "learning_rate": 1.930525460976816e-05, "loss": 0.2874, "step": 6610 }, { "epoch": 0.119439689727645, "grad_norm": 0.3748913109302521, "learning_rate": 1.930421552625093e-05, "loss": 0.2791, "step": 6615 }, { "epoch": 0.11952996916054572, "grad_norm": 0.5190522074699402, "learning_rate": 1.9303175694277213e-05, "loss": 0.4314, "step": 6620 }, { "epoch": 0.11962024859344643, "grad_norm": 0.4803234040737152, "learning_rate": 1.9302135113930646e-05, "loss": 0.2915, "step": 6625 }, { "epoch": 0.11971052802634714, "grad_norm": 0.33393678069114685, "learning_rate": 1.9301093785294943e-05, "loss": 0.4128, "step": 6630 }, { "epoch": 0.11980080745924786, "grad_norm": 0.4914097189903259, "learning_rate": 1.9300051708453872e-05, "loss": 0.2867, "step": 6635 }, { "epoch": 0.11989108689214859, "grad_norm": 1.1089816093444824, "learning_rate": 1.9299008883491263e-05, "loss": 0.2758, "step": 6640 }, { "epoch": 0.1199813663250493, "grad_norm": 0.34248292446136475, "learning_rate": 1.9297965310490994e-05, "loss": 0.3728, "step": 6645 }, { "epoch": 0.12007164575795001, "grad_norm": 0.34159940481185913, "learning_rate": 1.929692098953702e-05, "loss": 0.247, "step": 6650 }, { "epoch": 0.12016192519085073, "grad_norm": 0.3600628972053528, "learning_rate": 1.929587592071335e-05, "loss": 0.3114, "step": 6655 }, { "epoch": 0.12025220462375144, "grad_norm": 0.4529002606868744, "learning_rate": 1.9294830104104045e-05, "loss": 0.3179, "step": 6660 }, { "epoch": 0.12034248405665215, "grad_norm": 0.39672356843948364, "learning_rate": 1.929378353979324e-05, "loss": 0.3081, "step": 6665 }, { "epoch": 0.12043276348955287, "grad_norm": 0.35251498222351074, "learning_rate": 1.929273622786512e-05, "loss": 0.2415, "step": 6670 }, { "epoch": 0.12052304292245358, "grad_norm": 1.1927274465560913, "learning_rate": 1.929168816840394e-05, "loss": 0.2911, "step": 6675 }, { "epoch": 0.1206133223553543, "grad_norm": 0.5608368515968323, "learning_rate": 1.9290639361494002e-05, "loss": 0.3454, "step": 6680 }, { "epoch": 0.12070360178825501, "grad_norm": 0.712186336517334, "learning_rate": 1.9289589807219674e-05, "loss": 0.3953, "step": 6685 }, { "epoch": 0.12079388122115572, "grad_norm": 0.849686861038208, "learning_rate": 1.928853950566539e-05, "loss": 0.3562, "step": 6690 }, { "epoch": 0.12088416065405644, "grad_norm": 0.7094193696975708, "learning_rate": 1.928748845691564e-05, "loss": 0.3219, "step": 6695 }, { "epoch": 0.12097444008695715, "grad_norm": 0.5030642747879028, "learning_rate": 1.9286436661054965e-05, "loss": 0.2835, "step": 6700 }, { "epoch": 0.12106471951985787, "grad_norm": 0.36728736758232117, "learning_rate": 1.9285384118167984e-05, "loss": 0.389, "step": 6705 }, { "epoch": 0.12115499895275858, "grad_norm": 0.6199520826339722, "learning_rate": 1.928433082833936e-05, "loss": 0.313, "step": 6710 }, { "epoch": 0.1212452783856593, "grad_norm": 0.6002387404441833, "learning_rate": 1.928327679165382e-05, "loss": 0.3059, "step": 6715 }, { "epoch": 0.12133555781856001, "grad_norm": 0.8976176381111145, "learning_rate": 1.9282222008196165e-05, "loss": 0.2992, "step": 6720 }, { "epoch": 0.12142583725146072, "grad_norm": 0.3749474585056305, "learning_rate": 1.9281166478051234e-05, "loss": 0.3149, "step": 6725 }, { "epoch": 0.12151611668436144, "grad_norm": 0.4321290850639343, "learning_rate": 1.9280110201303942e-05, "loss": 0.3096, "step": 6730 }, { "epoch": 0.12160639611726215, "grad_norm": 0.4031892716884613, "learning_rate": 1.9279053178039256e-05, "loss": 0.4123, "step": 6735 }, { "epoch": 0.12169667555016286, "grad_norm": 0.28211620450019836, "learning_rate": 1.927799540834221e-05, "loss": 0.3758, "step": 6740 }, { "epoch": 0.12178695498306358, "grad_norm": 0.6331525444984436, "learning_rate": 1.9276936892297887e-05, "loss": 0.2116, "step": 6745 }, { "epoch": 0.12187723441596429, "grad_norm": 0.44513705372810364, "learning_rate": 1.9275877629991444e-05, "loss": 0.2429, "step": 6750 }, { "epoch": 0.121967513848865, "grad_norm": 0.4318163990974426, "learning_rate": 1.9274817621508088e-05, "loss": 0.3567, "step": 6755 }, { "epoch": 0.12205779328176572, "grad_norm": 0.33108144998550415, "learning_rate": 1.9273756866933083e-05, "loss": 0.2401, "step": 6760 }, { "epoch": 0.12214807271466643, "grad_norm": 0.3258655369281769, "learning_rate": 1.927269536635177e-05, "loss": 0.2845, "step": 6765 }, { "epoch": 0.12223835214756715, "grad_norm": 0.48052114248275757, "learning_rate": 1.9271633119849535e-05, "loss": 0.3305, "step": 6770 }, { "epoch": 0.12232863158046786, "grad_norm": 0.4707503020763397, "learning_rate": 1.927057012751182e-05, "loss": 0.3054, "step": 6775 }, { "epoch": 0.12241891101336858, "grad_norm": 0.7184447050094604, "learning_rate": 1.926950638942415e-05, "loss": 0.2975, "step": 6780 }, { "epoch": 0.12250919044626929, "grad_norm": 0.3022684156894684, "learning_rate": 1.9268441905672078e-05, "loss": 0.3706, "step": 6785 }, { "epoch": 0.12259946987917, "grad_norm": 0.46065765619277954, "learning_rate": 1.9267376676341252e-05, "loss": 0.3397, "step": 6790 }, { "epoch": 0.12268974931207072, "grad_norm": 2.153079032897949, "learning_rate": 1.926631070151735e-05, "loss": 0.3677, "step": 6795 }, { "epoch": 0.12278002874497143, "grad_norm": 0.33272886276245117, "learning_rate": 1.9265243981286122e-05, "loss": 0.293, "step": 6800 }, { "epoch": 0.12287030817787215, "grad_norm": 0.6385965347290039, "learning_rate": 1.926417651573338e-05, "loss": 0.323, "step": 6805 }, { "epoch": 0.12296058761077286, "grad_norm": 0.503334105014801, "learning_rate": 1.9263108304944994e-05, "loss": 0.2452, "step": 6810 }, { "epoch": 0.12305086704367357, "grad_norm": 0.3952818810939789, "learning_rate": 1.92620393490069e-05, "loss": 0.2221, "step": 6815 }, { "epoch": 0.12314114647657429, "grad_norm": 0.39027824997901917, "learning_rate": 1.9260969648005076e-05, "loss": 0.31, "step": 6820 }, { "epoch": 0.123231425909475, "grad_norm": 0.6210625767707825, "learning_rate": 1.925989920202558e-05, "loss": 0.3762, "step": 6825 }, { "epoch": 0.12332170534237571, "grad_norm": 0.5680192708969116, "learning_rate": 1.925882801115452e-05, "loss": 0.3463, "step": 6830 }, { "epoch": 0.12341198477527643, "grad_norm": 0.29992368817329407, "learning_rate": 1.9257756075478063e-05, "loss": 0.2801, "step": 6835 }, { "epoch": 0.12350226420817716, "grad_norm": 0.31016963720321655, "learning_rate": 1.925668339508244e-05, "loss": 0.2648, "step": 6840 }, { "epoch": 0.12359254364107787, "grad_norm": 0.5393102765083313, "learning_rate": 1.9255609970053943e-05, "loss": 0.3645, "step": 6845 }, { "epoch": 0.12368282307397858, "grad_norm": 0.48195183277130127, "learning_rate": 1.9254535800478918e-05, "loss": 0.354, "step": 6850 }, { "epoch": 0.1237731025068793, "grad_norm": 0.44904544949531555, "learning_rate": 1.9253460886443773e-05, "loss": 0.327, "step": 6855 }, { "epoch": 0.12386338193978001, "grad_norm": 0.46062836050987244, "learning_rate": 1.925238522803498e-05, "loss": 0.358, "step": 6860 }, { "epoch": 0.12395366137268073, "grad_norm": 0.38590455055236816, "learning_rate": 1.9251308825339063e-05, "loss": 0.3627, "step": 6865 }, { "epoch": 0.12404394080558144, "grad_norm": 0.6193512082099915, "learning_rate": 1.925023167844262e-05, "loss": 0.3356, "step": 6870 }, { "epoch": 0.12413422023848215, "grad_norm": 0.4015476703643799, "learning_rate": 1.9249153787432294e-05, "loss": 0.3249, "step": 6875 }, { "epoch": 0.12422449967138287, "grad_norm": 0.5982859134674072, "learning_rate": 1.924807515239479e-05, "loss": 0.2964, "step": 6880 }, { "epoch": 0.12431477910428358, "grad_norm": 0.34201470017433167, "learning_rate": 1.924699577341688e-05, "loss": 0.3193, "step": 6885 }, { "epoch": 0.1244050585371843, "grad_norm": 0.5029416084289551, "learning_rate": 1.9245915650585396e-05, "loss": 0.3764, "step": 6890 }, { "epoch": 0.12449533797008501, "grad_norm": 0.5241310000419617, "learning_rate": 1.924483478398722e-05, "loss": 0.4296, "step": 6895 }, { "epoch": 0.12458561740298572, "grad_norm": 0.7433327436447144, "learning_rate": 1.9243753173709302e-05, "loss": 0.4092, "step": 6900 }, { "epoch": 0.12467589683588644, "grad_norm": 0.3439296782016754, "learning_rate": 1.924267081983865e-05, "loss": 0.2702, "step": 6905 }, { "epoch": 0.12476617626878715, "grad_norm": 0.3778502345085144, "learning_rate": 1.924158772246233e-05, "loss": 0.3099, "step": 6910 }, { "epoch": 0.12485645570168787, "grad_norm": 0.44551828503608704, "learning_rate": 1.9240503881667472e-05, "loss": 0.3896, "step": 6915 }, { "epoch": 0.12494673513458858, "grad_norm": 0.5756784677505493, "learning_rate": 1.923941929754126e-05, "loss": 0.389, "step": 6920 }, { "epoch": 0.12503701456748928, "grad_norm": 0.624144971370697, "learning_rate": 1.9238333970170947e-05, "loss": 0.2913, "step": 6925 }, { "epoch": 0.12512729400039, "grad_norm": 0.6104403734207153, "learning_rate": 1.923724789964383e-05, "loss": 0.3229, "step": 6930 }, { "epoch": 0.12521757343329074, "grad_norm": 0.32302334904670715, "learning_rate": 1.9236161086047286e-05, "loss": 0.2511, "step": 6935 }, { "epoch": 0.12530785286619145, "grad_norm": 0.28354236483573914, "learning_rate": 1.923507352946873e-05, "loss": 0.328, "step": 6940 }, { "epoch": 0.12539813229909216, "grad_norm": 0.5260065793991089, "learning_rate": 1.9233985229995657e-05, "loss": 0.3043, "step": 6945 }, { "epoch": 0.12548841173199288, "grad_norm": 0.5684165358543396, "learning_rate": 1.923289618771561e-05, "loss": 0.3425, "step": 6950 }, { "epoch": 0.1255786911648936, "grad_norm": 0.4093371033668518, "learning_rate": 1.9231806402716195e-05, "loss": 0.3634, "step": 6955 }, { "epoch": 0.1256689705977943, "grad_norm": 0.4712226986885071, "learning_rate": 1.9230715875085076e-05, "loss": 0.2982, "step": 6960 }, { "epoch": 0.12575925003069502, "grad_norm": 0.3231732249259949, "learning_rate": 1.9229624604909977e-05, "loss": 0.2779, "step": 6965 }, { "epoch": 0.12584952946359573, "grad_norm": 0.4267129600048065, "learning_rate": 1.9228532592278686e-05, "loss": 0.2812, "step": 6970 }, { "epoch": 0.12593980889649645, "grad_norm": 0.4321097731590271, "learning_rate": 1.9227439837279046e-05, "loss": 0.3731, "step": 6975 }, { "epoch": 0.12603008832939716, "grad_norm": 0.4453035295009613, "learning_rate": 1.9226346339998962e-05, "loss": 0.3764, "step": 6980 }, { "epoch": 0.12612036776229787, "grad_norm": 0.5288938283920288, "learning_rate": 1.9225252100526394e-05, "loss": 0.3277, "step": 6985 }, { "epoch": 0.1262106471951986, "grad_norm": 0.3053990304470062, "learning_rate": 1.9224157118949373e-05, "loss": 0.2816, "step": 6990 }, { "epoch": 0.1263009266280993, "grad_norm": 0.4477319121360779, "learning_rate": 1.9223061395355975e-05, "loss": 0.3374, "step": 6995 }, { "epoch": 0.12639120606100002, "grad_norm": 0.32137957215309143, "learning_rate": 1.9221964929834343e-05, "loss": 0.3038, "step": 7000 }, { "epoch": 0.12648148549390073, "grad_norm": 0.37034428119659424, "learning_rate": 1.9220867722472687e-05, "loss": 0.3287, "step": 7005 }, { "epoch": 0.12657176492680144, "grad_norm": 0.4952510893344879, "learning_rate": 1.9219769773359265e-05, "loss": 0.2908, "step": 7010 }, { "epoch": 0.12666204435970216, "grad_norm": 0.30036431550979614, "learning_rate": 1.9218671082582396e-05, "loss": 0.3113, "step": 7015 }, { "epoch": 0.12675232379260287, "grad_norm": 0.41781020164489746, "learning_rate": 1.9217571650230464e-05, "loss": 0.3724, "step": 7020 }, { "epoch": 0.1268426032255036, "grad_norm": 0.4511464238166809, "learning_rate": 1.9216471476391915e-05, "loss": 0.2724, "step": 7025 }, { "epoch": 0.1269328826584043, "grad_norm": 0.32684236764907837, "learning_rate": 1.9215370561155245e-05, "loss": 0.3578, "step": 7030 }, { "epoch": 0.12702316209130501, "grad_norm": 0.38256338238716125, "learning_rate": 1.9214268904609015e-05, "loss": 0.2174, "step": 7035 }, { "epoch": 0.12711344152420573, "grad_norm": 0.5053243637084961, "learning_rate": 1.9213166506841844e-05, "loss": 0.2677, "step": 7040 }, { "epoch": 0.12720372095710644, "grad_norm": 0.3353807330131531, "learning_rate": 1.9212063367942416e-05, "loss": 0.2603, "step": 7045 }, { "epoch": 0.12729400039000716, "grad_norm": 0.4773527979850769, "learning_rate": 1.9210959487999466e-05, "loss": 0.3303, "step": 7050 }, { "epoch": 0.12738427982290787, "grad_norm": 0.962485134601593, "learning_rate": 1.92098548671018e-05, "loss": 0.323, "step": 7055 }, { "epoch": 0.12747455925580858, "grad_norm": 0.31148481369018555, "learning_rate": 1.9208749505338266e-05, "loss": 0.3649, "step": 7060 }, { "epoch": 0.1275648386887093, "grad_norm": 0.41241616010665894, "learning_rate": 1.920764340279779e-05, "loss": 0.3506, "step": 7065 }, { "epoch": 0.12765511812161, "grad_norm": 0.5747043490409851, "learning_rate": 1.920653655956935e-05, "loss": 0.3686, "step": 7070 }, { "epoch": 0.12774539755451073, "grad_norm": 0.4869537055492401, "learning_rate": 1.9205428975741983e-05, "loss": 0.3266, "step": 7075 }, { "epoch": 0.12783567698741144, "grad_norm": 0.6491648554801941, "learning_rate": 1.9204320651404782e-05, "loss": 0.3375, "step": 7080 }, { "epoch": 0.12792595642031215, "grad_norm": 0.3836076259613037, "learning_rate": 1.9203211586646908e-05, "loss": 0.317, "step": 7085 }, { "epoch": 0.12801623585321287, "grad_norm": 0.4446025490760803, "learning_rate": 1.9202101781557575e-05, "loss": 0.3376, "step": 7090 }, { "epoch": 0.12810651528611358, "grad_norm": 0.43301495909690857, "learning_rate": 1.9200991236226057e-05, "loss": 0.3474, "step": 7095 }, { "epoch": 0.1281967947190143, "grad_norm": 0.5248858332633972, "learning_rate": 1.9199879950741698e-05, "loss": 0.3209, "step": 7100 }, { "epoch": 0.128287074151915, "grad_norm": 0.5935579538345337, "learning_rate": 1.919876792519388e-05, "loss": 0.3652, "step": 7105 }, { "epoch": 0.12837735358481572, "grad_norm": 0.3689795732498169, "learning_rate": 1.9197655159672068e-05, "loss": 0.3509, "step": 7110 }, { "epoch": 0.12846763301771644, "grad_norm": 0.44201385974884033, "learning_rate": 1.919654165426577e-05, "loss": 0.2219, "step": 7115 }, { "epoch": 0.12855791245061715, "grad_norm": 0.4781646430492401, "learning_rate": 1.9195427409064563e-05, "loss": 0.2743, "step": 7120 }, { "epoch": 0.12864819188351787, "grad_norm": 0.3723335862159729, "learning_rate": 1.9194312424158077e-05, "loss": 0.3604, "step": 7125 }, { "epoch": 0.12873847131641858, "grad_norm": 0.43962472677230835, "learning_rate": 1.9193196699636004e-05, "loss": 0.3219, "step": 7130 }, { "epoch": 0.1288287507493193, "grad_norm": 0.5739518404006958, "learning_rate": 1.91920802355881e-05, "loss": 0.3676, "step": 7135 }, { "epoch": 0.12891903018222, "grad_norm": 0.4589199721813202, "learning_rate": 1.9190963032104173e-05, "loss": 0.2633, "step": 7140 }, { "epoch": 0.12900930961512072, "grad_norm": 0.7155197262763977, "learning_rate": 1.9189845089274095e-05, "loss": 0.406, "step": 7145 }, { "epoch": 0.12909958904802143, "grad_norm": 0.5128468871116638, "learning_rate": 1.9188726407187795e-05, "loss": 0.3197, "step": 7150 }, { "epoch": 0.12918986848092215, "grad_norm": 0.29664355516433716, "learning_rate": 1.9187606985935267e-05, "loss": 0.3227, "step": 7155 }, { "epoch": 0.12928014791382286, "grad_norm": 0.4465535581111908, "learning_rate": 1.9186486825606553e-05, "loss": 0.3069, "step": 7160 }, { "epoch": 0.12937042734672358, "grad_norm": 0.5476791262626648, "learning_rate": 1.918536592629177e-05, "loss": 0.3073, "step": 7165 }, { "epoch": 0.1294607067796243, "grad_norm": 0.47054997086524963, "learning_rate": 1.918424428808108e-05, "loss": 0.2936, "step": 7170 }, { "epoch": 0.129550986212525, "grad_norm": 0.8305136561393738, "learning_rate": 1.9183121911064715e-05, "loss": 0.3147, "step": 7175 }, { "epoch": 0.12964126564542572, "grad_norm": 0.5708081126213074, "learning_rate": 1.9181998795332958e-05, "loss": 0.2975, "step": 7180 }, { "epoch": 0.12973154507832643, "grad_norm": 0.31832578778266907, "learning_rate": 1.918087494097616e-05, "loss": 0.3107, "step": 7185 }, { "epoch": 0.12982182451122715, "grad_norm": 0.8390007019042969, "learning_rate": 1.917975034808472e-05, "loss": 0.3815, "step": 7190 }, { "epoch": 0.12991210394412786, "grad_norm": 0.37863609194755554, "learning_rate": 1.917862501674911e-05, "loss": 0.3383, "step": 7195 }, { "epoch": 0.13000238337702857, "grad_norm": 0.44151777029037476, "learning_rate": 1.9177498947059856e-05, "loss": 0.281, "step": 7200 }, { "epoch": 0.1300926628099293, "grad_norm": 0.47824159264564514, "learning_rate": 1.917637213910753e-05, "loss": 0.3079, "step": 7205 }, { "epoch": 0.13018294224283, "grad_norm": 0.37554118037223816, "learning_rate": 1.9175244592982795e-05, "loss": 0.3497, "step": 7210 }, { "epoch": 0.13027322167573072, "grad_norm": 0.5599135160446167, "learning_rate": 1.9174116308776337e-05, "loss": 0.2656, "step": 7215 }, { "epoch": 0.13036350110863143, "grad_norm": 0.44534119963645935, "learning_rate": 1.9172987286578922e-05, "loss": 0.3264, "step": 7220 }, { "epoch": 0.13045378054153214, "grad_norm": 0.3763585388660431, "learning_rate": 1.917185752648138e-05, "loss": 0.2163, "step": 7225 }, { "epoch": 0.13054405997443286, "grad_norm": 0.3645566701889038, "learning_rate": 1.9170727028574582e-05, "loss": 0.3497, "step": 7230 }, { "epoch": 0.13063433940733357, "grad_norm": 0.36997899413108826, "learning_rate": 1.916959579294947e-05, "loss": 0.3257, "step": 7235 }, { "epoch": 0.13072461884023429, "grad_norm": 0.5548878312110901, "learning_rate": 1.916846381969705e-05, "loss": 0.3302, "step": 7240 }, { "epoch": 0.130814898273135, "grad_norm": 0.39686310291290283, "learning_rate": 1.9167331108908376e-05, "loss": 0.277, "step": 7245 }, { "epoch": 0.1309051777060357, "grad_norm": 0.3491361439228058, "learning_rate": 1.9166197660674567e-05, "loss": 0.2826, "step": 7250 }, { "epoch": 0.13099545713893643, "grad_norm": 0.42074933648109436, "learning_rate": 1.9165063475086797e-05, "loss": 0.3404, "step": 7255 }, { "epoch": 0.13108573657183714, "grad_norm": 0.46928852796554565, "learning_rate": 1.9163928552236314e-05, "loss": 0.3195, "step": 7260 }, { "epoch": 0.13117601600473786, "grad_norm": 0.37827062606811523, "learning_rate": 1.91627928922144e-05, "loss": 0.3645, "step": 7265 }, { "epoch": 0.13126629543763857, "grad_norm": 0.442892849445343, "learning_rate": 1.9161656495112424e-05, "loss": 0.3934, "step": 7270 }, { "epoch": 0.13135657487053928, "grad_norm": 0.3846202790737152, "learning_rate": 1.916051936102179e-05, "loss": 0.2378, "step": 7275 }, { "epoch": 0.13144685430344, "grad_norm": 0.3885536193847656, "learning_rate": 1.915938149003398e-05, "loss": 0.193, "step": 7280 }, { "epoch": 0.1315371337363407, "grad_norm": 0.3747337758541107, "learning_rate": 1.9158242882240523e-05, "loss": 0.3629, "step": 7285 }, { "epoch": 0.13162741316924143, "grad_norm": 0.3291547894477844, "learning_rate": 1.9157103537733013e-05, "loss": 0.3123, "step": 7290 }, { "epoch": 0.13171769260214214, "grad_norm": 0.3559887707233429, "learning_rate": 1.91559634566031e-05, "loss": 0.3227, "step": 7295 }, { "epoch": 0.13180797203504285, "grad_norm": 0.4530769884586334, "learning_rate": 1.91548226389425e-05, "loss": 0.3168, "step": 7300 }, { "epoch": 0.13189825146794357, "grad_norm": 0.38989293575286865, "learning_rate": 1.915368108484298e-05, "loss": 0.2548, "step": 7305 }, { "epoch": 0.13198853090084428, "grad_norm": 0.5381086468696594, "learning_rate": 1.9152538794396372e-05, "loss": 0.3533, "step": 7310 }, { "epoch": 0.132078810333745, "grad_norm": 0.5024468898773193, "learning_rate": 1.9151395767694563e-05, "loss": 0.3122, "step": 7315 }, { "epoch": 0.1321690897666457, "grad_norm": 0.5017898082733154, "learning_rate": 1.9150252004829498e-05, "loss": 0.2025, "step": 7320 }, { "epoch": 0.13225936919954642, "grad_norm": 0.46784597635269165, "learning_rate": 1.914910750589319e-05, "loss": 0.3269, "step": 7325 }, { "epoch": 0.13234964863244714, "grad_norm": 1.0234980583190918, "learning_rate": 1.9147962270977705e-05, "loss": 0.3999, "step": 7330 }, { "epoch": 0.13243992806534788, "grad_norm": 0.3947860598564148, "learning_rate": 1.9146816300175166e-05, "loss": 0.3523, "step": 7335 }, { "epoch": 0.1325302074982486, "grad_norm": 0.7022761702537537, "learning_rate": 1.914566959357776e-05, "loss": 0.3815, "step": 7340 }, { "epoch": 0.1326204869311493, "grad_norm": 0.4152867794036865, "learning_rate": 1.914452215127773e-05, "loss": 0.2709, "step": 7345 }, { "epoch": 0.13271076636405002, "grad_norm": 0.5031547546386719, "learning_rate": 1.914337397336738e-05, "loss": 0.2726, "step": 7350 }, { "epoch": 0.13280104579695073, "grad_norm": 0.5867427587509155, "learning_rate": 1.9142225059939073e-05, "loss": 0.2936, "step": 7355 }, { "epoch": 0.13289132522985145, "grad_norm": 0.49512407183647156, "learning_rate": 1.914107541108523e-05, "loss": 0.3274, "step": 7360 }, { "epoch": 0.13298160466275216, "grad_norm": 0.4875224828720093, "learning_rate": 1.9139925026898335e-05, "loss": 0.3373, "step": 7365 }, { "epoch": 0.13307188409565288, "grad_norm": 0.3881199359893799, "learning_rate": 1.913877390747092e-05, "loss": 0.2481, "step": 7370 }, { "epoch": 0.1331621635285536, "grad_norm": 0.5740962028503418, "learning_rate": 1.9137622052895593e-05, "loss": 0.3268, "step": 7375 }, { "epoch": 0.1332524429614543, "grad_norm": 0.39869681000709534, "learning_rate": 1.913646946326501e-05, "loss": 0.3082, "step": 7380 }, { "epoch": 0.13334272239435502, "grad_norm": 0.41652965545654297, "learning_rate": 1.9135316138671884e-05, "loss": 0.4155, "step": 7385 }, { "epoch": 0.13343300182725573, "grad_norm": 0.5283089280128479, "learning_rate": 1.9134162079208995e-05, "loss": 0.2462, "step": 7390 }, { "epoch": 0.13352328126015645, "grad_norm": 0.46559688448905945, "learning_rate": 1.9133007284969182e-05, "loss": 0.3209, "step": 7395 }, { "epoch": 0.13361356069305716, "grad_norm": 0.6211048364639282, "learning_rate": 1.9131851756045334e-05, "loss": 0.2968, "step": 7400 }, { "epoch": 0.13370384012595787, "grad_norm": 0.45277780294418335, "learning_rate": 1.9130695492530403e-05, "loss": 0.3602, "step": 7405 }, { "epoch": 0.1337941195588586, "grad_norm": 0.5387540459632874, "learning_rate": 1.912953849451741e-05, "loss": 0.3366, "step": 7410 }, { "epoch": 0.1338843989917593, "grad_norm": 0.44352853298187256, "learning_rate": 1.9128380762099422e-05, "loss": 0.3103, "step": 7415 }, { "epoch": 0.13397467842466002, "grad_norm": 0.5637753009796143, "learning_rate": 1.9127222295369576e-05, "loss": 0.4029, "step": 7420 }, { "epoch": 0.13406495785756073, "grad_norm": 0.36902207136154175, "learning_rate": 1.9126063094421055e-05, "loss": 0.3426, "step": 7425 }, { "epoch": 0.13415523729046144, "grad_norm": 0.2464890480041504, "learning_rate": 1.912490315934711e-05, "loss": 0.2659, "step": 7430 }, { "epoch": 0.13424551672336216, "grad_norm": 0.454995721578598, "learning_rate": 1.912374249024105e-05, "loss": 0.3515, "step": 7435 }, { "epoch": 0.13433579615626287, "grad_norm": 0.4358680546283722, "learning_rate": 1.9122581087196245e-05, "loss": 0.3289, "step": 7440 }, { "epoch": 0.13442607558916359, "grad_norm": 0.4596993923187256, "learning_rate": 1.9121418950306117e-05, "loss": 0.2327, "step": 7445 }, { "epoch": 0.1345163550220643, "grad_norm": 0.5084929466247559, "learning_rate": 1.9120256079664154e-05, "loss": 0.3468, "step": 7450 }, { "epoch": 0.134606634454965, "grad_norm": 0.3656415343284607, "learning_rate": 1.9119092475363902e-05, "loss": 0.27, "step": 7455 }, { "epoch": 0.13469691388786573, "grad_norm": 0.40600907802581787, "learning_rate": 1.911792813749896e-05, "loss": 0.1852, "step": 7460 }, { "epoch": 0.13478719332076644, "grad_norm": 0.42754778265953064, "learning_rate": 1.9116763066162992e-05, "loss": 0.3033, "step": 7465 }, { "epoch": 0.13487747275366715, "grad_norm": 0.43957412242889404, "learning_rate": 1.911559726144972e-05, "loss": 0.2942, "step": 7470 }, { "epoch": 0.13496775218656787, "grad_norm": 0.3917752802371979, "learning_rate": 1.911443072345293e-05, "loss": 0.3116, "step": 7475 }, { "epoch": 0.13505803161946858, "grad_norm": 0.6572051048278809, "learning_rate": 1.9113263452266457e-05, "loss": 0.3136, "step": 7480 }, { "epoch": 0.1351483110523693, "grad_norm": 0.4349372386932373, "learning_rate": 1.9112095447984195e-05, "loss": 0.3361, "step": 7485 }, { "epoch": 0.13523859048527, "grad_norm": 0.43233203887939453, "learning_rate": 1.9110926710700106e-05, "loss": 0.3134, "step": 7490 }, { "epoch": 0.13532886991817072, "grad_norm": 0.6694607138633728, "learning_rate": 1.910975724050821e-05, "loss": 0.3164, "step": 7495 }, { "epoch": 0.13541914935107144, "grad_norm": 0.5502925515174866, "learning_rate": 1.9108587037502573e-05, "loss": 0.3057, "step": 7500 }, { "epoch": 0.13550942878397215, "grad_norm": 0.508446216583252, "learning_rate": 1.9107416101777337e-05, "loss": 0.3227, "step": 7505 }, { "epoch": 0.13559970821687287, "grad_norm": 0.4535052180290222, "learning_rate": 1.9106244433426692e-05, "loss": 0.2741, "step": 7510 }, { "epoch": 0.13568998764977358, "grad_norm": 0.4868471324443817, "learning_rate": 1.910507203254489e-05, "loss": 0.2351, "step": 7515 }, { "epoch": 0.1357802670826743, "grad_norm": 0.9161044955253601, "learning_rate": 1.9103898899226246e-05, "loss": 0.3767, "step": 7520 }, { "epoch": 0.135870546515575, "grad_norm": 0.5390794277191162, "learning_rate": 1.9102725033565125e-05, "loss": 0.3531, "step": 7525 }, { "epoch": 0.13596082594847572, "grad_norm": 0.42803600430488586, "learning_rate": 1.9101550435655964e-05, "loss": 0.4034, "step": 7530 }, { "epoch": 0.13605110538137644, "grad_norm": 0.4380834102630615, "learning_rate": 1.9100375105593237e-05, "loss": 0.3925, "step": 7535 }, { "epoch": 0.13614138481427715, "grad_norm": 0.53178870677948, "learning_rate": 1.9099199043471505e-05, "loss": 0.26, "step": 7540 }, { "epoch": 0.13623166424717786, "grad_norm": 0.4513871669769287, "learning_rate": 1.9098022249385364e-05, "loss": 0.2941, "step": 7545 }, { "epoch": 0.13632194368007858, "grad_norm": 0.701305091381073, "learning_rate": 1.9096844723429485e-05, "loss": 0.49, "step": 7550 }, { "epoch": 0.1364122231129793, "grad_norm": 0.37477660179138184, "learning_rate": 1.9095666465698587e-05, "loss": 0.3307, "step": 7555 }, { "epoch": 0.13650250254588, "grad_norm": 0.39110666513442993, "learning_rate": 1.9094487476287452e-05, "loss": 0.3496, "step": 7560 }, { "epoch": 0.13659278197878072, "grad_norm": 0.3477240800857544, "learning_rate": 1.9093307755290925e-05, "loss": 0.4175, "step": 7565 }, { "epoch": 0.13668306141168143, "grad_norm": 0.4026183784008026, "learning_rate": 1.90921273028039e-05, "loss": 0.25, "step": 7570 }, { "epoch": 0.13677334084458215, "grad_norm": 0.4791066348552704, "learning_rate": 1.9090946118921345e-05, "loss": 0.2891, "step": 7575 }, { "epoch": 0.13686362027748286, "grad_norm": 0.33462172746658325, "learning_rate": 1.908976420373827e-05, "loss": 0.4292, "step": 7580 }, { "epoch": 0.13695389971038358, "grad_norm": 0.42122966051101685, "learning_rate": 1.9088581557349753e-05, "loss": 0.3056, "step": 7585 }, { "epoch": 0.1370441791432843, "grad_norm": 0.4601115882396698, "learning_rate": 1.9087398179850933e-05, "loss": 0.347, "step": 7590 }, { "epoch": 0.137134458576185, "grad_norm": 0.6133005619049072, "learning_rate": 1.9086214071336998e-05, "loss": 0.3491, "step": 7595 }, { "epoch": 0.13722473800908572, "grad_norm": 0.6506732702255249, "learning_rate": 1.9085029231903205e-05, "loss": 0.4802, "step": 7600 }, { "epoch": 0.13731501744198643, "grad_norm": 0.4467129707336426, "learning_rate": 1.9083843661644865e-05, "loss": 0.343, "step": 7605 }, { "epoch": 0.13740529687488715, "grad_norm": 0.3698214292526245, "learning_rate": 1.9082657360657347e-05, "loss": 0.252, "step": 7610 }, { "epoch": 0.13749557630778786, "grad_norm": 0.35796207189559937, "learning_rate": 1.9081470329036086e-05, "loss": 0.3932, "step": 7615 }, { "epoch": 0.13758585574068857, "grad_norm": 0.5271620154380798, "learning_rate": 1.908028256687656e-05, "loss": 0.3614, "step": 7620 }, { "epoch": 0.1376761351735893, "grad_norm": 0.371025949716568, "learning_rate": 1.9079094074274325e-05, "loss": 0.2348, "step": 7625 }, { "epoch": 0.13776641460649, "grad_norm": 0.5057605504989624, "learning_rate": 1.9077904851324982e-05, "loss": 0.3497, "step": 7630 }, { "epoch": 0.13785669403939071, "grad_norm": 0.4116515517234802, "learning_rate": 1.9076714898124197e-05, "loss": 0.2773, "step": 7635 }, { "epoch": 0.13794697347229143, "grad_norm": 0.5498316287994385, "learning_rate": 1.9075524214767694e-05, "loss": 0.2589, "step": 7640 }, { "epoch": 0.13803725290519214, "grad_norm": 0.6358838081359863, "learning_rate": 1.9074332801351253e-05, "loss": 0.4033, "step": 7645 }, { "epoch": 0.13812753233809286, "grad_norm": 0.33449217677116394, "learning_rate": 1.9073140657970712e-05, "loss": 0.3144, "step": 7650 }, { "epoch": 0.13821781177099357, "grad_norm": 0.4676278233528137, "learning_rate": 1.9071947784721976e-05, "loss": 0.3036, "step": 7655 }, { "epoch": 0.13830809120389428, "grad_norm": 0.6691996455192566, "learning_rate": 1.9070754181701e-05, "loss": 0.2843, "step": 7660 }, { "epoch": 0.138398370636795, "grad_norm": 0.3309561014175415, "learning_rate": 1.9069559849003795e-05, "loss": 0.2363, "step": 7665 }, { "epoch": 0.1384886500696957, "grad_norm": 0.5446121692657471, "learning_rate": 1.906836478672645e-05, "loss": 0.2942, "step": 7670 }, { "epoch": 0.13857892950259643, "grad_norm": 0.4546838700771332, "learning_rate": 1.9067168994965088e-05, "loss": 0.2817, "step": 7675 }, { "epoch": 0.13866920893549714, "grad_norm": 0.6302188634872437, "learning_rate": 1.9065972473815906e-05, "loss": 0.4446, "step": 7680 }, { "epoch": 0.13875948836839785, "grad_norm": 0.5918726921081543, "learning_rate": 1.9064775223375154e-05, "loss": 0.3337, "step": 7685 }, { "epoch": 0.13884976780129857, "grad_norm": 0.710841178894043, "learning_rate": 1.9063577243739143e-05, "loss": 0.3233, "step": 7690 }, { "epoch": 0.13894004723419928, "grad_norm": 0.5338574647903442, "learning_rate": 1.9062378535004236e-05, "loss": 0.2823, "step": 7695 }, { "epoch": 0.1390303266671, "grad_norm": 0.6912916898727417, "learning_rate": 1.9061179097266876e-05, "loss": 0.3708, "step": 7700 }, { "epoch": 0.1391206061000007, "grad_norm": 0.5621781945228577, "learning_rate": 1.905997893062353e-05, "loss": 0.2592, "step": 7705 }, { "epoch": 0.13921088553290142, "grad_norm": 0.47019150853157043, "learning_rate": 1.9058778035170754e-05, "loss": 0.3395, "step": 7710 }, { "epoch": 0.13930116496580214, "grad_norm": 0.35848888754844666, "learning_rate": 1.9057576411005153e-05, "loss": 0.2767, "step": 7715 }, { "epoch": 0.13939144439870285, "grad_norm": 0.4225088953971863, "learning_rate": 1.905637405822338e-05, "loss": 0.3292, "step": 7720 }, { "epoch": 0.13948172383160357, "grad_norm": 0.46198078989982605, "learning_rate": 1.9055170976922158e-05, "loss": 0.3877, "step": 7725 }, { "epoch": 0.1395720032645043, "grad_norm": 0.5311920046806335, "learning_rate": 1.9053967167198274e-05, "loss": 0.2245, "step": 7730 }, { "epoch": 0.13966228269740502, "grad_norm": 0.34501591324806213, "learning_rate": 1.905276262914856e-05, "loss": 0.3565, "step": 7735 }, { "epoch": 0.13975256213030574, "grad_norm": 0.42606121301651, "learning_rate": 1.905155736286991e-05, "loss": 0.3168, "step": 7740 }, { "epoch": 0.13984284156320645, "grad_norm": 0.599910318851471, "learning_rate": 1.905035136845928e-05, "loss": 0.2913, "step": 7745 }, { "epoch": 0.13993312099610716, "grad_norm": 0.3511393368244171, "learning_rate": 1.9049144646013686e-05, "loss": 0.3479, "step": 7750 }, { "epoch": 0.14002340042900788, "grad_norm": 0.4650457799434662, "learning_rate": 1.90479371956302e-05, "loss": 0.3055, "step": 7755 }, { "epoch": 0.1401136798619086, "grad_norm": 0.5261766910552979, "learning_rate": 1.904672901740595e-05, "loss": 0.3334, "step": 7760 }, { "epoch": 0.1402039592948093, "grad_norm": 0.4383196532726288, "learning_rate": 1.904552011143813e-05, "loss": 0.2858, "step": 7765 }, { "epoch": 0.14029423872771002, "grad_norm": 0.5381696820259094, "learning_rate": 1.9044310477823978e-05, "loss": 0.244, "step": 7770 }, { "epoch": 0.14038451816061073, "grad_norm": 0.411138117313385, "learning_rate": 1.9043100116660812e-05, "loss": 0.258, "step": 7775 }, { "epoch": 0.14047479759351145, "grad_norm": 0.5190327167510986, "learning_rate": 1.9041889028045987e-05, "loss": 0.3412, "step": 7780 }, { "epoch": 0.14056507702641216, "grad_norm": 0.7907403111457825, "learning_rate": 1.9040677212076937e-05, "loss": 0.3496, "step": 7785 }, { "epoch": 0.14065535645931287, "grad_norm": 0.36397334933280945, "learning_rate": 1.903946466885113e-05, "loss": 0.2988, "step": 7790 }, { "epoch": 0.1407456358922136, "grad_norm": 0.41145774722099304, "learning_rate": 1.9038251398466118e-05, "loss": 0.3286, "step": 7795 }, { "epoch": 0.1408359153251143, "grad_norm": 0.6124861836433411, "learning_rate": 1.9037037401019494e-05, "loss": 0.3634, "step": 7800 }, { "epoch": 0.14092619475801502, "grad_norm": 0.3801920711994171, "learning_rate": 1.903582267660892e-05, "loss": 0.2512, "step": 7805 }, { "epoch": 0.14101647419091573, "grad_norm": 0.3616124987602234, "learning_rate": 1.9034607225332104e-05, "loss": 0.3029, "step": 7810 }, { "epoch": 0.14110675362381644, "grad_norm": 0.6739056706428528, "learning_rate": 1.9033391047286826e-05, "loss": 0.3886, "step": 7815 }, { "epoch": 0.14119703305671716, "grad_norm": 0.5704717040061951, "learning_rate": 1.9032174142570918e-05, "loss": 0.2179, "step": 7820 }, { "epoch": 0.14128731248961787, "grad_norm": 0.36887940764427185, "learning_rate": 1.903095651128227e-05, "loss": 0.3024, "step": 7825 }, { "epoch": 0.1413775919225186, "grad_norm": 0.413861483335495, "learning_rate": 1.9029738153518832e-05, "loss": 0.2548, "step": 7830 }, { "epoch": 0.1414678713554193, "grad_norm": 0.4529048502445221, "learning_rate": 1.9028519069378613e-05, "loss": 0.4083, "step": 7835 }, { "epoch": 0.14155815078832001, "grad_norm": 0.58521968126297, "learning_rate": 1.902729925895968e-05, "loss": 0.3763, "step": 7840 }, { "epoch": 0.14164843022122073, "grad_norm": 0.5197453498840332, "learning_rate": 1.9026078722360154e-05, "loss": 0.3415, "step": 7845 }, { "epoch": 0.14173870965412144, "grad_norm": 0.3636833727359772, "learning_rate": 1.9024857459678218e-05, "loss": 0.2997, "step": 7850 }, { "epoch": 0.14182898908702216, "grad_norm": 0.2941308915615082, "learning_rate": 1.902363547101212e-05, "loss": 0.412, "step": 7855 }, { "epoch": 0.14191926851992287, "grad_norm": 0.6289583444595337, "learning_rate": 1.9022412756460157e-05, "loss": 0.3075, "step": 7860 }, { "epoch": 0.14200954795282358, "grad_norm": 0.45096611976623535, "learning_rate": 1.902118931612069e-05, "loss": 0.3337, "step": 7865 }, { "epoch": 0.1420998273857243, "grad_norm": 0.5643873810768127, "learning_rate": 1.9019965150092122e-05, "loss": 0.3478, "step": 7870 }, { "epoch": 0.142190106818625, "grad_norm": 0.4536270499229431, "learning_rate": 1.9018740258472948e-05, "loss": 0.3698, "step": 7875 }, { "epoch": 0.14228038625152573, "grad_norm": 0.4536241590976715, "learning_rate": 1.9017514641361693e-05, "loss": 0.2808, "step": 7880 }, { "epoch": 0.14237066568442644, "grad_norm": 0.4845343232154846, "learning_rate": 1.9016288298856948e-05, "loss": 0.2832, "step": 7885 }, { "epoch": 0.14246094511732715, "grad_norm": 0.3254559338092804, "learning_rate": 1.9015061231057358e-05, "loss": 0.4254, "step": 7890 }, { "epoch": 0.14255122455022787, "grad_norm": 0.43062731623649597, "learning_rate": 1.9013833438061647e-05, "loss": 0.3528, "step": 7895 }, { "epoch": 0.14264150398312858, "grad_norm": 0.4750933349132538, "learning_rate": 1.9012604919968568e-05, "loss": 0.3383, "step": 7900 }, { "epoch": 0.1427317834160293, "grad_norm": 0.538891077041626, "learning_rate": 1.901137567687695e-05, "loss": 0.2925, "step": 7905 }, { "epoch": 0.14282206284893, "grad_norm": 0.5271896123886108, "learning_rate": 1.9010145708885678e-05, "loss": 0.3562, "step": 7910 }, { "epoch": 0.14291234228183072, "grad_norm": 0.3939216434955597, "learning_rate": 1.9008915016093695e-05, "loss": 0.2797, "step": 7915 }, { "epoch": 0.14300262171473144, "grad_norm": 1.2909743785858154, "learning_rate": 1.90076835986e-05, "loss": 0.3289, "step": 7920 }, { "epoch": 0.14309290114763215, "grad_norm": 0.6015067100524902, "learning_rate": 1.900645145650365e-05, "loss": 0.3242, "step": 7925 }, { "epoch": 0.14318318058053287, "grad_norm": 0.38563331961631775, "learning_rate": 1.9005218589903768e-05, "loss": 0.3693, "step": 7930 }, { "epoch": 0.14327346001343358, "grad_norm": 0.2794496417045593, "learning_rate": 1.9003984998899523e-05, "loss": 0.3101, "step": 7935 }, { "epoch": 0.1433637394463343, "grad_norm": 0.32866206765174866, "learning_rate": 1.900275068359015e-05, "loss": 0.262, "step": 7940 }, { "epoch": 0.143454018879235, "grad_norm": 0.44161251187324524, "learning_rate": 1.900151564407494e-05, "loss": 0.2301, "step": 7945 }, { "epoch": 0.14354429831213572, "grad_norm": 0.4842809736728668, "learning_rate": 1.9000279880453246e-05, "loss": 0.3021, "step": 7950 }, { "epoch": 0.14363457774503643, "grad_norm": 0.35649093985557556, "learning_rate": 1.899904339282447e-05, "loss": 0.3484, "step": 7955 }, { "epoch": 0.14372485717793715, "grad_norm": 0.44895628094673157, "learning_rate": 1.8997806181288086e-05, "loss": 0.3674, "step": 7960 }, { "epoch": 0.14381513661083786, "grad_norm": 0.5911328196525574, "learning_rate": 1.8996568245943612e-05, "loss": 0.3428, "step": 7965 }, { "epoch": 0.14390541604373858, "grad_norm": 0.4658902883529663, "learning_rate": 1.8995329586890637e-05, "loss": 0.2989, "step": 7970 }, { "epoch": 0.1439956954766393, "grad_norm": 0.2901282012462616, "learning_rate": 1.8994090204228796e-05, "loss": 0.3787, "step": 7975 }, { "epoch": 0.14408597490954, "grad_norm": 0.44001367688179016, "learning_rate": 1.8992850098057795e-05, "loss": 0.2329, "step": 7980 }, { "epoch": 0.14417625434244072, "grad_norm": 0.9763175845146179, "learning_rate": 1.8991609268477383e-05, "loss": 0.4526, "step": 7985 }, { "epoch": 0.14426653377534143, "grad_norm": 0.263467401266098, "learning_rate": 1.8990367715587383e-05, "loss": 0.2223, "step": 7990 }, { "epoch": 0.14435681320824215, "grad_norm": 0.723092257976532, "learning_rate": 1.898912543948767e-05, "loss": 0.3509, "step": 7995 }, { "epoch": 0.14444709264114286, "grad_norm": 0.4045044481754303, "learning_rate": 1.898788244027817e-05, "loss": 0.3391, "step": 8000 }, { "epoch": 0.14453737207404357, "grad_norm": 0.43147021532058716, "learning_rate": 1.8986638718058872e-05, "loss": 0.2449, "step": 8005 }, { "epoch": 0.1446276515069443, "grad_norm": 0.6426684856414795, "learning_rate": 1.8985394272929835e-05, "loss": 0.3077, "step": 8010 }, { "epoch": 0.144717930939845, "grad_norm": 0.4371914863586426, "learning_rate": 1.8984149104991154e-05, "loss": 0.2966, "step": 8015 }, { "epoch": 0.14480821037274572, "grad_norm": 0.6035380363464355, "learning_rate": 1.8982903214343e-05, "loss": 0.3514, "step": 8020 }, { "epoch": 0.14489848980564643, "grad_norm": 0.6385045051574707, "learning_rate": 1.8981656601085596e-05, "loss": 0.2887, "step": 8025 }, { "epoch": 0.14498876923854714, "grad_norm": 0.3629067838191986, "learning_rate": 1.898040926531922e-05, "loss": 0.3372, "step": 8030 }, { "epoch": 0.14507904867144786, "grad_norm": 0.48646870255470276, "learning_rate": 1.897916120714421e-05, "loss": 0.4035, "step": 8035 }, { "epoch": 0.14516932810434857, "grad_norm": 0.3212614357471466, "learning_rate": 1.8977912426660968e-05, "loss": 0.226, "step": 8040 }, { "epoch": 0.14525960753724929, "grad_norm": 0.49237778782844543, "learning_rate": 1.897666292396995e-05, "loss": 0.3552, "step": 8045 }, { "epoch": 0.14534988697015, "grad_norm": 0.46724480390548706, "learning_rate": 1.8975412699171665e-05, "loss": 0.3779, "step": 8050 }, { "epoch": 0.1454401664030507, "grad_norm": 0.4620061218738556, "learning_rate": 1.8974161752366684e-05, "loss": 0.3916, "step": 8055 }, { "epoch": 0.14553044583595143, "grad_norm": 0.35850104689598083, "learning_rate": 1.897291008365564e-05, "loss": 0.2794, "step": 8060 }, { "epoch": 0.14562072526885214, "grad_norm": 0.5722913146018982, "learning_rate": 1.897165769313922e-05, "loss": 0.367, "step": 8065 }, { "epoch": 0.14571100470175286, "grad_norm": 0.4498758912086487, "learning_rate": 1.8970404580918165e-05, "loss": 0.3568, "step": 8070 }, { "epoch": 0.14580128413465357, "grad_norm": 0.4813031852245331, "learning_rate": 1.8969150747093286e-05, "loss": 0.2986, "step": 8075 }, { "epoch": 0.14589156356755428, "grad_norm": 0.3807981312274933, "learning_rate": 1.896789619176544e-05, "loss": 0.3528, "step": 8080 }, { "epoch": 0.145981843000455, "grad_norm": 0.6421025395393372, "learning_rate": 1.896664091503555e-05, "loss": 0.2872, "step": 8085 }, { "epoch": 0.1460721224333557, "grad_norm": 0.3946101665496826, "learning_rate": 1.8965384917004587e-05, "loss": 0.3196, "step": 8090 }, { "epoch": 0.14616240186625643, "grad_norm": 0.5006182789802551, "learning_rate": 1.8964128197773598e-05, "loss": 0.3334, "step": 8095 }, { "epoch": 0.14625268129915714, "grad_norm": 0.5575314164161682, "learning_rate": 1.8962870757443667e-05, "loss": 0.437, "step": 8100 }, { "epoch": 0.14634296073205785, "grad_norm": 0.33846473693847656, "learning_rate": 1.896161259611595e-05, "loss": 0.3118, "step": 8105 }, { "epoch": 0.14643324016495857, "grad_norm": 0.36873605847358704, "learning_rate": 1.896035371389166e-05, "loss": 0.2947, "step": 8110 }, { "epoch": 0.14652351959785928, "grad_norm": 0.39913228154182434, "learning_rate": 1.8959094110872062e-05, "loss": 0.3383, "step": 8115 }, { "epoch": 0.14661379903076, "grad_norm": 0.4663682281970978, "learning_rate": 1.895783378715848e-05, "loss": 0.243, "step": 8120 }, { "epoch": 0.14670407846366074, "grad_norm": 0.4984837472438812, "learning_rate": 1.89565727428523e-05, "loss": 0.3388, "step": 8125 }, { "epoch": 0.14679435789656145, "grad_norm": 0.3878990411758423, "learning_rate": 1.8955310978054963e-05, "loss": 0.2641, "step": 8130 }, { "epoch": 0.14688463732946216, "grad_norm": 0.46170365810394287, "learning_rate": 1.895404849286797e-05, "loss": 0.3186, "step": 8135 }, { "epoch": 0.14697491676236288, "grad_norm": 0.46712246537208557, "learning_rate": 1.8952785287392876e-05, "loss": 0.2991, "step": 8140 }, { "epoch": 0.1470651961952636, "grad_norm": 0.4796598553657532, "learning_rate": 1.89515213617313e-05, "loss": 0.3449, "step": 8145 }, { "epoch": 0.1471554756281643, "grad_norm": 0.28505873680114746, "learning_rate": 1.8950256715984912e-05, "loss": 0.2601, "step": 8150 }, { "epoch": 0.14724575506106502, "grad_norm": 0.5526983141899109, "learning_rate": 1.894899135025545e-05, "loss": 0.2915, "step": 8155 }, { "epoch": 0.14733603449396573, "grad_norm": 0.35364168882369995, "learning_rate": 1.8947725264644697e-05, "loss": 0.2816, "step": 8160 }, { "epoch": 0.14742631392686645, "grad_norm": 0.48852768540382385, "learning_rate": 1.8946458459254505e-05, "loss": 0.3107, "step": 8165 }, { "epoch": 0.14751659335976716, "grad_norm": 0.5498285889625549, "learning_rate": 1.894519093418677e-05, "loss": 0.3314, "step": 8170 }, { "epoch": 0.14760687279266788, "grad_norm": 0.332851380109787, "learning_rate": 1.894392268954347e-05, "loss": 0.3827, "step": 8175 }, { "epoch": 0.1476971522255686, "grad_norm": 0.38158583641052246, "learning_rate": 1.8942653725426618e-05, "loss": 0.3227, "step": 8180 }, { "epoch": 0.1477874316584693, "grad_norm": 0.4075416326522827, "learning_rate": 1.894138404193829e-05, "loss": 0.4028, "step": 8185 }, { "epoch": 0.14787771109137002, "grad_norm": 0.5352085828781128, "learning_rate": 1.8940113639180627e-05, "loss": 0.2752, "step": 8190 }, { "epoch": 0.14796799052427073, "grad_norm": 0.4605783224105835, "learning_rate": 1.893884251725582e-05, "loss": 0.4026, "step": 8195 }, { "epoch": 0.14805826995717145, "grad_norm": 0.31095898151397705, "learning_rate": 1.893757067626613e-05, "loss": 0.305, "step": 8200 }, { "epoch": 0.14814854939007216, "grad_norm": 0.44574591517448425, "learning_rate": 1.8936298116313854e-05, "loss": 0.3021, "step": 8205 }, { "epoch": 0.14823882882297287, "grad_norm": 0.40220680832862854, "learning_rate": 1.8935024837501373e-05, "loss": 0.3497, "step": 8210 }, { "epoch": 0.1483291082558736, "grad_norm": 0.3617036044597626, "learning_rate": 1.8933750839931106e-05, "loss": 0.3125, "step": 8215 }, { "epoch": 0.1484193876887743, "grad_norm": 0.37707874178886414, "learning_rate": 1.893247612370554e-05, "loss": 0.3204, "step": 8220 }, { "epoch": 0.14850966712167502, "grad_norm": 0.3177054524421692, "learning_rate": 1.8931200688927213e-05, "loss": 0.2895, "step": 8225 }, { "epoch": 0.14859994655457573, "grad_norm": 0.32511448860168457, "learning_rate": 1.892992453569873e-05, "loss": 0.3439, "step": 8230 }, { "epoch": 0.14869022598747644, "grad_norm": 0.5057108402252197, "learning_rate": 1.892864766412274e-05, "loss": 0.431, "step": 8235 }, { "epoch": 0.14878050542037716, "grad_norm": 0.4230477809906006, "learning_rate": 1.8927370074301966e-05, "loss": 0.3074, "step": 8240 }, { "epoch": 0.14887078485327787, "grad_norm": 0.5735442042350769, "learning_rate": 1.8926091766339178e-05, "loss": 0.2622, "step": 8245 }, { "epoch": 0.14896106428617858, "grad_norm": 0.5898981690406799, "learning_rate": 1.8924812740337206e-05, "loss": 0.3139, "step": 8250 }, { "epoch": 0.1490513437190793, "grad_norm": 0.21331223845481873, "learning_rate": 1.8923532996398937e-05, "loss": 0.3723, "step": 8255 }, { "epoch": 0.14914162315198, "grad_norm": 0.409196674823761, "learning_rate": 1.892225253462732e-05, "loss": 0.3487, "step": 8260 }, { "epoch": 0.14923190258488073, "grad_norm": 0.3333122432231903, "learning_rate": 1.892097135512536e-05, "loss": 0.3065, "step": 8265 }, { "epoch": 0.14932218201778144, "grad_norm": 0.4181888997554779, "learning_rate": 1.8919689457996116e-05, "loss": 0.2028, "step": 8270 }, { "epoch": 0.14941246145068215, "grad_norm": 0.5751470923423767, "learning_rate": 1.8918406843342704e-05, "loss": 0.3421, "step": 8275 }, { "epoch": 0.14950274088358287, "grad_norm": 0.5706935524940491, "learning_rate": 1.8917123511268306e-05, "loss": 0.3903, "step": 8280 }, { "epoch": 0.14959302031648358, "grad_norm": 0.8409367203712463, "learning_rate": 1.8915839461876156e-05, "loss": 0.3937, "step": 8285 }, { "epoch": 0.1496832997493843, "grad_norm": 0.44183939695358276, "learning_rate": 1.8914554695269543e-05, "loss": 0.3441, "step": 8290 }, { "epoch": 0.149773579182285, "grad_norm": 0.4171811044216156, "learning_rate": 1.8913269211551824e-05, "loss": 0.284, "step": 8295 }, { "epoch": 0.14986385861518572, "grad_norm": 0.3072311580181122, "learning_rate": 1.89119830108264e-05, "loss": 0.302, "step": 8300 }, { "epoch": 0.14995413804808644, "grad_norm": 0.3270736038684845, "learning_rate": 1.8910696093196742e-05, "loss": 0.4464, "step": 8305 }, { "epoch": 0.15004441748098715, "grad_norm": 0.43450701236724854, "learning_rate": 1.8909408458766367e-05, "loss": 0.3566, "step": 8310 }, { "epoch": 0.15013469691388787, "grad_norm": 0.3389006555080414, "learning_rate": 1.890812010763886e-05, "loss": 0.2731, "step": 8315 }, { "epoch": 0.15022497634678858, "grad_norm": 0.6542930603027344, "learning_rate": 1.8906831039917862e-05, "loss": 0.3361, "step": 8320 }, { "epoch": 0.1503152557796893, "grad_norm": 0.3040352463722229, "learning_rate": 1.890554125570706e-05, "loss": 0.2567, "step": 8325 }, { "epoch": 0.15040553521259, "grad_norm": 0.5242255330085754, "learning_rate": 1.8904250755110216e-05, "loss": 0.2654, "step": 8330 }, { "epoch": 0.15049581464549072, "grad_norm": 0.4824141263961792, "learning_rate": 1.890295953823114e-05, "loss": 0.3723, "step": 8335 }, { "epoch": 0.15058609407839144, "grad_norm": 0.4082823097705841, "learning_rate": 1.89016676051737e-05, "loss": 0.2324, "step": 8340 }, { "epoch": 0.15067637351129215, "grad_norm": 0.3655155897140503, "learning_rate": 1.890037495604182e-05, "loss": 0.3749, "step": 8345 }, { "epoch": 0.15076665294419286, "grad_norm": 0.5974289774894714, "learning_rate": 1.889908159093949e-05, "loss": 0.316, "step": 8350 }, { "epoch": 0.15085693237709358, "grad_norm": 0.3974992632865906, "learning_rate": 1.8897787509970746e-05, "loss": 0.2754, "step": 8355 }, { "epoch": 0.1509472118099943, "grad_norm": 0.5423205494880676, "learning_rate": 1.889649271323969e-05, "loss": 0.3706, "step": 8360 }, { "epoch": 0.151037491242895, "grad_norm": 0.6967107057571411, "learning_rate": 1.8895197200850482e-05, "loss": 0.2848, "step": 8365 }, { "epoch": 0.15112777067579572, "grad_norm": 0.5599609017372131, "learning_rate": 1.889390097290733e-05, "loss": 0.2757, "step": 8370 }, { "epoch": 0.15121805010869643, "grad_norm": 0.4817441999912262, "learning_rate": 1.889260402951451e-05, "loss": 0.3057, "step": 8375 }, { "epoch": 0.15130832954159715, "grad_norm": 0.4420287609100342, "learning_rate": 1.8891306370776353e-05, "loss": 0.303, "step": 8380 }, { "epoch": 0.15139860897449786, "grad_norm": 0.3668675422668457, "learning_rate": 1.8890007996797245e-05, "loss": 0.258, "step": 8385 }, { "epoch": 0.15148888840739858, "grad_norm": 0.5196265578269958, "learning_rate": 1.888870890768163e-05, "loss": 0.2724, "step": 8390 }, { "epoch": 0.1515791678402993, "grad_norm": 0.3072160482406616, "learning_rate": 1.888740910353401e-05, "loss": 0.3446, "step": 8395 }, { "epoch": 0.1516694472732, "grad_norm": 0.5588591694831848, "learning_rate": 1.8886108584458945e-05, "loss": 0.3808, "step": 8400 }, { "epoch": 0.15175972670610072, "grad_norm": 0.32789307832717896, "learning_rate": 1.8884807350561053e-05, "loss": 0.2752, "step": 8405 }, { "epoch": 0.15185000613900143, "grad_norm": 1.054311990737915, "learning_rate": 1.8883505401945008e-05, "loss": 0.4017, "step": 8410 }, { "epoch": 0.15194028557190215, "grad_norm": 0.5561165809631348, "learning_rate": 1.8882202738715542e-05, "loss": 0.3239, "step": 8415 }, { "epoch": 0.15203056500480286, "grad_norm": 0.3699195086956024, "learning_rate": 1.8880899360977447e-05, "loss": 0.2967, "step": 8420 }, { "epoch": 0.15212084443770357, "grad_norm": 0.3856097161769867, "learning_rate": 1.887959526883557e-05, "loss": 0.2988, "step": 8425 }, { "epoch": 0.1522111238706043, "grad_norm": 0.33342596888542175, "learning_rate": 1.887829046239481e-05, "loss": 0.4531, "step": 8430 }, { "epoch": 0.152301403303505, "grad_norm": 0.41385820508003235, "learning_rate": 1.8876984941760137e-05, "loss": 0.2464, "step": 8435 }, { "epoch": 0.15239168273640571, "grad_norm": 0.4393940567970276, "learning_rate": 1.8875678707036568e-05, "loss": 0.3006, "step": 8440 }, { "epoch": 0.15248196216930643, "grad_norm": 0.3313063681125641, "learning_rate": 1.8874371758329178e-05, "loss": 0.3243, "step": 8445 }, { "epoch": 0.15257224160220714, "grad_norm": 0.348006010055542, "learning_rate": 1.8873064095743104e-05, "loss": 0.3317, "step": 8450 }, { "epoch": 0.15266252103510786, "grad_norm": 0.31291893124580383, "learning_rate": 1.8871755719383535e-05, "loss": 0.254, "step": 8455 }, { "epoch": 0.15275280046800857, "grad_norm": 0.44833308458328247, "learning_rate": 1.8870446629355723e-05, "loss": 0.2834, "step": 8460 }, { "epoch": 0.15284307990090928, "grad_norm": 0.40464499592781067, "learning_rate": 1.8869136825764977e-05, "loss": 0.2787, "step": 8465 }, { "epoch": 0.15293335933381, "grad_norm": 0.4436585009098053, "learning_rate": 1.8867826308716655e-05, "loss": 0.3241, "step": 8470 }, { "epoch": 0.1530236387667107, "grad_norm": 0.44052863121032715, "learning_rate": 1.8866515078316183e-05, "loss": 0.4283, "step": 8475 }, { "epoch": 0.15311391819961143, "grad_norm": 0.40572524070739746, "learning_rate": 1.8865203134669038e-05, "loss": 0.248, "step": 8480 }, { "epoch": 0.15320419763251214, "grad_norm": 0.3816758096218109, "learning_rate": 1.8863890477880756e-05, "loss": 0.3807, "step": 8485 }, { "epoch": 0.15329447706541285, "grad_norm": 0.4932968020439148, "learning_rate": 1.8862577108056933e-05, "loss": 0.3574, "step": 8490 }, { "epoch": 0.15338475649831357, "grad_norm": 0.283809632062912, "learning_rate": 1.886126302530322e-05, "loss": 0.2696, "step": 8495 }, { "epoch": 0.15347503593121428, "grad_norm": 0.6454717516899109, "learning_rate": 1.8859948229725324e-05, "loss": 0.2792, "step": 8500 }, { "epoch": 0.153565315364115, "grad_norm": 0.6588291525840759, "learning_rate": 1.885863272142901e-05, "loss": 0.2829, "step": 8505 }, { "epoch": 0.1536555947970157, "grad_norm": 0.3564765751361847, "learning_rate": 1.88573165005201e-05, "loss": 0.2167, "step": 8510 }, { "epoch": 0.15374587422991642, "grad_norm": 0.40071821212768555, "learning_rate": 1.885599956710448e-05, "loss": 0.3207, "step": 8515 }, { "epoch": 0.15383615366281717, "grad_norm": 0.47020772099494934, "learning_rate": 1.885468192128808e-05, "loss": 0.2499, "step": 8520 }, { "epoch": 0.15392643309571788, "grad_norm": 0.39165768027305603, "learning_rate": 1.8853363563176903e-05, "loss": 0.3108, "step": 8525 }, { "epoch": 0.1540167125286186, "grad_norm": 0.3537646234035492, "learning_rate": 1.8852044492876995e-05, "loss": 0.3809, "step": 8530 }, { "epoch": 0.1541069919615193, "grad_norm": 0.3931910991668701, "learning_rate": 1.885072471049447e-05, "loss": 0.3819, "step": 8535 }, { "epoch": 0.15419727139442002, "grad_norm": 0.5318612456321716, "learning_rate": 1.8849404216135493e-05, "loss": 0.2379, "step": 8540 }, { "epoch": 0.15428755082732074, "grad_norm": 0.46349745988845825, "learning_rate": 1.8848083009906288e-05, "loss": 0.4473, "step": 8545 }, { "epoch": 0.15437783026022145, "grad_norm": 0.718696117401123, "learning_rate": 1.8846761091913138e-05, "loss": 0.2779, "step": 8550 }, { "epoch": 0.15446810969312216, "grad_norm": 0.32716104388237, "learning_rate": 1.8845438462262378e-05, "loss": 0.2581, "step": 8555 }, { "epoch": 0.15455838912602288, "grad_norm": 0.33014896512031555, "learning_rate": 1.8844115121060408e-05, "loss": 0.3538, "step": 8560 }, { "epoch": 0.1546486685589236, "grad_norm": 0.7303894758224487, "learning_rate": 1.8842791068413676e-05, "loss": 0.3095, "step": 8565 }, { "epoch": 0.1547389479918243, "grad_norm": 0.661654531955719, "learning_rate": 1.88414663044287e-05, "loss": 0.2514, "step": 8570 }, { "epoch": 0.15482922742472502, "grad_norm": 0.48193421959877014, "learning_rate": 1.884014082921204e-05, "loss": 0.351, "step": 8575 }, { "epoch": 0.15491950685762573, "grad_norm": 0.6869872808456421, "learning_rate": 1.8838814642870328e-05, "loss": 0.297, "step": 8580 }, { "epoch": 0.15500978629052645, "grad_norm": 0.5479223132133484, "learning_rate": 1.883748774551024e-05, "loss": 0.2831, "step": 8585 }, { "epoch": 0.15510006572342716, "grad_norm": 0.5548229813575745, "learning_rate": 1.8836160137238516e-05, "loss": 0.3498, "step": 8590 }, { "epoch": 0.15519034515632787, "grad_norm": 0.5103094577789307, "learning_rate": 1.8834831818161957e-05, "loss": 0.3182, "step": 8595 }, { "epoch": 0.1552806245892286, "grad_norm": 0.3971729874610901, "learning_rate": 1.8833502788387412e-05, "loss": 0.3637, "step": 8600 }, { "epoch": 0.1553709040221293, "grad_norm": 0.5114766955375671, "learning_rate": 1.8832173048021796e-05, "loss": 0.3682, "step": 8605 }, { "epoch": 0.15546118345503002, "grad_norm": 0.3616827428340912, "learning_rate": 1.8830842597172073e-05, "loss": 0.3377, "step": 8610 }, { "epoch": 0.15555146288793073, "grad_norm": 0.3950643837451935, "learning_rate": 1.882951143594527e-05, "loss": 0.3024, "step": 8615 }, { "epoch": 0.15564174232083144, "grad_norm": 0.46623605489730835, "learning_rate": 1.8828179564448468e-05, "loss": 0.2826, "step": 8620 }, { "epoch": 0.15573202175373216, "grad_norm": 0.542243242263794, "learning_rate": 1.8826846982788805e-05, "loss": 0.2457, "step": 8625 }, { "epoch": 0.15582230118663287, "grad_norm": 0.4826582968235016, "learning_rate": 1.8825513691073484e-05, "loss": 0.2876, "step": 8630 }, { "epoch": 0.1559125806195336, "grad_norm": 0.3582850396633148, "learning_rate": 1.882417968940975e-05, "loss": 0.2749, "step": 8635 }, { "epoch": 0.1560028600524343, "grad_norm": 0.4192401170730591, "learning_rate": 1.882284497790492e-05, "loss": 0.337, "step": 8640 }, { "epoch": 0.15609313948533501, "grad_norm": 0.37834084033966064, "learning_rate": 1.882150955666636e-05, "loss": 0.3137, "step": 8645 }, { "epoch": 0.15618341891823573, "grad_norm": 0.5601775050163269, "learning_rate": 1.8820173425801493e-05, "loss": 0.2094, "step": 8650 }, { "epoch": 0.15627369835113644, "grad_norm": 0.4296712279319763, "learning_rate": 1.8818836585417804e-05, "loss": 0.3138, "step": 8655 }, { "epoch": 0.15636397778403716, "grad_norm": 0.2774662375450134, "learning_rate": 1.881749903562283e-05, "loss": 0.2602, "step": 8660 }, { "epoch": 0.15645425721693787, "grad_norm": 0.32397139072418213, "learning_rate": 1.881616077652417e-05, "loss": 0.2654, "step": 8665 }, { "epoch": 0.15654453664983858, "grad_norm": 0.49733802676200867, "learning_rate": 1.8814821808229476e-05, "loss": 0.2114, "step": 8670 }, { "epoch": 0.1566348160827393, "grad_norm": 0.5005702972412109, "learning_rate": 1.8813482130846454e-05, "loss": 0.3082, "step": 8675 }, { "epoch": 0.15672509551564, "grad_norm": 0.48754778504371643, "learning_rate": 1.8812141744482876e-05, "loss": 0.5073, "step": 8680 }, { "epoch": 0.15681537494854073, "grad_norm": 0.575425922870636, "learning_rate": 1.881080064924657e-05, "loss": 0.3876, "step": 8685 }, { "epoch": 0.15690565438144144, "grad_norm": 0.4367899000644684, "learning_rate": 1.8809458845245405e-05, "loss": 0.296, "step": 8690 }, { "epoch": 0.15699593381434215, "grad_norm": 0.33415520191192627, "learning_rate": 1.880811633258733e-05, "loss": 0.2484, "step": 8695 }, { "epoch": 0.15708621324724287, "grad_norm": 0.3557751774787903, "learning_rate": 1.8806773111380337e-05, "loss": 0.3323, "step": 8700 }, { "epoch": 0.15717649268014358, "grad_norm": 0.409047394990921, "learning_rate": 1.880542918173248e-05, "loss": 0.1935, "step": 8705 }, { "epoch": 0.1572667721130443, "grad_norm": 0.3447967767715454, "learning_rate": 1.8804084543751863e-05, "loss": 0.3283, "step": 8710 }, { "epoch": 0.157357051545945, "grad_norm": 0.3793751001358032, "learning_rate": 1.880273919754666e-05, "loss": 0.1691, "step": 8715 }, { "epoch": 0.15744733097884572, "grad_norm": 0.6087231040000916, "learning_rate": 1.880139314322509e-05, "loss": 0.3477, "step": 8720 }, { "epoch": 0.15753761041174644, "grad_norm": 0.6917526721954346, "learning_rate": 1.8800046380895435e-05, "loss": 0.3205, "step": 8725 }, { "epoch": 0.15762788984464715, "grad_norm": 0.45527705550193787, "learning_rate": 1.8798698910666028e-05, "loss": 0.3088, "step": 8730 }, { "epoch": 0.15771816927754786, "grad_norm": 0.5412310361862183, "learning_rate": 1.8797350732645268e-05, "loss": 0.2451, "step": 8735 }, { "epoch": 0.15780844871044858, "grad_norm": 0.3872532844543457, "learning_rate": 1.8796001846941604e-05, "loss": 0.3598, "step": 8740 }, { "epoch": 0.1578987281433493, "grad_norm": 0.40230852365493774, "learning_rate": 1.879465225366355e-05, "loss": 0.3362, "step": 8745 }, { "epoch": 0.15798900757625, "grad_norm": 0.35699185729026794, "learning_rate": 1.879330195291966e-05, "loss": 0.2791, "step": 8750 }, { "epoch": 0.15807928700915072, "grad_norm": 0.9417124390602112, "learning_rate": 1.879195094481856e-05, "loss": 0.3251, "step": 8755 }, { "epoch": 0.15816956644205143, "grad_norm": 0.6971483826637268, "learning_rate": 1.879059922946893e-05, "loss": 0.3632, "step": 8760 }, { "epoch": 0.15825984587495215, "grad_norm": 0.5676143765449524, "learning_rate": 1.8789246806979512e-05, "loss": 0.337, "step": 8765 }, { "epoch": 0.15835012530785286, "grad_norm": 0.5160952806472778, "learning_rate": 1.8787893677459087e-05, "loss": 0.2961, "step": 8770 }, { "epoch": 0.15844040474075358, "grad_norm": 0.467488557100296, "learning_rate": 1.878653984101651e-05, "loss": 0.3232, "step": 8775 }, { "epoch": 0.1585306841736543, "grad_norm": 0.4414568245410919, "learning_rate": 1.8785185297760687e-05, "loss": 0.3518, "step": 8780 }, { "epoch": 0.158620963606555, "grad_norm": 0.4487064778804779, "learning_rate": 1.8783830047800583e-05, "loss": 0.2638, "step": 8785 }, { "epoch": 0.15871124303945572, "grad_norm": 1.112725019454956, "learning_rate": 1.878247409124522e-05, "loss": 0.3202, "step": 8790 }, { "epoch": 0.15880152247235643, "grad_norm": 0.30171719193458557, "learning_rate": 1.8781117428203665e-05, "loss": 0.2899, "step": 8795 }, { "epoch": 0.15889180190525715, "grad_norm": 0.3588135838508606, "learning_rate": 1.877976005878506e-05, "loss": 0.3362, "step": 8800 }, { "epoch": 0.15898208133815786, "grad_norm": 0.49537143111228943, "learning_rate": 1.8778401983098594e-05, "loss": 0.3071, "step": 8805 }, { "epoch": 0.15907236077105857, "grad_norm": 0.5950140953063965, "learning_rate": 1.877704320125351e-05, "loss": 0.3152, "step": 8810 }, { "epoch": 0.1591626402039593, "grad_norm": 0.4290141761302948, "learning_rate": 1.8775683713359118e-05, "loss": 0.3679, "step": 8815 }, { "epoch": 0.15925291963686, "grad_norm": 0.7214063405990601, "learning_rate": 1.877432351952478e-05, "loss": 0.2864, "step": 8820 }, { "epoch": 0.15934319906976072, "grad_norm": 0.5823917388916016, "learning_rate": 1.8772962619859908e-05, "loss": 0.3488, "step": 8825 }, { "epoch": 0.15943347850266143, "grad_norm": 0.47410455346107483, "learning_rate": 1.8771601014473978e-05, "loss": 0.276, "step": 8830 }, { "epoch": 0.15952375793556214, "grad_norm": 0.3576255440711975, "learning_rate": 1.8770238703476522e-05, "loss": 0.3626, "step": 8835 }, { "epoch": 0.15961403736846286, "grad_norm": 0.5306724905967712, "learning_rate": 1.8768875686977125e-05, "loss": 0.2775, "step": 8840 }, { "epoch": 0.15970431680136357, "grad_norm": 0.4195229411125183, "learning_rate": 1.876751196508544e-05, "loss": 0.3033, "step": 8845 }, { "epoch": 0.15979459623426429, "grad_norm": 0.7627418041229248, "learning_rate": 1.8766147537911165e-05, "loss": 0.338, "step": 8850 }, { "epoch": 0.159884875667165, "grad_norm": 0.3827398121356964, "learning_rate": 1.8764782405564052e-05, "loss": 0.3547, "step": 8855 }, { "epoch": 0.1599751551000657, "grad_norm": 0.4121302664279938, "learning_rate": 1.876341656815392e-05, "loss": 0.2591, "step": 8860 }, { "epoch": 0.16006543453296643, "grad_norm": 0.3650098741054535, "learning_rate": 1.8762050025790643e-05, "loss": 0.356, "step": 8865 }, { "epoch": 0.16015571396586714, "grad_norm": 0.3953309953212738, "learning_rate": 1.8760682778584146e-05, "loss": 0.3404, "step": 8870 }, { "epoch": 0.16024599339876786, "grad_norm": 0.4996097683906555, "learning_rate": 1.875931482664442e-05, "loss": 0.377, "step": 8875 }, { "epoch": 0.16033627283166857, "grad_norm": 0.34962838888168335, "learning_rate": 1.87579461700815e-05, "loss": 0.2998, "step": 8880 }, { "epoch": 0.16042655226456928, "grad_norm": 0.3440771996974945, "learning_rate": 1.8756576809005488e-05, "loss": 0.358, "step": 8885 }, { "epoch": 0.16051683169747, "grad_norm": 0.38825514912605286, "learning_rate": 1.8755206743526537e-05, "loss": 0.2428, "step": 8890 }, { "epoch": 0.1606071111303707, "grad_norm": 0.38474076986312866, "learning_rate": 1.875383597375486e-05, "loss": 0.3534, "step": 8895 }, { "epoch": 0.16069739056327143, "grad_norm": 0.42535915970802307, "learning_rate": 1.8752464499800727e-05, "loss": 0.2535, "step": 8900 }, { "epoch": 0.16078766999617214, "grad_norm": 0.3820883333683014, "learning_rate": 1.875109232177446e-05, "loss": 0.375, "step": 8905 }, { "epoch": 0.16087794942907285, "grad_norm": 0.39074403047561646, "learning_rate": 1.874971943978644e-05, "loss": 0.3036, "step": 8910 }, { "epoch": 0.16096822886197357, "grad_norm": 0.5746060609817505, "learning_rate": 1.8748345853947116e-05, "loss": 0.4176, "step": 8915 }, { "epoch": 0.1610585082948743, "grad_norm": 0.3055366575717926, "learning_rate": 1.8746971564366966e-05, "loss": 0.3802, "step": 8920 }, { "epoch": 0.16114878772777502, "grad_norm": 0.48693153262138367, "learning_rate": 1.8745596571156554e-05, "loss": 0.3053, "step": 8925 }, { "epoch": 0.16123906716067574, "grad_norm": 0.33140549063682556, "learning_rate": 1.8744220874426485e-05, "loss": 0.3338, "step": 8930 }, { "epoch": 0.16132934659357645, "grad_norm": 0.32309263944625854, "learning_rate": 1.874284447428742e-05, "loss": 0.3479, "step": 8935 }, { "epoch": 0.16141962602647716, "grad_norm": 0.3173151910305023, "learning_rate": 1.874146737085009e-05, "loss": 0.2721, "step": 8940 }, { "epoch": 0.16150990545937788, "grad_norm": 0.5842396020889282, "learning_rate": 1.874008956422526e-05, "loss": 0.306, "step": 8945 }, { "epoch": 0.1616001848922786, "grad_norm": 0.45474880933761597, "learning_rate": 1.8738711054523772e-05, "loss": 0.2855, "step": 8950 }, { "epoch": 0.1616904643251793, "grad_norm": 0.40684881806373596, "learning_rate": 1.8737331841856518e-05, "loss": 0.3572, "step": 8955 }, { "epoch": 0.16178074375808002, "grad_norm": 0.6544089317321777, "learning_rate": 1.8735951926334443e-05, "loss": 0.3391, "step": 8960 }, { "epoch": 0.16187102319098073, "grad_norm": 0.4144783914089203, "learning_rate": 1.8734571308068555e-05, "loss": 0.3722, "step": 8965 }, { "epoch": 0.16196130262388145, "grad_norm": 0.43708932399749756, "learning_rate": 1.873318998716991e-05, "loss": 0.3814, "step": 8970 }, { "epoch": 0.16205158205678216, "grad_norm": 0.6572683453559875, "learning_rate": 1.8731807963749627e-05, "loss": 0.2317, "step": 8975 }, { "epoch": 0.16214186148968288, "grad_norm": 0.48241615295410156, "learning_rate": 1.8730425237918878e-05, "loss": 0.3372, "step": 8980 }, { "epoch": 0.1622321409225836, "grad_norm": 0.3190326392650604, "learning_rate": 1.8729041809788894e-05, "loss": 0.217, "step": 8985 }, { "epoch": 0.1623224203554843, "grad_norm": 0.24012210965156555, "learning_rate": 1.8727657679470964e-05, "loss": 0.2669, "step": 8990 }, { "epoch": 0.16241269978838502, "grad_norm": 0.5661858916282654, "learning_rate": 1.872627284707643e-05, "loss": 0.3828, "step": 8995 }, { "epoch": 0.16250297922128573, "grad_norm": 0.3285576403141022, "learning_rate": 1.8724887312716696e-05, "loss": 0.3112, "step": 9000 }, { "epoch": 0.16259325865418645, "grad_norm": 0.4387272000312805, "learning_rate": 1.8723501076503207e-05, "loss": 0.2479, "step": 9005 }, { "epoch": 0.16268353808708716, "grad_norm": 0.41906678676605225, "learning_rate": 1.872211413854749e-05, "loss": 0.267, "step": 9010 }, { "epoch": 0.16277381751998787, "grad_norm": 0.37576326727867126, "learning_rate": 1.8720726498961098e-05, "loss": 0.2704, "step": 9015 }, { "epoch": 0.1628640969528886, "grad_norm": 0.5084801316261292, "learning_rate": 1.8719338157855672e-05, "loss": 0.3471, "step": 9020 }, { "epoch": 0.1629543763857893, "grad_norm": 0.38982388377189636, "learning_rate": 1.8717949115342882e-05, "loss": 0.3095, "step": 9025 }, { "epoch": 0.16304465581869002, "grad_norm": 0.40285664796829224, "learning_rate": 1.8716559371534472e-05, "loss": 0.3278, "step": 9030 }, { "epoch": 0.16313493525159073, "grad_norm": 0.49813154339790344, "learning_rate": 1.871516892654224e-05, "loss": 0.2548, "step": 9035 }, { "epoch": 0.16322521468449144, "grad_norm": 0.2894067168235779, "learning_rate": 1.871377778047803e-05, "loss": 0.2275, "step": 9040 }, { "epoch": 0.16331549411739216, "grad_norm": 0.9235433340072632, "learning_rate": 1.8712385933453753e-05, "loss": 0.3341, "step": 9045 }, { "epoch": 0.16340577355029287, "grad_norm": 0.510308563709259, "learning_rate": 1.8710993385581373e-05, "loss": 0.2429, "step": 9050 }, { "epoch": 0.16349605298319358, "grad_norm": 0.357827365398407, "learning_rate": 1.8709600136972906e-05, "loss": 0.3234, "step": 9055 }, { "epoch": 0.1635863324160943, "grad_norm": 0.5867786407470703, "learning_rate": 1.870820618774044e-05, "loss": 0.3031, "step": 9060 }, { "epoch": 0.163676611848995, "grad_norm": 0.73820561170578, "learning_rate": 1.87068115379961e-05, "loss": 0.2481, "step": 9065 }, { "epoch": 0.16376689128189573, "grad_norm": 0.43540045619010925, "learning_rate": 1.8705416187852073e-05, "loss": 0.2251, "step": 9070 }, { "epoch": 0.16385717071479644, "grad_norm": 0.37970179319381714, "learning_rate": 1.870402013742061e-05, "loss": 0.3055, "step": 9075 }, { "epoch": 0.16394745014769715, "grad_norm": 0.3866814374923706, "learning_rate": 1.870262338681401e-05, "loss": 0.2766, "step": 9080 }, { "epoch": 0.16403772958059787, "grad_norm": 0.5278443694114685, "learning_rate": 1.8701225936144633e-05, "loss": 0.3221, "step": 9085 }, { "epoch": 0.16412800901349858, "grad_norm": 0.37457430362701416, "learning_rate": 1.8699827785524895e-05, "loss": 0.3623, "step": 9090 }, { "epoch": 0.1642182884463993, "grad_norm": 0.4798072278499603, "learning_rate": 1.8698428935067264e-05, "loss": 0.3111, "step": 9095 }, { "epoch": 0.1643085678793, "grad_norm": 0.404145747423172, "learning_rate": 1.869702938488427e-05, "loss": 0.3708, "step": 9100 }, { "epoch": 0.16439884731220072, "grad_norm": 0.38765108585357666, "learning_rate": 1.8695629135088494e-05, "loss": 0.3342, "step": 9105 }, { "epoch": 0.16448912674510144, "grad_norm": 0.48070767521858215, "learning_rate": 1.869422818579258e-05, "loss": 0.3036, "step": 9110 }, { "epoch": 0.16457940617800215, "grad_norm": 0.3652752637863159, "learning_rate": 1.869282653710922e-05, "loss": 0.3352, "step": 9115 }, { "epoch": 0.16466968561090287, "grad_norm": 0.40660160779953003, "learning_rate": 1.8691424189151166e-05, "loss": 0.2548, "step": 9120 }, { "epoch": 0.16475996504380358, "grad_norm": 0.602676272392273, "learning_rate": 1.8690021142031233e-05, "loss": 0.3861, "step": 9125 }, { "epoch": 0.1648502444767043, "grad_norm": 0.3529750108718872, "learning_rate": 1.868861739586228e-05, "loss": 0.2575, "step": 9130 }, { "epoch": 0.164940523909605, "grad_norm": 0.39472639560699463, "learning_rate": 1.8687212950757226e-05, "loss": 0.2434, "step": 9135 }, { "epoch": 0.16503080334250572, "grad_norm": 0.20535361766815186, "learning_rate": 1.8685807806829056e-05, "loss": 0.1722, "step": 9140 }, { "epoch": 0.16512108277540644, "grad_norm": 0.31685560941696167, "learning_rate": 1.86844019641908e-05, "loss": 0.2489, "step": 9145 }, { "epoch": 0.16521136220830715, "grad_norm": 0.45184728503227234, "learning_rate": 1.8682995422955548e-05, "loss": 0.294, "step": 9150 }, { "epoch": 0.16530164164120786, "grad_norm": 0.4597442150115967, "learning_rate": 1.8681588183236443e-05, "loss": 0.3423, "step": 9155 }, { "epoch": 0.16539192107410858, "grad_norm": 0.5040349364280701, "learning_rate": 1.8680180245146693e-05, "loss": 0.3271, "step": 9160 }, { "epoch": 0.1654822005070093, "grad_norm": 0.44972705841064453, "learning_rate": 1.8678771608799553e-05, "loss": 0.2843, "step": 9165 }, { "epoch": 0.16557247993991, "grad_norm": 0.6874871253967285, "learning_rate": 1.8677362274308338e-05, "loss": 0.2859, "step": 9170 }, { "epoch": 0.16566275937281072, "grad_norm": 0.6598188281059265, "learning_rate": 1.8675952241786416e-05, "loss": 0.3639, "step": 9175 }, { "epoch": 0.16575303880571143, "grad_norm": 0.4872834384441376, "learning_rate": 1.8674541511347218e-05, "loss": 0.3082, "step": 9180 }, { "epoch": 0.16584331823861215, "grad_norm": 0.40957456827163696, "learning_rate": 1.8673130083104227e-05, "loss": 0.2927, "step": 9185 }, { "epoch": 0.16593359767151286, "grad_norm": 0.42083311080932617, "learning_rate": 1.8671717957170977e-05, "loss": 0.2965, "step": 9190 }, { "epoch": 0.16602387710441358, "grad_norm": 0.7757776379585266, "learning_rate": 1.8670305133661067e-05, "loss": 0.381, "step": 9195 }, { "epoch": 0.1661141565373143, "grad_norm": 0.24977640807628632, "learning_rate": 1.866889161268815e-05, "loss": 0.2825, "step": 9200 }, { "epoch": 0.166204435970215, "grad_norm": 0.42900508642196655, "learning_rate": 1.866747739436593e-05, "loss": 0.3708, "step": 9205 }, { "epoch": 0.16629471540311572, "grad_norm": 0.4191928803920746, "learning_rate": 1.8666062478808176e-05, "loss": 0.3219, "step": 9210 }, { "epoch": 0.16638499483601643, "grad_norm": 0.4868921935558319, "learning_rate": 1.8664646866128702e-05, "loss": 0.3252, "step": 9215 }, { "epoch": 0.16647527426891714, "grad_norm": 0.3454371690750122, "learning_rate": 1.8663230556441384e-05, "loss": 0.3906, "step": 9220 }, { "epoch": 0.16656555370181786, "grad_norm": 0.24596817791461945, "learning_rate": 1.866181354986016e-05, "loss": 0.3218, "step": 9225 }, { "epoch": 0.16665583313471857, "grad_norm": 0.47529229521751404, "learning_rate": 1.866039584649901e-05, "loss": 0.314, "step": 9230 }, { "epoch": 0.1667461125676193, "grad_norm": 0.5996313691139221, "learning_rate": 1.865897744647198e-05, "loss": 0.3252, "step": 9235 }, { "epoch": 0.16683639200052, "grad_norm": 0.5580554604530334, "learning_rate": 1.8657558349893174e-05, "loss": 0.2919, "step": 9240 }, { "epoch": 0.16692667143342071, "grad_norm": 0.32417070865631104, "learning_rate": 1.8656138556876742e-05, "loss": 0.3111, "step": 9245 }, { "epoch": 0.16701695086632143, "grad_norm": 0.7458740472793579, "learning_rate": 1.8654718067536904e-05, "loss": 0.2593, "step": 9250 }, { "epoch": 0.16710723029922214, "grad_norm": 0.5567686557769775, "learning_rate": 1.865329688198792e-05, "loss": 0.3088, "step": 9255 }, { "epoch": 0.16719750973212286, "grad_norm": 0.4393288195133209, "learning_rate": 1.865187500034412e-05, "loss": 0.2979, "step": 9260 }, { "epoch": 0.16728778916502357, "grad_norm": 0.39182204008102417, "learning_rate": 1.865045242271988e-05, "loss": 0.3249, "step": 9265 }, { "epoch": 0.16737806859792428, "grad_norm": 0.34086981415748596, "learning_rate": 1.8649029149229638e-05, "loss": 0.4379, "step": 9270 }, { "epoch": 0.167468348030825, "grad_norm": 0.4125692546367645, "learning_rate": 1.8647605179987882e-05, "loss": 0.2359, "step": 9275 }, { "epoch": 0.1675586274637257, "grad_norm": 0.2924150228500366, "learning_rate": 1.8646180515109168e-05, "loss": 0.2635, "step": 9280 }, { "epoch": 0.16764890689662643, "grad_norm": 0.2948613464832306, "learning_rate": 1.864475515470809e-05, "loss": 0.2019, "step": 9285 }, { "epoch": 0.16773918632952714, "grad_norm": 0.4389468729496002, "learning_rate": 1.8643329098899317e-05, "loss": 0.3511, "step": 9290 }, { "epoch": 0.16782946576242785, "grad_norm": 0.47662729024887085, "learning_rate": 1.864190234779756e-05, "loss": 0.2472, "step": 9295 }, { "epoch": 0.16791974519532857, "grad_norm": 0.3126203417778015, "learning_rate": 1.8640474901517597e-05, "loss": 0.3962, "step": 9300 }, { "epoch": 0.16801002462822928, "grad_norm": 0.5453171730041504, "learning_rate": 1.8639046760174243e-05, "loss": 0.3613, "step": 9305 }, { "epoch": 0.16810030406113, "grad_norm": 0.4740075170993805, "learning_rate": 1.8637617923882392e-05, "loss": 0.239, "step": 9310 }, { "epoch": 0.16819058349403074, "grad_norm": 0.5277817249298096, "learning_rate": 1.863618839275698e-05, "loss": 0.2778, "step": 9315 }, { "epoch": 0.16828086292693145, "grad_norm": 0.41539040207862854, "learning_rate": 1.8634758166913004e-05, "loss": 0.3002, "step": 9320 }, { "epoch": 0.16837114235983217, "grad_norm": 0.5895034074783325, "learning_rate": 1.8633327246465517e-05, "loss": 0.3376, "step": 9325 }, { "epoch": 0.16846142179273288, "grad_norm": 0.3407139778137207, "learning_rate": 1.863189563152962e-05, "loss": 0.3832, "step": 9330 }, { "epoch": 0.1685517012256336, "grad_norm": 0.5738711357116699, "learning_rate": 1.8630463322220477e-05, "loss": 0.4042, "step": 9335 }, { "epoch": 0.1686419806585343, "grad_norm": 0.4206055998802185, "learning_rate": 1.8629030318653313e-05, "loss": 0.3159, "step": 9340 }, { "epoch": 0.16873226009143502, "grad_norm": 0.4046110510826111, "learning_rate": 1.8627596620943398e-05, "loss": 0.4248, "step": 9345 }, { "epoch": 0.16882253952433574, "grad_norm": 0.4212724268436432, "learning_rate": 1.8626162229206063e-05, "loss": 0.2942, "step": 9350 }, { "epoch": 0.16891281895723645, "grad_norm": 0.6038151383399963, "learning_rate": 1.8624727143556694e-05, "loss": 0.3216, "step": 9355 }, { "epoch": 0.16900309839013716, "grad_norm": 0.5483262538909912, "learning_rate": 1.8623291364110737e-05, "loss": 0.4853, "step": 9360 }, { "epoch": 0.16909337782303788, "grad_norm": 0.7040364146232605, "learning_rate": 1.8621854890983687e-05, "loss": 0.3221, "step": 9365 }, { "epoch": 0.1691836572559386, "grad_norm": 0.6033093929290771, "learning_rate": 1.8620417724291096e-05, "loss": 0.2782, "step": 9370 }, { "epoch": 0.1692739366888393, "grad_norm": 0.7284932732582092, "learning_rate": 1.8618979864148574e-05, "loss": 0.2881, "step": 9375 }, { "epoch": 0.16936421612174002, "grad_norm": 0.3731858432292938, "learning_rate": 1.861754131067179e-05, "loss": 0.2258, "step": 9380 }, { "epoch": 0.16945449555464073, "grad_norm": 0.6486430764198303, "learning_rate": 1.8616102063976465e-05, "loss": 0.3082, "step": 9385 }, { "epoch": 0.16954477498754145, "grad_norm": 0.6816726922988892, "learning_rate": 1.8614662124178373e-05, "loss": 0.3372, "step": 9390 }, { "epoch": 0.16963505442044216, "grad_norm": 0.39962419867515564, "learning_rate": 1.8613221491393348e-05, "loss": 0.3209, "step": 9395 }, { "epoch": 0.16972533385334287, "grad_norm": 0.4884919822216034, "learning_rate": 1.8611780165737275e-05, "loss": 0.3539, "step": 9400 }, { "epoch": 0.1698156132862436, "grad_norm": 0.6725785136222839, "learning_rate": 1.8610338147326105e-05, "loss": 0.2836, "step": 9405 }, { "epoch": 0.1699058927191443, "grad_norm": 0.4711063504219055, "learning_rate": 1.8608895436275833e-05, "loss": 0.2337, "step": 9410 }, { "epoch": 0.16999617215204502, "grad_norm": 0.635113000869751, "learning_rate": 1.8607452032702515e-05, "loss": 0.2342, "step": 9415 }, { "epoch": 0.17008645158494573, "grad_norm": 1.7193716764450073, "learning_rate": 1.8606007936722264e-05, "loss": 0.3885, "step": 9420 }, { "epoch": 0.17017673101784644, "grad_norm": 0.46928563714027405, "learning_rate": 1.8604563148451245e-05, "loss": 0.3314, "step": 9425 }, { "epoch": 0.17026701045074716, "grad_norm": 0.5596863627433777, "learning_rate": 1.8603117668005684e-05, "loss": 0.2857, "step": 9430 }, { "epoch": 0.17035728988364787, "grad_norm": 0.66194748878479, "learning_rate": 1.860167149550186e-05, "loss": 0.3214, "step": 9435 }, { "epoch": 0.17044756931654859, "grad_norm": 0.2665224075317383, "learning_rate": 1.8600224631056098e-05, "loss": 0.232, "step": 9440 }, { "epoch": 0.1705378487494493, "grad_norm": 0.4515458345413208, "learning_rate": 1.8598777074784796e-05, "loss": 0.288, "step": 9445 }, { "epoch": 0.17062812818235001, "grad_norm": 0.3401292860507965, "learning_rate": 1.85973288268044e-05, "loss": 0.2927, "step": 9450 }, { "epoch": 0.17071840761525073, "grad_norm": 0.5469954013824463, "learning_rate": 1.8595879887231408e-05, "loss": 0.3329, "step": 9455 }, { "epoch": 0.17080868704815144, "grad_norm": 0.4402863383293152, "learning_rate": 1.8594430256182375e-05, "loss": 0.2655, "step": 9460 }, { "epoch": 0.17089896648105216, "grad_norm": 0.4635493755340576, "learning_rate": 1.8592979933773918e-05, "loss": 0.3237, "step": 9465 }, { "epoch": 0.17098924591395287, "grad_norm": 0.4579942524433136, "learning_rate": 1.85915289201227e-05, "loss": 0.2661, "step": 9470 }, { "epoch": 0.17107952534685358, "grad_norm": 0.30433493852615356, "learning_rate": 1.859007721534545e-05, "loss": 0.4544, "step": 9475 }, { "epoch": 0.1711698047797543, "grad_norm": 0.5254673957824707, "learning_rate": 1.8588624819558938e-05, "loss": 0.168, "step": 9480 }, { "epoch": 0.171260084212655, "grad_norm": 0.5817158818244934, "learning_rate": 1.858717173288001e-05, "loss": 0.2764, "step": 9485 }, { "epoch": 0.17135036364555573, "grad_norm": 0.4819915294647217, "learning_rate": 1.8585717955425545e-05, "loss": 0.2993, "step": 9490 }, { "epoch": 0.17144064307845644, "grad_norm": 0.6393699645996094, "learning_rate": 1.85842634873125e-05, "loss": 0.2509, "step": 9495 }, { "epoch": 0.17153092251135715, "grad_norm": 0.4473429024219513, "learning_rate": 1.858280832865787e-05, "loss": 0.3107, "step": 9500 }, { "epoch": 0.17162120194425787, "grad_norm": 0.5762903690338135, "learning_rate": 1.8581352479578715e-05, "loss": 0.2317, "step": 9505 }, { "epoch": 0.17171148137715858, "grad_norm": 0.5538835525512695, "learning_rate": 1.8579895940192143e-05, "loss": 0.3585, "step": 9510 }, { "epoch": 0.1718017608100593, "grad_norm": 0.4441256523132324, "learning_rate": 1.8578438710615322e-05, "loss": 0.2742, "step": 9515 }, { "epoch": 0.17189204024296, "grad_norm": 0.3827473223209381, "learning_rate": 1.8576980790965482e-05, "loss": 0.3831, "step": 9520 }, { "epoch": 0.17198231967586072, "grad_norm": 0.44780096411705017, "learning_rate": 1.85755221813599e-05, "loss": 0.4024, "step": 9525 }, { "epoch": 0.17207259910876144, "grad_norm": 0.27713555097579956, "learning_rate": 1.8574062881915904e-05, "loss": 0.2225, "step": 9530 }, { "epoch": 0.17216287854166215, "grad_norm": 0.43907350301742554, "learning_rate": 1.857260289275089e-05, "loss": 0.2925, "step": 9535 }, { "epoch": 0.17225315797456286, "grad_norm": 0.5583574771881104, "learning_rate": 1.8571142213982302e-05, "loss": 0.3521, "step": 9540 }, { "epoch": 0.17234343740746358, "grad_norm": 0.3877319097518921, "learning_rate": 1.8569680845727644e-05, "loss": 0.3317, "step": 9545 }, { "epoch": 0.1724337168403643, "grad_norm": 0.622263491153717, "learning_rate": 1.8568218788104464e-05, "loss": 0.2734, "step": 9550 }, { "epoch": 0.172523996273265, "grad_norm": 0.5004008412361145, "learning_rate": 1.8566756041230382e-05, "loss": 0.3637, "step": 9555 }, { "epoch": 0.17261427570616572, "grad_norm": 0.44145500659942627, "learning_rate": 1.8565292605223064e-05, "loss": 0.2419, "step": 9560 }, { "epoch": 0.17270455513906643, "grad_norm": 0.3170686364173889, "learning_rate": 1.856382848020023e-05, "loss": 0.2014, "step": 9565 }, { "epoch": 0.17279483457196715, "grad_norm": 0.43776872754096985, "learning_rate": 1.8562363666279663e-05, "loss": 0.3425, "step": 9570 }, { "epoch": 0.17288511400486786, "grad_norm": 0.4404096007347107, "learning_rate": 1.8560898163579187e-05, "loss": 0.3461, "step": 9575 }, { "epoch": 0.17297539343776858, "grad_norm": 0.3963436782360077, "learning_rate": 1.8559431972216704e-05, "loss": 0.427, "step": 9580 }, { "epoch": 0.1730656728706693, "grad_norm": 0.37308162450790405, "learning_rate": 1.8557965092310147e-05, "loss": 0.3837, "step": 9585 }, { "epoch": 0.17315595230357, "grad_norm": 0.399116188287735, "learning_rate": 1.8556497523977525e-05, "loss": 0.3143, "step": 9590 }, { "epoch": 0.17324623173647072, "grad_norm": 0.5526614785194397, "learning_rate": 1.8555029267336887e-05, "loss": 0.2633, "step": 9595 }, { "epoch": 0.17333651116937143, "grad_norm": 0.47489047050476074, "learning_rate": 1.8553560322506347e-05, "loss": 0.3514, "step": 9600 }, { "epoch": 0.17342679060227215, "grad_norm": 0.45416170358657837, "learning_rate": 1.855209068960407e-05, "loss": 0.25, "step": 9605 }, { "epoch": 0.17351707003517286, "grad_norm": 0.5261057615280151, "learning_rate": 1.855062036874827e-05, "loss": 0.3549, "step": 9610 }, { "epoch": 0.17360734946807357, "grad_norm": 0.5666975975036621, "learning_rate": 1.854914936005724e-05, "loss": 0.3656, "step": 9615 }, { "epoch": 0.1736976289009743, "grad_norm": 0.3660118579864502, "learning_rate": 1.8547677663649293e-05, "loss": 0.3276, "step": 9620 }, { "epoch": 0.173787908333875, "grad_norm": 1.1892653703689575, "learning_rate": 1.8546205279642834e-05, "loss": 0.2777, "step": 9625 }, { "epoch": 0.17387818776677572, "grad_norm": 0.605702817440033, "learning_rate": 1.854473220815629e-05, "loss": 0.2925, "step": 9630 }, { "epoch": 0.17396846719967643, "grad_norm": 0.3500027060508728, "learning_rate": 1.854325844930817e-05, "loss": 0.268, "step": 9635 }, { "epoch": 0.17405874663257714, "grad_norm": 0.7925516963005066, "learning_rate": 1.8541784003217023e-05, "loss": 0.2649, "step": 9640 }, { "epoch": 0.17414902606547786, "grad_norm": 0.5006440877914429, "learning_rate": 1.8540308870001456e-05, "loss": 0.3122, "step": 9645 }, { "epoch": 0.17423930549837857, "grad_norm": 0.39755287766456604, "learning_rate": 1.8538833049780136e-05, "loss": 0.3285, "step": 9650 }, { "epoch": 0.17432958493127929, "grad_norm": 0.32424280047416687, "learning_rate": 1.853735654267178e-05, "loss": 0.3431, "step": 9655 }, { "epoch": 0.17441986436418, "grad_norm": 0.42777007818222046, "learning_rate": 1.8535879348795163e-05, "loss": 0.2386, "step": 9660 }, { "epoch": 0.1745101437970807, "grad_norm": 0.3951456546783447, "learning_rate": 1.8534401468269113e-05, "loss": 0.3338, "step": 9665 }, { "epoch": 0.17460042322998143, "grad_norm": 0.5320587754249573, "learning_rate": 1.853292290121252e-05, "loss": 0.289, "step": 9670 }, { "epoch": 0.17469070266288214, "grad_norm": 0.4082178771495819, "learning_rate": 1.8531443647744317e-05, "loss": 0.3192, "step": 9675 }, { "epoch": 0.17478098209578286, "grad_norm": 0.45076894760131836, "learning_rate": 1.85299637079835e-05, "loss": 0.3045, "step": 9680 }, { "epoch": 0.17487126152868357, "grad_norm": 0.39481931924819946, "learning_rate": 1.8528483082049124e-05, "loss": 0.3346, "step": 9685 }, { "epoch": 0.17496154096158428, "grad_norm": 0.3986913859844208, "learning_rate": 1.8527001770060292e-05, "loss": 0.4209, "step": 9690 }, { "epoch": 0.175051820394485, "grad_norm": 0.3154614269733429, "learning_rate": 1.8525519772136166e-05, "loss": 0.3049, "step": 9695 }, { "epoch": 0.1751420998273857, "grad_norm": 0.31176674365997314, "learning_rate": 1.8524037088395957e-05, "loss": 0.2899, "step": 9700 }, { "epoch": 0.17523237926028642, "grad_norm": 0.7537745833396912, "learning_rate": 1.8522553718958946e-05, "loss": 0.3217, "step": 9705 }, { "epoch": 0.17532265869318717, "grad_norm": 0.38892337679862976, "learning_rate": 1.8521069663944448e-05, "loss": 0.2509, "step": 9710 }, { "epoch": 0.17541293812608788, "grad_norm": 0.40880605578422546, "learning_rate": 1.851958492347185e-05, "loss": 0.3098, "step": 9715 }, { "epoch": 0.1755032175589886, "grad_norm": 0.4116984009742737, "learning_rate": 1.851809949766059e-05, "loss": 0.3527, "step": 9720 }, { "epoch": 0.1755934969918893, "grad_norm": 0.36210495233535767, "learning_rate": 1.851661338663016e-05, "loss": 0.2324, "step": 9725 }, { "epoch": 0.17568377642479002, "grad_norm": 0.43543070554733276, "learning_rate": 1.8515126590500104e-05, "loss": 0.2956, "step": 9730 }, { "epoch": 0.17577405585769074, "grad_norm": 0.5375482439994812, "learning_rate": 1.851363910939002e-05, "loss": 0.3565, "step": 9735 }, { "epoch": 0.17586433529059145, "grad_norm": 0.48285213112831116, "learning_rate": 1.8512150943419573e-05, "loss": 0.3498, "step": 9740 }, { "epoch": 0.17595461472349216, "grad_norm": 0.5422080159187317, "learning_rate": 1.8510662092708473e-05, "loss": 0.369, "step": 9745 }, { "epoch": 0.17604489415639288, "grad_norm": 0.6598948836326599, "learning_rate": 1.8509172557376484e-05, "loss": 0.336, "step": 9750 }, { "epoch": 0.1761351735892936, "grad_norm": 0.5499585270881653, "learning_rate": 1.8507682337543432e-05, "loss": 0.2605, "step": 9755 }, { "epoch": 0.1762254530221943, "grad_norm": 0.4877864420413971, "learning_rate": 1.8506191433329196e-05, "loss": 0.1437, "step": 9760 }, { "epoch": 0.17631573245509502, "grad_norm": 0.7126004695892334, "learning_rate": 1.8504699844853697e-05, "loss": 0.2657, "step": 9765 }, { "epoch": 0.17640601188799573, "grad_norm": 0.3607148230075836, "learning_rate": 1.8503207572236938e-05, "loss": 0.2516, "step": 9770 }, { "epoch": 0.17649629132089645, "grad_norm": 0.5653864741325378, "learning_rate": 1.850171461559895e-05, "loss": 0.3246, "step": 9775 }, { "epoch": 0.17658657075379716, "grad_norm": 0.35818910598754883, "learning_rate": 1.8500220975059833e-05, "loss": 0.3174, "step": 9780 }, { "epoch": 0.17667685018669788, "grad_norm": 0.4652705192565918, "learning_rate": 1.8498726650739747e-05, "loss": 0.3435, "step": 9785 }, { "epoch": 0.1767671296195986, "grad_norm": 0.27703943848609924, "learning_rate": 1.849723164275889e-05, "loss": 0.305, "step": 9790 }, { "epoch": 0.1768574090524993, "grad_norm": 0.5585147738456726, "learning_rate": 1.849573595123753e-05, "loss": 0.2885, "step": 9795 }, { "epoch": 0.17694768848540002, "grad_norm": 0.413076788187027, "learning_rate": 1.8494239576295982e-05, "loss": 0.2558, "step": 9800 }, { "epoch": 0.17703796791830073, "grad_norm": 0.545178234577179, "learning_rate": 1.849274251805462e-05, "loss": 0.265, "step": 9805 }, { "epoch": 0.17712824735120145, "grad_norm": 0.42601487040519714, "learning_rate": 1.8491244776633868e-05, "loss": 0.3653, "step": 9810 }, { "epoch": 0.17721852678410216, "grad_norm": 0.32546499371528625, "learning_rate": 1.848974635215421e-05, "loss": 0.3098, "step": 9815 }, { "epoch": 0.17730880621700287, "grad_norm": 0.8420270085334778, "learning_rate": 1.848824724473619e-05, "loss": 0.3444, "step": 9820 }, { "epoch": 0.1773990856499036, "grad_norm": 0.6668725609779358, "learning_rate": 1.8486747454500392e-05, "loss": 0.3269, "step": 9825 }, { "epoch": 0.1774893650828043, "grad_norm": 0.3488863706588745, "learning_rate": 1.848524698156746e-05, "loss": 0.3749, "step": 9830 }, { "epoch": 0.17757964451570502, "grad_norm": 0.5781786441802979, "learning_rate": 1.848374582605811e-05, "loss": 0.1893, "step": 9835 }, { "epoch": 0.17766992394860573, "grad_norm": 0.4205300211906433, "learning_rate": 1.848224398809309e-05, "loss": 0.2878, "step": 9840 }, { "epoch": 0.17776020338150644, "grad_norm": 0.5582265853881836, "learning_rate": 1.848074146779321e-05, "loss": 0.3293, "step": 9845 }, { "epoch": 0.17785048281440716, "grad_norm": 1.3087825775146484, "learning_rate": 1.8479238265279343e-05, "loss": 0.2873, "step": 9850 }, { "epoch": 0.17794076224730787, "grad_norm": 0.4465949535369873, "learning_rate": 1.8477734380672406e-05, "loss": 0.2533, "step": 9855 }, { "epoch": 0.17803104168020858, "grad_norm": 0.45469945669174194, "learning_rate": 1.8476229814093375e-05, "loss": 0.3821, "step": 9860 }, { "epoch": 0.1781213211131093, "grad_norm": 0.4225321114063263, "learning_rate": 1.8474724565663286e-05, "loss": 0.298, "step": 9865 }, { "epoch": 0.17821160054601, "grad_norm": 0.4197310507297516, "learning_rate": 1.8473218635503223e-05, "loss": 0.3406, "step": 9870 }, { "epoch": 0.17830187997891073, "grad_norm": 0.6299892663955688, "learning_rate": 1.8471712023734328e-05, "loss": 0.234, "step": 9875 }, { "epoch": 0.17839215941181144, "grad_norm": 0.8793956637382507, "learning_rate": 1.8470204730477794e-05, "loss": 0.2818, "step": 9880 }, { "epoch": 0.17848243884471215, "grad_norm": 0.2909405827522278, "learning_rate": 1.8468696755854876e-05, "loss": 0.2264, "step": 9885 }, { "epoch": 0.17857271827761287, "grad_norm": 0.43145665526390076, "learning_rate": 1.8467188099986875e-05, "loss": 0.324, "step": 9890 }, { "epoch": 0.17866299771051358, "grad_norm": 0.37526363134384155, "learning_rate": 1.846567876299515e-05, "loss": 0.381, "step": 9895 }, { "epoch": 0.1787532771434143, "grad_norm": 0.4539874792098999, "learning_rate": 1.8464168745001128e-05, "loss": 0.2594, "step": 9900 }, { "epoch": 0.178843556576315, "grad_norm": 0.45686039328575134, "learning_rate": 1.8462658046126267e-05, "loss": 0.3893, "step": 9905 }, { "epoch": 0.17893383600921572, "grad_norm": 0.45958003401756287, "learning_rate": 1.8461146666492097e-05, "loss": 0.3155, "step": 9910 }, { "epoch": 0.17902411544211644, "grad_norm": 0.5699650645256042, "learning_rate": 1.8459634606220194e-05, "loss": 0.262, "step": 9915 }, { "epoch": 0.17911439487501715, "grad_norm": 0.48587679862976074, "learning_rate": 1.8458121865432196e-05, "loss": 0.2676, "step": 9920 }, { "epoch": 0.17920467430791787, "grad_norm": 0.33711960911750793, "learning_rate": 1.845660844424979e-05, "loss": 0.2437, "step": 9925 }, { "epoch": 0.17929495374081858, "grad_norm": 0.44800063967704773, "learning_rate": 1.845509434279472e-05, "loss": 0.3275, "step": 9930 }, { "epoch": 0.1793852331737193, "grad_norm": 0.630777895450592, "learning_rate": 1.8453579561188786e-05, "loss": 0.2771, "step": 9935 }, { "epoch": 0.17947551260662, "grad_norm": 0.5990297198295593, "learning_rate": 1.8452064099553836e-05, "loss": 0.2691, "step": 9940 }, { "epoch": 0.17956579203952072, "grad_norm": 0.42177292704582214, "learning_rate": 1.8450547958011788e-05, "loss": 0.3546, "step": 9945 }, { "epoch": 0.17965607147242144, "grad_norm": 0.39384013414382935, "learning_rate": 1.8449031136684593e-05, "loss": 0.2662, "step": 9950 }, { "epoch": 0.17974635090532215, "grad_norm": 0.367742657661438, "learning_rate": 1.8447513635694274e-05, "loss": 0.2673, "step": 9955 }, { "epoch": 0.17983663033822286, "grad_norm": 0.43818777799606323, "learning_rate": 1.8445995455162904e-05, "loss": 0.2634, "step": 9960 }, { "epoch": 0.17992690977112358, "grad_norm": 0.3801169693470001, "learning_rate": 1.8444476595212607e-05, "loss": 0.2969, "step": 9965 }, { "epoch": 0.1800171892040243, "grad_norm": 0.40738508105278015, "learning_rate": 1.8442957055965566e-05, "loss": 0.2861, "step": 9970 }, { "epoch": 0.180107468636925, "grad_norm": 0.40975838899612427, "learning_rate": 1.8441436837544014e-05, "loss": 0.2714, "step": 9975 }, { "epoch": 0.18019774806982572, "grad_norm": 0.3722114562988281, "learning_rate": 1.8439915940070247e-05, "loss": 0.2468, "step": 9980 }, { "epoch": 0.18028802750272643, "grad_norm": 0.44791749119758606, "learning_rate": 1.8438394363666603e-05, "loss": 0.2855, "step": 9985 }, { "epoch": 0.18037830693562715, "grad_norm": 0.6452430486679077, "learning_rate": 1.8436872108455487e-05, "loss": 0.3599, "step": 9990 }, { "epoch": 0.18046858636852786, "grad_norm": 0.4257093369960785, "learning_rate": 1.8435349174559355e-05, "loss": 0.3286, "step": 9995 }, { "epoch": 0.18055886580142858, "grad_norm": 0.4258350431919098, "learning_rate": 1.8433825562100708e-05, "loss": 0.2843, "step": 10000 }, { "epoch": 0.1806491452343293, "grad_norm": 0.3578794002532959, "learning_rate": 1.8432301271202117e-05, "loss": 0.3294, "step": 10005 }, { "epoch": 0.18073942466723, "grad_norm": 0.47850295901298523, "learning_rate": 1.8430776301986196e-05, "loss": 0.3223, "step": 10010 }, { "epoch": 0.18082970410013072, "grad_norm": 0.44029831886291504, "learning_rate": 1.842925065457562e-05, "loss": 0.3394, "step": 10015 }, { "epoch": 0.18091998353303143, "grad_norm": 0.5738121271133423, "learning_rate": 1.842772432909312e-05, "loss": 0.3103, "step": 10020 }, { "epoch": 0.18101026296593214, "grad_norm": 0.7582994699478149, "learning_rate": 1.8426197325661466e-05, "loss": 0.2223, "step": 10025 }, { "epoch": 0.18110054239883286, "grad_norm": 0.3985866904258728, "learning_rate": 1.8424669644403505e-05, "loss": 0.3643, "step": 10030 }, { "epoch": 0.18119082183173357, "grad_norm": 0.5045907497406006, "learning_rate": 1.8423141285442123e-05, "loss": 0.2353, "step": 10035 }, { "epoch": 0.1812811012646343, "grad_norm": 0.3531908690929413, "learning_rate": 1.842161224890027e-05, "loss": 0.3548, "step": 10040 }, { "epoch": 0.181371380697535, "grad_norm": 0.34367236495018005, "learning_rate": 1.8420082534900934e-05, "loss": 0.2043, "step": 10045 }, { "epoch": 0.18146166013043571, "grad_norm": 0.6093673706054688, "learning_rate": 1.8418552143567184e-05, "loss": 0.3982, "step": 10050 }, { "epoch": 0.18155193956333643, "grad_norm": 0.5029755234718323, "learning_rate": 1.8417021075022122e-05, "loss": 0.3863, "step": 10055 }, { "epoch": 0.18164221899623714, "grad_norm": 0.46935874223709106, "learning_rate": 1.8415489329388914e-05, "loss": 0.3116, "step": 10060 }, { "epoch": 0.18173249842913786, "grad_norm": 0.9425585865974426, "learning_rate": 1.8413956906790776e-05, "loss": 0.2711, "step": 10065 }, { "epoch": 0.18182277786203857, "grad_norm": 0.43612468242645264, "learning_rate": 1.8412423807350978e-05, "loss": 0.3326, "step": 10070 }, { "epoch": 0.18191305729493928, "grad_norm": 0.39147958159446716, "learning_rate": 1.8410890031192852e-05, "loss": 0.319, "step": 10075 }, { "epoch": 0.18200333672784, "grad_norm": 0.445437490940094, "learning_rate": 1.8409355578439773e-05, "loss": 0.2219, "step": 10080 }, { "epoch": 0.1820936161607407, "grad_norm": 0.3498551547527313, "learning_rate": 1.840782044921518e-05, "loss": 0.3016, "step": 10085 }, { "epoch": 0.18218389559364143, "grad_norm": 0.5700546503067017, "learning_rate": 1.8406284643642562e-05, "loss": 0.3446, "step": 10090 }, { "epoch": 0.18227417502654214, "grad_norm": 0.3117503821849823, "learning_rate": 1.8404748161845467e-05, "loss": 0.286, "step": 10095 }, { "epoch": 0.18236445445944285, "grad_norm": 0.35964101552963257, "learning_rate": 1.840321100394749e-05, "loss": 0.2157, "step": 10100 }, { "epoch": 0.18245473389234357, "grad_norm": 0.4494312107563019, "learning_rate": 1.8401673170072283e-05, "loss": 0.294, "step": 10105 }, { "epoch": 0.1825450133252443, "grad_norm": 0.39130812883377075, "learning_rate": 1.8400134660343555e-05, "loss": 0.2839, "step": 10110 }, { "epoch": 0.18263529275814502, "grad_norm": 0.6958088874816895, "learning_rate": 1.839859547488507e-05, "loss": 0.3133, "step": 10115 }, { "epoch": 0.18272557219104574, "grad_norm": 0.5423473119735718, "learning_rate": 1.8397055613820644e-05, "loss": 0.3583, "step": 10120 }, { "epoch": 0.18281585162394645, "grad_norm": 0.521397590637207, "learning_rate": 1.8395515077274146e-05, "loss": 0.316, "step": 10125 }, { "epoch": 0.18290613105684717, "grad_norm": 0.5494081377983093, "learning_rate": 1.83939738653695e-05, "loss": 0.3184, "step": 10130 }, { "epoch": 0.18299641048974788, "grad_norm": 0.49178290367126465, "learning_rate": 1.839243197823069e-05, "loss": 0.2045, "step": 10135 }, { "epoch": 0.1830866899226486, "grad_norm": 0.3609885275363922, "learning_rate": 1.839088941598174e-05, "loss": 0.2909, "step": 10140 }, { "epoch": 0.1831769693555493, "grad_norm": 0.2735249698162079, "learning_rate": 1.8389346178746747e-05, "loss": 0.2842, "step": 10145 }, { "epoch": 0.18326724878845002, "grad_norm": 0.42014428973197937, "learning_rate": 1.8387802266649854e-05, "loss": 0.3625, "step": 10150 }, { "epoch": 0.18335752822135074, "grad_norm": 0.3797282874584198, "learning_rate": 1.8386257679815253e-05, "loss": 0.3603, "step": 10155 }, { "epoch": 0.18344780765425145, "grad_norm": 0.8530354499816895, "learning_rate": 1.838471241836719e-05, "loss": 0.3365, "step": 10160 }, { "epoch": 0.18353808708715216, "grad_norm": 0.4313885271549225, "learning_rate": 1.838316648242998e-05, "loss": 0.2984, "step": 10165 }, { "epoch": 0.18362836652005288, "grad_norm": 0.41217172145843506, "learning_rate": 1.8381619872127978e-05, "loss": 0.2754, "step": 10170 }, { "epoch": 0.1837186459529536, "grad_norm": 0.41457244753837585, "learning_rate": 1.8380072587585597e-05, "loss": 0.4513, "step": 10175 }, { "epoch": 0.1838089253858543, "grad_norm": 0.550830602645874, "learning_rate": 1.8378524628927302e-05, "loss": 0.2716, "step": 10180 }, { "epoch": 0.18389920481875502, "grad_norm": 0.7130752801895142, "learning_rate": 1.837697599627762e-05, "loss": 0.2783, "step": 10185 }, { "epoch": 0.18398948425165573, "grad_norm": 0.4837576448917389, "learning_rate": 1.837542668976113e-05, "loss": 0.1878, "step": 10190 }, { "epoch": 0.18407976368455645, "grad_norm": 0.46950626373291016, "learning_rate": 1.8373876709502455e-05, "loss": 0.3807, "step": 10195 }, { "epoch": 0.18417004311745716, "grad_norm": 0.5759735107421875, "learning_rate": 1.8372326055626285e-05, "loss": 0.3412, "step": 10200 }, { "epoch": 0.18426032255035787, "grad_norm": 0.6385390162467957, "learning_rate": 1.837077472825735e-05, "loss": 0.3626, "step": 10205 }, { "epoch": 0.1843506019832586, "grad_norm": 0.5239001512527466, "learning_rate": 1.8369222727520455e-05, "loss": 0.3327, "step": 10210 }, { "epoch": 0.1844408814161593, "grad_norm": 0.43816807866096497, "learning_rate": 1.836767005354044e-05, "loss": 0.2671, "step": 10215 }, { "epoch": 0.18453116084906002, "grad_norm": 0.8781191110610962, "learning_rate": 1.836611670644221e-05, "loss": 0.34, "step": 10220 }, { "epoch": 0.18462144028196073, "grad_norm": 0.3327060341835022, "learning_rate": 1.836456268635072e-05, "loss": 0.2622, "step": 10225 }, { "epoch": 0.18471171971486144, "grad_norm": 0.338997483253479, "learning_rate": 1.8363007993390973e-05, "loss": 0.2391, "step": 10230 }, { "epoch": 0.18480199914776216, "grad_norm": 0.3980759382247925, "learning_rate": 1.836145262768804e-05, "loss": 0.3819, "step": 10235 }, { "epoch": 0.18489227858066287, "grad_norm": 0.57769376039505, "learning_rate": 1.8359896589367035e-05, "loss": 0.2773, "step": 10240 }, { "epoch": 0.18498255801356359, "grad_norm": 0.30585551261901855, "learning_rate": 1.8358339878553134e-05, "loss": 0.3717, "step": 10245 }, { "epoch": 0.1850728374464643, "grad_norm": 0.5136041641235352, "learning_rate": 1.835678249537156e-05, "loss": 0.3142, "step": 10250 }, { "epoch": 0.18516311687936501, "grad_norm": 0.4172489047050476, "learning_rate": 1.8355224439947594e-05, "loss": 0.3688, "step": 10255 }, { "epoch": 0.18525339631226573, "grad_norm": 0.6374697685241699, "learning_rate": 1.8353665712406572e-05, "loss": 0.3623, "step": 10260 }, { "epoch": 0.18534367574516644, "grad_norm": 0.3242988586425781, "learning_rate": 1.835210631287388e-05, "loss": 0.1904, "step": 10265 }, { "epoch": 0.18543395517806716, "grad_norm": 0.46144676208496094, "learning_rate": 1.8350546241474957e-05, "loss": 0.3244, "step": 10270 }, { "epoch": 0.18552423461096787, "grad_norm": 0.46591830253601074, "learning_rate": 1.8348985498335306e-05, "loss": 0.3928, "step": 10275 }, { "epoch": 0.18561451404386858, "grad_norm": 0.4382494390010834, "learning_rate": 1.8347424083580473e-05, "loss": 0.3477, "step": 10280 }, { "epoch": 0.1857047934767693, "grad_norm": 1.1415561437606812, "learning_rate": 1.8345861997336067e-05, "loss": 0.2341, "step": 10285 }, { "epoch": 0.18579507290967, "grad_norm": 0.403601735830307, "learning_rate": 1.834429923972774e-05, "loss": 0.3129, "step": 10290 }, { "epoch": 0.18588535234257073, "grad_norm": 1.1687211990356445, "learning_rate": 1.8342735810881214e-05, "loss": 0.3114, "step": 10295 }, { "epoch": 0.18597563177547144, "grad_norm": 0.42405131459236145, "learning_rate": 1.8341171710922244e-05, "loss": 0.3974, "step": 10300 }, { "epoch": 0.18606591120837215, "grad_norm": 1.2109848260879517, "learning_rate": 1.833960693997666e-05, "loss": 0.3509, "step": 10305 }, { "epoch": 0.18615619064127287, "grad_norm": 0.6749964952468872, "learning_rate": 1.833804149817033e-05, "loss": 0.2898, "step": 10310 }, { "epoch": 0.18624647007417358, "grad_norm": 0.5378634333610535, "learning_rate": 1.8336475385629186e-05, "loss": 0.2738, "step": 10315 }, { "epoch": 0.1863367495070743, "grad_norm": 0.5143244862556458, "learning_rate": 1.8334908602479206e-05, "loss": 0.2809, "step": 10320 }, { "epoch": 0.186427028939975, "grad_norm": 0.3657784163951874, "learning_rate": 1.833334114884643e-05, "loss": 0.2773, "step": 10325 }, { "epoch": 0.18651730837287572, "grad_norm": 0.5001368522644043, "learning_rate": 1.8331773024856953e-05, "loss": 0.2747, "step": 10330 }, { "epoch": 0.18660758780577644, "grad_norm": 0.4016692340373993, "learning_rate": 1.833020423063691e-05, "loss": 0.3401, "step": 10335 }, { "epoch": 0.18669786723867715, "grad_norm": 0.47351789474487305, "learning_rate": 1.8328634766312505e-05, "loss": 0.2704, "step": 10340 }, { "epoch": 0.18678814667157786, "grad_norm": 0.39467868208885193, "learning_rate": 1.8327064632009986e-05, "loss": 0.3595, "step": 10345 }, { "epoch": 0.18687842610447858, "grad_norm": 0.5907410979270935, "learning_rate": 1.8325493827855664e-05, "loss": 0.3104, "step": 10350 }, { "epoch": 0.1869687055373793, "grad_norm": 0.4638453722000122, "learning_rate": 1.832392235397589e-05, "loss": 0.18, "step": 10355 }, { "epoch": 0.18705898497028, "grad_norm": 0.6245995163917542, "learning_rate": 1.8322350210497092e-05, "loss": 0.2667, "step": 10360 }, { "epoch": 0.18714926440318072, "grad_norm": 0.39613252878189087, "learning_rate": 1.832077739754572e-05, "loss": 0.4438, "step": 10365 }, { "epoch": 0.18723954383608143, "grad_norm": 0.5103363990783691, "learning_rate": 1.831920391524831e-05, "loss": 0.3216, "step": 10370 }, { "epoch": 0.18732982326898215, "grad_norm": 0.3647821545600891, "learning_rate": 1.831762976373143e-05, "loss": 0.2869, "step": 10375 }, { "epoch": 0.18742010270188286, "grad_norm": 0.4628177583217621, "learning_rate": 1.831605494312171e-05, "loss": 0.2764, "step": 10380 }, { "epoch": 0.18751038213478358, "grad_norm": 0.23167622089385986, "learning_rate": 1.8314479453545836e-05, "loss": 0.2092, "step": 10385 }, { "epoch": 0.1876006615676843, "grad_norm": 0.39833158254623413, "learning_rate": 1.8312903295130538e-05, "loss": 0.3064, "step": 10390 }, { "epoch": 0.187690941000585, "grad_norm": 0.5542736053466797, "learning_rate": 1.8311326468002613e-05, "loss": 0.2753, "step": 10395 }, { "epoch": 0.18778122043348572, "grad_norm": 0.46590352058410645, "learning_rate": 1.83097489722889e-05, "loss": 0.3642, "step": 10400 }, { "epoch": 0.18787149986638643, "grad_norm": 0.41070303320884705, "learning_rate": 1.83081708081163e-05, "loss": 0.3594, "step": 10405 }, { "epoch": 0.18796177929928715, "grad_norm": 0.42542794346809387, "learning_rate": 1.830659197561177e-05, "loss": 0.2966, "step": 10410 }, { "epoch": 0.18805205873218786, "grad_norm": 0.3459967076778412, "learning_rate": 1.8305012474902305e-05, "loss": 0.3753, "step": 10415 }, { "epoch": 0.18814233816508857, "grad_norm": 0.35253915190696716, "learning_rate": 1.8303432306114972e-05, "loss": 0.2909, "step": 10420 }, { "epoch": 0.1882326175979893, "grad_norm": 0.34542712569236755, "learning_rate": 1.8301851469376885e-05, "loss": 0.2579, "step": 10425 }, { "epoch": 0.18832289703089, "grad_norm": 0.4709479510784149, "learning_rate": 1.8300269964815203e-05, "loss": 0.3706, "step": 10430 }, { "epoch": 0.18841317646379072, "grad_norm": 0.4012754261493683, "learning_rate": 1.829868779255715e-05, "loss": 0.2178, "step": 10435 }, { "epoch": 0.18850345589669143, "grad_norm": 0.3635656535625458, "learning_rate": 1.8297104952730003e-05, "loss": 0.3075, "step": 10440 }, { "epoch": 0.18859373532959214, "grad_norm": 0.3134946823120117, "learning_rate": 1.829552144546109e-05, "loss": 0.2807, "step": 10445 }, { "epoch": 0.18868401476249286, "grad_norm": 0.3195219933986664, "learning_rate": 1.829393727087779e-05, "loss": 0.294, "step": 10450 }, { "epoch": 0.18877429419539357, "grad_norm": 0.5945743322372437, "learning_rate": 1.829235242910754e-05, "loss": 0.3601, "step": 10455 }, { "epoch": 0.18886457362829429, "grad_norm": 0.41216209530830383, "learning_rate": 1.8290766920277826e-05, "loss": 0.3322, "step": 10460 }, { "epoch": 0.188954853061195, "grad_norm": 0.36902883648872375, "learning_rate": 1.8289180744516194e-05, "loss": 0.2222, "step": 10465 }, { "epoch": 0.1890451324940957, "grad_norm": 0.6106470227241516, "learning_rate": 1.8287593901950242e-05, "loss": 0.4442, "step": 10470 }, { "epoch": 0.18913541192699643, "grad_norm": 0.5889748930931091, "learning_rate": 1.8286006392707616e-05, "loss": 0.3389, "step": 10475 }, { "epoch": 0.18922569135989714, "grad_norm": 0.38097482919692993, "learning_rate": 1.8284418216916017e-05, "loss": 0.3128, "step": 10480 }, { "epoch": 0.18931597079279786, "grad_norm": 0.3452233374118805, "learning_rate": 1.828282937470321e-05, "loss": 0.2697, "step": 10485 }, { "epoch": 0.18940625022569857, "grad_norm": 0.6536673307418823, "learning_rate": 1.8281239866197e-05, "loss": 0.3097, "step": 10490 }, { "epoch": 0.18949652965859928, "grad_norm": 1.0002179145812988, "learning_rate": 1.8279649691525255e-05, "loss": 0.449, "step": 10495 }, { "epoch": 0.1895868090915, "grad_norm": 0.3609928488731384, "learning_rate": 1.827805885081589e-05, "loss": 0.236, "step": 10500 }, { "epoch": 0.18967708852440074, "grad_norm": 0.4937584102153778, "learning_rate": 1.8276467344196876e-05, "loss": 0.363, "step": 10505 }, { "epoch": 0.18976736795730145, "grad_norm": 0.4397891163825989, "learning_rate": 1.8274875171796244e-05, "loss": 0.3586, "step": 10510 }, { "epoch": 0.18985764739020217, "grad_norm": 0.40915870666503906, "learning_rate": 1.8273282333742065e-05, "loss": 0.2808, "step": 10515 }, { "epoch": 0.18994792682310288, "grad_norm": 0.36115843057632446, "learning_rate": 1.8271688830162477e-05, "loss": 0.3517, "step": 10520 }, { "epoch": 0.1900382062560036, "grad_norm": 0.6014983057975769, "learning_rate": 1.8270094661185662e-05, "loss": 0.3273, "step": 10525 }, { "epoch": 0.1901284856889043, "grad_norm": 0.5097769498825073, "learning_rate": 1.826849982693986e-05, "loss": 0.435, "step": 10530 }, { "epoch": 0.19021876512180502, "grad_norm": 0.5310569405555725, "learning_rate": 1.8266904327553367e-05, "loss": 0.3127, "step": 10535 }, { "epoch": 0.19030904455470574, "grad_norm": 0.5188735127449036, "learning_rate": 1.8265308163154525e-05, "loss": 0.3436, "step": 10540 }, { "epoch": 0.19039932398760645, "grad_norm": 0.5465673804283142, "learning_rate": 1.8263711333871737e-05, "loss": 0.2674, "step": 10545 }, { "epoch": 0.19048960342050716, "grad_norm": 0.5335100889205933, "learning_rate": 1.8262113839833457e-05, "loss": 0.3317, "step": 10550 }, { "epoch": 0.19057988285340788, "grad_norm": 0.5629705190658569, "learning_rate": 1.826051568116819e-05, "loss": 0.4154, "step": 10555 }, { "epoch": 0.1906701622863086, "grad_norm": 0.4672110378742218, "learning_rate": 1.8258916858004493e-05, "loss": 0.3633, "step": 10560 }, { "epoch": 0.1907604417192093, "grad_norm": 0.6804426908493042, "learning_rate": 1.8257317370470983e-05, "loss": 0.4061, "step": 10565 }, { "epoch": 0.19085072115211002, "grad_norm": 0.44888773560523987, "learning_rate": 1.8255717218696333e-05, "loss": 0.2871, "step": 10570 }, { "epoch": 0.19094100058501073, "grad_norm": 0.4249172806739807, "learning_rate": 1.825411640280925e-05, "loss": 0.2793, "step": 10575 }, { "epoch": 0.19103128001791145, "grad_norm": 0.2748965620994568, "learning_rate": 1.825251492293852e-05, "loss": 0.382, "step": 10580 }, { "epoch": 0.19112155945081216, "grad_norm": 0.5372962355613708, "learning_rate": 1.8250912779212965e-05, "loss": 0.3791, "step": 10585 }, { "epoch": 0.19121183888371288, "grad_norm": 0.4297412633895874, "learning_rate": 1.8249309971761465e-05, "loss": 0.3343, "step": 10590 }, { "epoch": 0.1913021183166136, "grad_norm": 0.49882203340530396, "learning_rate": 1.8247706500712957e-05, "loss": 0.3254, "step": 10595 }, { "epoch": 0.1913923977495143, "grad_norm": 0.37007367610931396, "learning_rate": 1.824610236619643e-05, "loss": 0.316, "step": 10600 }, { "epoch": 0.19148267718241502, "grad_norm": 0.35884764790534973, "learning_rate": 1.824449756834092e-05, "loss": 0.2357, "step": 10605 }, { "epoch": 0.19157295661531573, "grad_norm": 0.35997143387794495, "learning_rate": 1.8242892107275526e-05, "loss": 0.3166, "step": 10610 }, { "epoch": 0.19166323604821645, "grad_norm": 0.41313913464546204, "learning_rate": 1.8241285983129393e-05, "loss": 0.3539, "step": 10615 }, { "epoch": 0.19175351548111716, "grad_norm": 0.3478839695453644, "learning_rate": 1.8239679196031722e-05, "loss": 0.3434, "step": 10620 }, { "epoch": 0.19184379491401787, "grad_norm": 0.4098747670650482, "learning_rate": 1.8238071746111768e-05, "loss": 0.3339, "step": 10625 }, { "epoch": 0.1919340743469186, "grad_norm": 0.587702751159668, "learning_rate": 1.8236463633498838e-05, "loss": 0.3052, "step": 10630 }, { "epoch": 0.1920243537798193, "grad_norm": 0.7023888230323792, "learning_rate": 1.8234854858322292e-05, "loss": 0.2935, "step": 10635 }, { "epoch": 0.19211463321272002, "grad_norm": 0.40667617321014404, "learning_rate": 1.823324542071155e-05, "loss": 0.3309, "step": 10640 }, { "epoch": 0.19220491264562073, "grad_norm": 0.3113376498222351, "learning_rate": 1.8231635320796072e-05, "loss": 0.3944, "step": 10645 }, { "epoch": 0.19229519207852144, "grad_norm": 0.38273394107818604, "learning_rate": 1.8230024558705384e-05, "loss": 0.2757, "step": 10650 }, { "epoch": 0.19238547151142216, "grad_norm": 0.38133129477500916, "learning_rate": 1.8228413134569058e-05, "loss": 0.3228, "step": 10655 }, { "epoch": 0.19247575094432287, "grad_norm": 0.5952944755554199, "learning_rate": 1.822680104851672e-05, "loss": 0.3121, "step": 10660 }, { "epoch": 0.19256603037722358, "grad_norm": 0.4904284179210663, "learning_rate": 1.8225188300678055e-05, "loss": 0.3406, "step": 10665 }, { "epoch": 0.1926563098101243, "grad_norm": 0.5586575865745544, "learning_rate": 1.8223574891182793e-05, "loss": 0.2835, "step": 10670 }, { "epoch": 0.192746589243025, "grad_norm": 0.4699321687221527, "learning_rate": 1.8221960820160722e-05, "loss": 0.3106, "step": 10675 }, { "epoch": 0.19283686867592573, "grad_norm": 0.756039559841156, "learning_rate": 1.822034608774168e-05, "loss": 0.212, "step": 10680 }, { "epoch": 0.19292714810882644, "grad_norm": 0.43197789788246155, "learning_rate": 1.8218730694055566e-05, "loss": 0.2714, "step": 10685 }, { "epoch": 0.19301742754172715, "grad_norm": 0.5693886876106262, "learning_rate": 1.8217114639232327e-05, "loss": 0.4156, "step": 10690 }, { "epoch": 0.19310770697462787, "grad_norm": 0.45394495129585266, "learning_rate": 1.8215497923401953e-05, "loss": 0.3603, "step": 10695 }, { "epoch": 0.19319798640752858, "grad_norm": 0.6002922058105469, "learning_rate": 1.8213880546694506e-05, "loss": 0.3311, "step": 10700 }, { "epoch": 0.1932882658404293, "grad_norm": 0.3669627606868744, "learning_rate": 1.8212262509240092e-05, "loss": 0.3286, "step": 10705 }, { "epoch": 0.19337854527333, "grad_norm": 0.3457637429237366, "learning_rate": 1.8210643811168863e-05, "loss": 0.3186, "step": 10710 }, { "epoch": 0.19346882470623072, "grad_norm": 0.32688283920288086, "learning_rate": 1.820902445261104e-05, "loss": 0.2353, "step": 10715 }, { "epoch": 0.19355910413913144, "grad_norm": 0.5048171281814575, "learning_rate": 1.8207404433696883e-05, "loss": 0.2595, "step": 10720 }, { "epoch": 0.19364938357203215, "grad_norm": 0.4015275835990906, "learning_rate": 1.8205783754556717e-05, "loss": 0.2992, "step": 10725 }, { "epoch": 0.19373966300493287, "grad_norm": 0.5477710366249084, "learning_rate": 1.820416241532091e-05, "loss": 0.3586, "step": 10730 }, { "epoch": 0.19382994243783358, "grad_norm": 0.4308444857597351, "learning_rate": 1.820254041611988e-05, "loss": 0.3833, "step": 10735 }, { "epoch": 0.1939202218707343, "grad_norm": 0.6709811091423035, "learning_rate": 1.8200917757084117e-05, "loss": 0.2602, "step": 10740 }, { "epoch": 0.194010501303635, "grad_norm": 0.34628912806510925, "learning_rate": 1.8199294438344145e-05, "loss": 0.3714, "step": 10745 }, { "epoch": 0.19410078073653572, "grad_norm": 0.4252835214138031, "learning_rate": 1.819767046003055e-05, "loss": 0.3544, "step": 10750 }, { "epoch": 0.19419106016943644, "grad_norm": 0.6269519329071045, "learning_rate": 1.819604582227397e-05, "loss": 0.2497, "step": 10755 }, { "epoch": 0.19428133960233715, "grad_norm": 0.3291480541229248, "learning_rate": 1.8194420525205094e-05, "loss": 0.3225, "step": 10760 }, { "epoch": 0.19437161903523786, "grad_norm": 0.33760547637939453, "learning_rate": 1.8192794568954667e-05, "loss": 0.2476, "step": 10765 }, { "epoch": 0.19446189846813858, "grad_norm": 0.5854130983352661, "learning_rate": 1.8191167953653483e-05, "loss": 0.419, "step": 10770 }, { "epoch": 0.1945521779010393, "grad_norm": 0.5488379001617432, "learning_rate": 1.8189540679432393e-05, "loss": 0.3921, "step": 10775 }, { "epoch": 0.19464245733394, "grad_norm": 0.4348668158054352, "learning_rate": 1.81879127464223e-05, "loss": 0.2644, "step": 10780 }, { "epoch": 0.19473273676684072, "grad_norm": 0.4301225244998932, "learning_rate": 1.8186284154754158e-05, "loss": 0.366, "step": 10785 }, { "epoch": 0.19482301619974143, "grad_norm": 0.43786948919296265, "learning_rate": 1.8184654904558976e-05, "loss": 0.3075, "step": 10790 }, { "epoch": 0.19491329563264215, "grad_norm": 0.440784752368927, "learning_rate": 1.8183024995967815e-05, "loss": 0.3134, "step": 10795 }, { "epoch": 0.19500357506554286, "grad_norm": 0.4838646352291107, "learning_rate": 1.818139442911179e-05, "loss": 0.2791, "step": 10800 }, { "epoch": 0.19509385449844358, "grad_norm": 0.3454958200454712, "learning_rate": 1.8179763204122066e-05, "loss": 0.383, "step": 10805 }, { "epoch": 0.1951841339313443, "grad_norm": 0.49316585063934326, "learning_rate": 1.8178131321129865e-05, "loss": 0.379, "step": 10810 }, { "epoch": 0.195274413364245, "grad_norm": 0.7945411205291748, "learning_rate": 1.8176498780266462e-05, "loss": 0.3332, "step": 10815 }, { "epoch": 0.19536469279714572, "grad_norm": 0.6220671534538269, "learning_rate": 1.817486558166318e-05, "loss": 0.3037, "step": 10820 }, { "epoch": 0.19545497223004643, "grad_norm": 0.4711354672908783, "learning_rate": 1.8173231725451395e-05, "loss": 0.2762, "step": 10825 }, { "epoch": 0.19554525166294714, "grad_norm": 0.5520678758621216, "learning_rate": 1.817159721176255e-05, "loss": 0.4785, "step": 10830 }, { "epoch": 0.19563553109584786, "grad_norm": 0.43351519107818604, "learning_rate": 1.816996204072812e-05, "loss": 0.2783, "step": 10835 }, { "epoch": 0.19572581052874857, "grad_norm": 0.4619917571544647, "learning_rate": 1.8168326212479643e-05, "loss": 0.4033, "step": 10840 }, { "epoch": 0.1958160899616493, "grad_norm": 0.3751065135002136, "learning_rate": 1.8166689727148715e-05, "loss": 0.2889, "step": 10845 }, { "epoch": 0.19590636939455, "grad_norm": 0.5061205625534058, "learning_rate": 1.8165052584866972e-05, "loss": 0.2545, "step": 10850 }, { "epoch": 0.19599664882745071, "grad_norm": 0.43692877888679504, "learning_rate": 1.8163414785766117e-05, "loss": 0.2992, "step": 10855 }, { "epoch": 0.19608692826035143, "grad_norm": 0.41942158341407776, "learning_rate": 1.8161776329977897e-05, "loss": 0.1642, "step": 10860 }, { "epoch": 0.19617720769325214, "grad_norm": 0.4778582453727722, "learning_rate": 1.8160137217634108e-05, "loss": 0.2666, "step": 10865 }, { "epoch": 0.19626748712615286, "grad_norm": 0.559712827205658, "learning_rate": 1.8158497448866615e-05, "loss": 0.3298, "step": 10870 }, { "epoch": 0.19635776655905357, "grad_norm": 0.493183970451355, "learning_rate": 1.8156857023807316e-05, "loss": 0.3691, "step": 10875 }, { "epoch": 0.19644804599195428, "grad_norm": 0.5540546178817749, "learning_rate": 1.815521594258818e-05, "loss": 0.3137, "step": 10880 }, { "epoch": 0.196538325424855, "grad_norm": 0.3892921805381775, "learning_rate": 1.8153574205341214e-05, "loss": 0.26, "step": 10885 }, { "epoch": 0.1966286048577557, "grad_norm": 0.4154384732246399, "learning_rate": 1.8151931812198483e-05, "loss": 0.2584, "step": 10890 }, { "epoch": 0.19671888429065643, "grad_norm": 0.5406801104545593, "learning_rate": 1.815028876329211e-05, "loss": 0.3055, "step": 10895 }, { "epoch": 0.19680916372355717, "grad_norm": 0.6369251012802124, "learning_rate": 1.8148645058754265e-05, "loss": 0.4461, "step": 10900 }, { "epoch": 0.19689944315645788, "grad_norm": 0.4940812885761261, "learning_rate": 1.814700069871717e-05, "loss": 0.2958, "step": 10905 }, { "epoch": 0.1969897225893586, "grad_norm": 0.4416468143463135, "learning_rate": 1.8145355683313108e-05, "loss": 0.2517, "step": 10910 }, { "epoch": 0.1970800020222593, "grad_norm": 0.3803589344024658, "learning_rate": 1.8143710012674396e-05, "loss": 0.3081, "step": 10915 }, { "epoch": 0.19717028145516002, "grad_norm": 0.6976109743118286, "learning_rate": 1.814206368693343e-05, "loss": 0.3953, "step": 10920 }, { "epoch": 0.19726056088806074, "grad_norm": 0.3147580921649933, "learning_rate": 1.8140416706222638e-05, "loss": 0.2785, "step": 10925 }, { "epoch": 0.19735084032096145, "grad_norm": 0.3766973912715912, "learning_rate": 1.8138769070674503e-05, "loss": 0.2744, "step": 10930 }, { "epoch": 0.19744111975386217, "grad_norm": 0.3725707530975342, "learning_rate": 1.8137120780421576e-05, "loss": 0.2794, "step": 10935 }, { "epoch": 0.19753139918676288, "grad_norm": 0.3790420591831207, "learning_rate": 1.8135471835596444e-05, "loss": 0.265, "step": 10940 }, { "epoch": 0.1976216786196636, "grad_norm": 0.4833362400531769, "learning_rate": 1.8133822236331756e-05, "loss": 0.2857, "step": 10945 }, { "epoch": 0.1977119580525643, "grad_norm": 0.5792388916015625, "learning_rate": 1.8132171982760202e-05, "loss": 0.3057, "step": 10950 }, { "epoch": 0.19780223748546502, "grad_norm": 0.4359922409057617, "learning_rate": 1.813052107501454e-05, "loss": 0.2782, "step": 10955 }, { "epoch": 0.19789251691836574, "grad_norm": 0.3914576768875122, "learning_rate": 1.8128869513227577e-05, "loss": 0.2832, "step": 10960 }, { "epoch": 0.19798279635126645, "grad_norm": 0.49458959698677063, "learning_rate": 1.812721729753216e-05, "loss": 0.2529, "step": 10965 }, { "epoch": 0.19807307578416716, "grad_norm": 0.40677347779273987, "learning_rate": 1.81255644280612e-05, "loss": 0.1789, "step": 10970 }, { "epoch": 0.19816335521706788, "grad_norm": 0.3918363153934479, "learning_rate": 1.8123910904947665e-05, "loss": 0.3244, "step": 10975 }, { "epoch": 0.1982536346499686, "grad_norm": 0.44329938292503357, "learning_rate": 1.8122256728324563e-05, "loss": 0.2231, "step": 10980 }, { "epoch": 0.1983439140828693, "grad_norm": 0.390706866979599, "learning_rate": 1.812060189832496e-05, "loss": 0.3747, "step": 10985 }, { "epoch": 0.19843419351577002, "grad_norm": 0.4873644709587097, "learning_rate": 1.811894641508198e-05, "loss": 0.3232, "step": 10990 }, { "epoch": 0.19852447294867073, "grad_norm": 0.32289350032806396, "learning_rate": 1.811729027872879e-05, "loss": 0.3092, "step": 10995 }, { "epoch": 0.19861475238157145, "grad_norm": 0.8177574872970581, "learning_rate": 1.811563348939861e-05, "loss": 0.3436, "step": 11000 }, { "epoch": 0.19870503181447216, "grad_norm": 0.6034993529319763, "learning_rate": 1.811397604722473e-05, "loss": 0.3432, "step": 11005 }, { "epoch": 0.19879531124737287, "grad_norm": 0.5464856028556824, "learning_rate": 1.8112317952340467e-05, "loss": 0.2737, "step": 11010 }, { "epoch": 0.1988855906802736, "grad_norm": 0.41048601269721985, "learning_rate": 1.8110659204879208e-05, "loss": 0.3888, "step": 11015 }, { "epoch": 0.1989758701131743, "grad_norm": 0.5404345989227295, "learning_rate": 1.8108999804974386e-05, "loss": 0.3413, "step": 11020 }, { "epoch": 0.19906614954607502, "grad_norm": 0.3721117079257965, "learning_rate": 1.810733975275949e-05, "loss": 0.2864, "step": 11025 }, { "epoch": 0.19915642897897573, "grad_norm": 0.4072383940219879, "learning_rate": 1.8105679048368056e-05, "loss": 0.2911, "step": 11030 }, { "epoch": 0.19924670841187644, "grad_norm": 0.4085667133331299, "learning_rate": 1.8104017691933674e-05, "loss": 0.2769, "step": 11035 }, { "epoch": 0.19933698784477716, "grad_norm": 0.529788076877594, "learning_rate": 1.8102355683589993e-05, "loss": 0.3122, "step": 11040 }, { "epoch": 0.19942726727767787, "grad_norm": 0.30550462007522583, "learning_rate": 1.8100693023470705e-05, "loss": 0.2086, "step": 11045 }, { "epoch": 0.19951754671057859, "grad_norm": 0.6182094812393188, "learning_rate": 1.8099029711709564e-05, "loss": 0.3375, "step": 11050 }, { "epoch": 0.1996078261434793, "grad_norm": 0.449313223361969, "learning_rate": 1.8097365748440367e-05, "loss": 0.2956, "step": 11055 }, { "epoch": 0.19969810557638001, "grad_norm": 3.341446876525879, "learning_rate": 1.8095701133796968e-05, "loss": 0.2941, "step": 11060 }, { "epoch": 0.19978838500928073, "grad_norm": 0.3782710134983063, "learning_rate": 1.8094035867913274e-05, "loss": 0.4157, "step": 11065 }, { "epoch": 0.19987866444218144, "grad_norm": 0.9143231511116028, "learning_rate": 1.8092369950923244e-05, "loss": 0.3257, "step": 11070 }, { "epoch": 0.19996894387508216, "grad_norm": 0.5001149773597717, "learning_rate": 1.809070338296089e-05, "loss": 0.338, "step": 11075 }, { "epoch": 0.20005922330798287, "grad_norm": 0.5527740716934204, "learning_rate": 1.808903616416027e-05, "loss": 0.4017, "step": 11080 }, { "epoch": 0.20014950274088358, "grad_norm": 0.24858571588993073, "learning_rate": 1.808736829465551e-05, "loss": 0.3122, "step": 11085 }, { "epoch": 0.2002397821737843, "grad_norm": 0.4394090473651886, "learning_rate": 1.8085699774580767e-05, "loss": 0.2676, "step": 11090 }, { "epoch": 0.200330061606685, "grad_norm": 0.4978356957435608, "learning_rate": 1.8084030604070268e-05, "loss": 0.3148, "step": 11095 }, { "epoch": 0.20042034103958573, "grad_norm": 0.5462652444839478, "learning_rate": 1.808236078325828e-05, "loss": 0.2628, "step": 11100 }, { "epoch": 0.20051062047248644, "grad_norm": 0.46849769353866577, "learning_rate": 1.8080690312279137e-05, "loss": 0.3624, "step": 11105 }, { "epoch": 0.20060089990538715, "grad_norm": 0.4754716157913208, "learning_rate": 1.8079019191267208e-05, "loss": 0.265, "step": 11110 }, { "epoch": 0.20069117933828787, "grad_norm": 0.24722929298877716, "learning_rate": 1.8077347420356925e-05, "loss": 0.335, "step": 11115 }, { "epoch": 0.20078145877118858, "grad_norm": 0.5693946480751038, "learning_rate": 1.8075674999682768e-05, "loss": 0.3621, "step": 11120 }, { "epoch": 0.2008717382040893, "grad_norm": 0.3881761133670807, "learning_rate": 1.8074001929379277e-05, "loss": 0.2863, "step": 11125 }, { "epoch": 0.20096201763699, "grad_norm": 0.46133917570114136, "learning_rate": 1.8072328209581033e-05, "loss": 0.4142, "step": 11130 }, { "epoch": 0.20105229706989072, "grad_norm": 0.4075993299484253, "learning_rate": 1.8070653840422676e-05, "loss": 0.3182, "step": 11135 }, { "epoch": 0.20114257650279144, "grad_norm": 0.5719988346099854, "learning_rate": 1.8068978822038895e-05, "loss": 0.3761, "step": 11140 }, { "epoch": 0.20123285593569215, "grad_norm": 0.45642632246017456, "learning_rate": 1.8067303154564438e-05, "loss": 0.3196, "step": 11145 }, { "epoch": 0.20132313536859286, "grad_norm": 0.6719714999198914, "learning_rate": 1.8065626838134097e-05, "loss": 0.2788, "step": 11150 }, { "epoch": 0.20141341480149358, "grad_norm": 0.4135879576206207, "learning_rate": 1.8063949872882715e-05, "loss": 0.2748, "step": 11155 }, { "epoch": 0.2015036942343943, "grad_norm": 0.542934775352478, "learning_rate": 1.80622722589452e-05, "loss": 0.3757, "step": 11160 }, { "epoch": 0.201593973667295, "grad_norm": 0.36689525842666626, "learning_rate": 1.80605939964565e-05, "loss": 0.314, "step": 11165 }, { "epoch": 0.20168425310019572, "grad_norm": 0.48210838437080383, "learning_rate": 1.805891508555162e-05, "loss": 0.3291, "step": 11170 }, { "epoch": 0.20177453253309643, "grad_norm": 0.5560554265975952, "learning_rate": 1.8057235526365612e-05, "loss": 0.3179, "step": 11175 }, { "epoch": 0.20186481196599715, "grad_norm": 0.3980230391025543, "learning_rate": 1.805555531903359e-05, "loss": 0.319, "step": 11180 }, { "epoch": 0.20195509139889786, "grad_norm": 0.6233363151550293, "learning_rate": 1.8053874463690713e-05, "loss": 0.3082, "step": 11185 }, { "epoch": 0.20204537083179858, "grad_norm": 0.4474990665912628, "learning_rate": 1.805219296047219e-05, "loss": 0.3062, "step": 11190 }, { "epoch": 0.2021356502646993, "grad_norm": 0.40642160177230835, "learning_rate": 1.805051080951329e-05, "loss": 0.317, "step": 11195 }, { "epoch": 0.2022259296976, "grad_norm": 0.4216107130050659, "learning_rate": 1.804882801094933e-05, "loss": 0.3419, "step": 11200 }, { "epoch": 0.20231620913050072, "grad_norm": 0.31105688214302063, "learning_rate": 1.8047144564915674e-05, "loss": 0.192, "step": 11205 }, { "epoch": 0.20240648856340143, "grad_norm": 0.44025835394859314, "learning_rate": 1.804546047154775e-05, "loss": 0.2557, "step": 11210 }, { "epoch": 0.20249676799630215, "grad_norm": 0.808018684387207, "learning_rate": 1.8043775730981027e-05, "loss": 0.3543, "step": 11215 }, { "epoch": 0.20258704742920286, "grad_norm": 0.45786386728286743, "learning_rate": 1.8042090343351026e-05, "loss": 0.3015, "step": 11220 }, { "epoch": 0.20267732686210357, "grad_norm": 0.4077109396457672, "learning_rate": 1.804040430879333e-05, "loss": 0.3178, "step": 11225 }, { "epoch": 0.2027676062950043, "grad_norm": 1.3298776149749756, "learning_rate": 1.8038717627443566e-05, "loss": 0.3159, "step": 11230 }, { "epoch": 0.202857885727905, "grad_norm": 0.47611549496650696, "learning_rate": 1.8037030299437418e-05, "loss": 0.1996, "step": 11235 }, { "epoch": 0.20294816516080572, "grad_norm": 0.429661363363266, "learning_rate": 1.803534232491062e-05, "loss": 0.2496, "step": 11240 }, { "epoch": 0.20303844459370643, "grad_norm": 0.5555011034011841, "learning_rate": 1.8033653703998947e-05, "loss": 0.3312, "step": 11245 }, { "epoch": 0.20312872402660714, "grad_norm": 0.6299427151679993, "learning_rate": 1.8031964436838253e-05, "loss": 0.2935, "step": 11250 }, { "epoch": 0.20321900345950786, "grad_norm": 0.4307705760002136, "learning_rate": 1.8030274523564412e-05, "loss": 0.1676, "step": 11255 }, { "epoch": 0.20330928289240857, "grad_norm": 0.3759838044643402, "learning_rate": 1.8028583964313372e-05, "loss": 0.3089, "step": 11260 }, { "epoch": 0.20339956232530929, "grad_norm": 0.381085604429245, "learning_rate": 1.8026892759221126e-05, "loss": 0.2365, "step": 11265 }, { "epoch": 0.20348984175821, "grad_norm": 0.41883227229118347, "learning_rate": 1.8025200908423718e-05, "loss": 0.2366, "step": 11270 }, { "epoch": 0.2035801211911107, "grad_norm": 0.5832628607749939, "learning_rate": 1.8023508412057246e-05, "loss": 0.3491, "step": 11275 }, { "epoch": 0.20367040062401143, "grad_norm": 0.48159259557724, "learning_rate": 1.8021815270257858e-05, "loss": 0.2536, "step": 11280 }, { "epoch": 0.20376068005691214, "grad_norm": 0.36705389618873596, "learning_rate": 1.802012148316176e-05, "loss": 0.2455, "step": 11285 }, { "epoch": 0.20385095948981286, "grad_norm": 0.5337108373641968, "learning_rate": 1.8018427050905197e-05, "loss": 0.2974, "step": 11290 }, { "epoch": 0.2039412389227136, "grad_norm": 0.7574781775474548, "learning_rate": 1.801673197362448e-05, "loss": 0.3361, "step": 11295 }, { "epoch": 0.2040315183556143, "grad_norm": 0.46422940492630005, "learning_rate": 1.801503625145596e-05, "loss": 0.3119, "step": 11300 }, { "epoch": 0.20412179778851502, "grad_norm": 0.4504513144493103, "learning_rate": 1.801333988453605e-05, "loss": 0.2062, "step": 11305 }, { "epoch": 0.20421207722141574, "grad_norm": 1.045087456703186, "learning_rate": 1.8011642873001206e-05, "loss": 0.2846, "step": 11310 }, { "epoch": 0.20430235665431645, "grad_norm": 0.5900313854217529, "learning_rate": 1.800994521698795e-05, "loss": 0.2714, "step": 11315 }, { "epoch": 0.20439263608721717, "grad_norm": 0.47045981884002686, "learning_rate": 1.8008246916632832e-05, "loss": 0.311, "step": 11320 }, { "epoch": 0.20448291552011788, "grad_norm": 0.5298904776573181, "learning_rate": 1.8006547972072476e-05, "loss": 0.3081, "step": 11325 }, { "epoch": 0.2045731949530186, "grad_norm": 0.6724792718887329, "learning_rate": 1.800484838344355e-05, "loss": 0.3135, "step": 11330 }, { "epoch": 0.2046634743859193, "grad_norm": 0.5559977293014526, "learning_rate": 1.8003148150882777e-05, "loss": 0.3187, "step": 11335 }, { "epoch": 0.20475375381882002, "grad_norm": 0.5023704171180725, "learning_rate": 1.8001447274526917e-05, "loss": 0.2417, "step": 11340 }, { "epoch": 0.20484403325172074, "grad_norm": 0.45989304780960083, "learning_rate": 1.7999745754512802e-05, "loss": 0.2501, "step": 11345 }, { "epoch": 0.20493431268462145, "grad_norm": 0.5148830413818359, "learning_rate": 1.7998043590977307e-05, "loss": 0.3235, "step": 11350 }, { "epoch": 0.20502459211752216, "grad_norm": 0.7312597632408142, "learning_rate": 1.7996340784057354e-05, "loss": 0.1839, "step": 11355 }, { "epoch": 0.20511487155042288, "grad_norm": 0.5773922801017761, "learning_rate": 1.7994637333889925e-05, "loss": 0.3008, "step": 11360 }, { "epoch": 0.2052051509833236, "grad_norm": 0.5040149092674255, "learning_rate": 1.799293324061205e-05, "loss": 0.3197, "step": 11365 }, { "epoch": 0.2052954304162243, "grad_norm": 0.3866426944732666, "learning_rate": 1.799122850436081e-05, "loss": 0.2589, "step": 11370 }, { "epoch": 0.20538570984912502, "grad_norm": 1.08524489402771, "learning_rate": 1.7989523125273338e-05, "loss": 0.3366, "step": 11375 }, { "epoch": 0.20547598928202573, "grad_norm": 0.5162799954414368, "learning_rate": 1.7987817103486824e-05, "loss": 0.2687, "step": 11380 }, { "epoch": 0.20556626871492645, "grad_norm": 0.44467848539352417, "learning_rate": 1.7986110439138493e-05, "loss": 0.261, "step": 11385 }, { "epoch": 0.20565654814782716, "grad_norm": 0.5967909693717957, "learning_rate": 1.7984403132365647e-05, "loss": 0.2886, "step": 11390 }, { "epoch": 0.20574682758072788, "grad_norm": 0.46552544832229614, "learning_rate": 1.798269518330562e-05, "loss": 0.2784, "step": 11395 }, { "epoch": 0.2058371070136286, "grad_norm": 0.5744720697402954, "learning_rate": 1.7980986592095804e-05, "loss": 0.3054, "step": 11400 }, { "epoch": 0.2059273864465293, "grad_norm": 0.39584240317344666, "learning_rate": 1.7979277358873646e-05, "loss": 0.3351, "step": 11405 }, { "epoch": 0.20601766587943002, "grad_norm": 0.4696711301803589, "learning_rate": 1.797756748377664e-05, "loss": 0.2547, "step": 11410 }, { "epoch": 0.20610794531233073, "grad_norm": 0.49084359407424927, "learning_rate": 1.797585696694233e-05, "loss": 0.3508, "step": 11415 }, { "epoch": 0.20619822474523145, "grad_norm": 0.5010688900947571, "learning_rate": 1.7974145808508318e-05, "loss": 0.3102, "step": 11420 }, { "epoch": 0.20628850417813216, "grad_norm": 0.46311667561531067, "learning_rate": 1.7972434008612256e-05, "loss": 0.3046, "step": 11425 }, { "epoch": 0.20637878361103287, "grad_norm": 0.4460143446922302, "learning_rate": 1.797072156739184e-05, "loss": 0.2743, "step": 11430 }, { "epoch": 0.2064690630439336, "grad_norm": 0.370007187128067, "learning_rate": 1.7969008484984827e-05, "loss": 0.3649, "step": 11435 }, { "epoch": 0.2065593424768343, "grad_norm": 0.375128835439682, "learning_rate": 1.796729476152902e-05, "loss": 0.3949, "step": 11440 }, { "epoch": 0.20664962190973502, "grad_norm": 0.4817360043525696, "learning_rate": 1.7965580397162282e-05, "loss": 0.3477, "step": 11445 }, { "epoch": 0.20673990134263573, "grad_norm": 0.593788206577301, "learning_rate": 1.7963865392022515e-05, "loss": 0.3004, "step": 11450 }, { "epoch": 0.20683018077553644, "grad_norm": 0.4985922574996948, "learning_rate": 1.7962149746247678e-05, "loss": 0.2844, "step": 11455 }, { "epoch": 0.20692046020843716, "grad_norm": 0.31397101283073425, "learning_rate": 1.7960433459975786e-05, "loss": 0.2237, "step": 11460 }, { "epoch": 0.20701073964133787, "grad_norm": 0.5667983889579773, "learning_rate": 1.79587165333449e-05, "loss": 0.2377, "step": 11465 }, { "epoch": 0.20710101907423858, "grad_norm": 0.39671066403388977, "learning_rate": 1.7956998966493133e-05, "loss": 0.283, "step": 11470 }, { "epoch": 0.2071912985071393, "grad_norm": 0.44250059127807617, "learning_rate": 1.7955280759558655e-05, "loss": 0.438, "step": 11475 }, { "epoch": 0.20728157794004, "grad_norm": 0.465035617351532, "learning_rate": 1.7953561912679678e-05, "loss": 0.2873, "step": 11480 }, { "epoch": 0.20737185737294073, "grad_norm": 0.34597525000572205, "learning_rate": 1.7951842425994473e-05, "loss": 0.2748, "step": 11485 }, { "epoch": 0.20746213680584144, "grad_norm": 0.4495750069618225, "learning_rate": 1.795012229964136e-05, "loss": 0.3538, "step": 11490 }, { "epoch": 0.20755241623874215, "grad_norm": 0.35256561636924744, "learning_rate": 1.794840153375871e-05, "loss": 0.3767, "step": 11495 }, { "epoch": 0.20764269567164287, "grad_norm": 0.32467296719551086, "learning_rate": 1.794668012848495e-05, "loss": 0.2314, "step": 11500 }, { "epoch": 0.20773297510454358, "grad_norm": 0.4653473198413849, "learning_rate": 1.7944958083958553e-05, "loss": 0.2843, "step": 11505 }, { "epoch": 0.2078232545374443, "grad_norm": 0.7906263470649719, "learning_rate": 1.7943235400318037e-05, "loss": 0.2777, "step": 11510 }, { "epoch": 0.207913533970345, "grad_norm": 0.6436835527420044, "learning_rate": 1.7941512077701992e-05, "loss": 0.2752, "step": 11515 }, { "epoch": 0.20800381340324572, "grad_norm": 0.44068244099617004, "learning_rate": 1.793978811624904e-05, "loss": 0.2844, "step": 11520 }, { "epoch": 0.20809409283614644, "grad_norm": 0.7325283288955688, "learning_rate": 1.7938063516097858e-05, "loss": 0.3731, "step": 11525 }, { "epoch": 0.20818437226904715, "grad_norm": 0.4470127820968628, "learning_rate": 1.7936338277387183e-05, "loss": 0.2738, "step": 11530 }, { "epoch": 0.20827465170194787, "grad_norm": 0.4569440186023712, "learning_rate": 1.79346124002558e-05, "loss": 0.3692, "step": 11535 }, { "epoch": 0.20836493113484858, "grad_norm": 0.34077921509742737, "learning_rate": 1.7932885884842538e-05, "loss": 0.3582, "step": 11540 }, { "epoch": 0.2084552105677493, "grad_norm": 0.3526357114315033, "learning_rate": 1.7931158731286282e-05, "loss": 0.3567, "step": 11545 }, { "epoch": 0.20854549000065, "grad_norm": 0.429093599319458, "learning_rate": 1.7929430939725972e-05, "loss": 0.3146, "step": 11550 }, { "epoch": 0.20863576943355072, "grad_norm": 0.48118454217910767, "learning_rate": 1.7927702510300596e-05, "loss": 0.2938, "step": 11555 }, { "epoch": 0.20872604886645144, "grad_norm": 0.35543933510780334, "learning_rate": 1.7925973443149193e-05, "loss": 0.3047, "step": 11560 }, { "epoch": 0.20881632829935215, "grad_norm": 0.9324852228164673, "learning_rate": 1.7924243738410853e-05, "loss": 0.3363, "step": 11565 }, { "epoch": 0.20890660773225286, "grad_norm": 0.5078498125076294, "learning_rate": 1.792251339622472e-05, "loss": 0.3852, "step": 11570 }, { "epoch": 0.20899688716515358, "grad_norm": 0.46870550513267517, "learning_rate": 1.792078241672999e-05, "loss": 0.3656, "step": 11575 }, { "epoch": 0.2090871665980543, "grad_norm": 0.4630391299724579, "learning_rate": 1.7919050800065898e-05, "loss": 0.2391, "step": 11580 }, { "epoch": 0.209177446030955, "grad_norm": 0.4416666030883789, "learning_rate": 1.791731854637175e-05, "loss": 0.4418, "step": 11585 }, { "epoch": 0.20926772546385572, "grad_norm": 0.43558019399642944, "learning_rate": 1.7915585655786888e-05, "loss": 0.2451, "step": 11590 }, { "epoch": 0.20935800489675643, "grad_norm": 0.5845248699188232, "learning_rate": 1.791385212845071e-05, "loss": 0.3297, "step": 11595 }, { "epoch": 0.20944828432965715, "grad_norm": 0.41678881645202637, "learning_rate": 1.791211796450267e-05, "loss": 0.2795, "step": 11600 }, { "epoch": 0.20953856376255786, "grad_norm": 0.453337699174881, "learning_rate": 1.7910383164082266e-05, "loss": 0.2509, "step": 11605 }, { "epoch": 0.20962884319545858, "grad_norm": 0.46536174416542053, "learning_rate": 1.790864772732905e-05, "loss": 0.2221, "step": 11610 }, { "epoch": 0.2097191226283593, "grad_norm": 0.41316506266593933, "learning_rate": 1.7906911654382625e-05, "loss": 0.2622, "step": 11615 }, { "epoch": 0.20980940206126, "grad_norm": 0.8957769870758057, "learning_rate": 1.790517494538265e-05, "loss": 0.301, "step": 11620 }, { "epoch": 0.20989968149416072, "grad_norm": 0.5003489255905151, "learning_rate": 1.7903437600468822e-05, "loss": 0.2682, "step": 11625 }, { "epoch": 0.20998996092706143, "grad_norm": 0.3303603231906891, "learning_rate": 1.7901699619780905e-05, "loss": 0.2477, "step": 11630 }, { "epoch": 0.21008024035996214, "grad_norm": 0.46241119503974915, "learning_rate": 1.7899961003458705e-05, "loss": 0.2684, "step": 11635 }, { "epoch": 0.21017051979286286, "grad_norm": 0.4073708951473236, "learning_rate": 1.789822175164208e-05, "loss": 0.2705, "step": 11640 }, { "epoch": 0.21026079922576357, "grad_norm": 0.8670423030853271, "learning_rate": 1.7896481864470942e-05, "loss": 0.3147, "step": 11645 }, { "epoch": 0.2103510786586643, "grad_norm": 0.45808741450309753, "learning_rate": 1.7894741342085246e-05, "loss": 0.2539, "step": 11650 }, { "epoch": 0.210441358091565, "grad_norm": 0.4415580928325653, "learning_rate": 1.7893000184625013e-05, "loss": 0.3669, "step": 11655 }, { "epoch": 0.21053163752446571, "grad_norm": 0.2889484763145447, "learning_rate": 1.7891258392230303e-05, "loss": 0.2558, "step": 11660 }, { "epoch": 0.21062191695736643, "grad_norm": 0.3614408075809479, "learning_rate": 1.788951596504123e-05, "loss": 0.2535, "step": 11665 }, { "epoch": 0.21071219639026714, "grad_norm": 0.33568477630615234, "learning_rate": 1.788777290319796e-05, "loss": 0.1984, "step": 11670 }, { "epoch": 0.21080247582316786, "grad_norm": 0.46689480543136597, "learning_rate": 1.788602920684071e-05, "loss": 0.3371, "step": 11675 }, { "epoch": 0.21089275525606857, "grad_norm": 0.7152404189109802, "learning_rate": 1.788428487610975e-05, "loss": 0.303, "step": 11680 }, { "epoch": 0.21098303468896928, "grad_norm": 0.3899504244327545, "learning_rate": 1.788253991114539e-05, "loss": 0.2941, "step": 11685 }, { "epoch": 0.21107331412187, "grad_norm": 0.45941299200057983, "learning_rate": 1.788079431208801e-05, "loss": 0.309, "step": 11690 }, { "epoch": 0.21116359355477074, "grad_norm": 0.560175895690918, "learning_rate": 1.7879048079078024e-05, "loss": 0.3159, "step": 11695 }, { "epoch": 0.21125387298767145, "grad_norm": 0.5322653651237488, "learning_rate": 1.7877301212255908e-05, "loss": 0.3391, "step": 11700 }, { "epoch": 0.21134415242057217, "grad_norm": 0.5604745745658875, "learning_rate": 1.7875553711762182e-05, "loss": 0.3619, "step": 11705 }, { "epoch": 0.21143443185347288, "grad_norm": 0.46025556325912476, "learning_rate": 1.787380557773742e-05, "loss": 0.2314, "step": 11710 }, { "epoch": 0.2115247112863736, "grad_norm": 0.5464009046554565, "learning_rate": 1.787205681032225e-05, "loss": 0.2467, "step": 11715 }, { "epoch": 0.2116149907192743, "grad_norm": 0.8008533120155334, "learning_rate": 1.7870307409657344e-05, "loss": 0.2839, "step": 11720 }, { "epoch": 0.21170527015217502, "grad_norm": 0.38943585753440857, "learning_rate": 1.7868557375883426e-05, "loss": 0.2827, "step": 11725 }, { "epoch": 0.21179554958507574, "grad_norm": 0.5736641883850098, "learning_rate": 1.786680670914128e-05, "loss": 0.2569, "step": 11730 }, { "epoch": 0.21188582901797645, "grad_norm": 0.3588363230228424, "learning_rate": 1.7865055409571733e-05, "loss": 0.3281, "step": 11735 }, { "epoch": 0.21197610845087717, "grad_norm": 0.4896489083766937, "learning_rate": 1.786330347731566e-05, "loss": 0.2802, "step": 11740 }, { "epoch": 0.21206638788377788, "grad_norm": 0.4987027049064636, "learning_rate": 1.7861550912513997e-05, "loss": 0.2674, "step": 11745 }, { "epoch": 0.2121566673166786, "grad_norm": 0.4357168674468994, "learning_rate": 1.785979771530772e-05, "loss": 0.2681, "step": 11750 }, { "epoch": 0.2122469467495793, "grad_norm": 0.679339587688446, "learning_rate": 1.7858043885837863e-05, "loss": 0.2868, "step": 11755 }, { "epoch": 0.21233722618248002, "grad_norm": 0.6235727071762085, "learning_rate": 1.785628942424551e-05, "loss": 0.3005, "step": 11760 }, { "epoch": 0.21242750561538074, "grad_norm": 0.6073330640792847, "learning_rate": 1.7854534330671793e-05, "loss": 0.3244, "step": 11765 }, { "epoch": 0.21251778504828145, "grad_norm": 0.5216356515884399, "learning_rate": 1.7852778605257895e-05, "loss": 0.2622, "step": 11770 }, { "epoch": 0.21260806448118216, "grad_norm": 0.7519828677177429, "learning_rate": 1.7851022248145057e-05, "loss": 0.2536, "step": 11775 }, { "epoch": 0.21269834391408288, "grad_norm": 0.6628133058547974, "learning_rate": 1.784926525947456e-05, "loss": 0.3209, "step": 11780 }, { "epoch": 0.2127886233469836, "grad_norm": 0.45866918563842773, "learning_rate": 1.784750763938774e-05, "loss": 0.1715, "step": 11785 }, { "epoch": 0.2128789027798843, "grad_norm": 0.4917178452014923, "learning_rate": 1.784574938802599e-05, "loss": 0.2321, "step": 11790 }, { "epoch": 0.21296918221278502, "grad_norm": 0.4127363860607147, "learning_rate": 1.7843990505530747e-05, "loss": 0.3582, "step": 11795 }, { "epoch": 0.21305946164568573, "grad_norm": 0.5475683808326721, "learning_rate": 1.7842230992043496e-05, "loss": 0.3792, "step": 11800 }, { "epoch": 0.21314974107858645, "grad_norm": 1.1832107305526733, "learning_rate": 1.7840470847705784e-05, "loss": 0.3258, "step": 11805 }, { "epoch": 0.21324002051148716, "grad_norm": 0.5049902200698853, "learning_rate": 1.7838710072659194e-05, "loss": 0.3705, "step": 11810 }, { "epoch": 0.21333029994438787, "grad_norm": 0.38012224435806274, "learning_rate": 1.7836948667045375e-05, "loss": 0.2989, "step": 11815 }, { "epoch": 0.2134205793772886, "grad_norm": 0.4135642647743225, "learning_rate": 1.7835186631006013e-05, "loss": 0.2915, "step": 11820 }, { "epoch": 0.2135108588101893, "grad_norm": 0.31480443477630615, "learning_rate": 1.7833423964682856e-05, "loss": 0.2658, "step": 11825 }, { "epoch": 0.21360113824309002, "grad_norm": 0.617190957069397, "learning_rate": 1.7831660668217697e-05, "loss": 0.2827, "step": 11830 }, { "epoch": 0.21369141767599073, "grad_norm": 0.5191602110862732, "learning_rate": 1.7829896741752375e-05, "loss": 0.376, "step": 11835 }, { "epoch": 0.21378169710889144, "grad_norm": 0.23170405626296997, "learning_rate": 1.782813218542879e-05, "loss": 0.2318, "step": 11840 }, { "epoch": 0.21387197654179216, "grad_norm": 0.3107124865055084, "learning_rate": 1.782636699938889e-05, "loss": 0.2315, "step": 11845 }, { "epoch": 0.21396225597469287, "grad_norm": 0.362413614988327, "learning_rate": 1.7824601183774665e-05, "loss": 0.29, "step": 11850 }, { "epoch": 0.21405253540759359, "grad_norm": 0.349123477935791, "learning_rate": 1.782283473872816e-05, "loss": 0.2097, "step": 11855 }, { "epoch": 0.2141428148404943, "grad_norm": 0.514616072177887, "learning_rate": 1.782106766439149e-05, "loss": 0.2799, "step": 11860 }, { "epoch": 0.214233094273395, "grad_norm": 0.47504672408103943, "learning_rate": 1.781929996090678e-05, "loss": 0.2358, "step": 11865 }, { "epoch": 0.21432337370629573, "grad_norm": 0.5488296747207642, "learning_rate": 1.7817531628416246e-05, "loss": 0.2715, "step": 11870 }, { "epoch": 0.21441365313919644, "grad_norm": 0.5415710806846619, "learning_rate": 1.781576266706213e-05, "loss": 0.2744, "step": 11875 }, { "epoch": 0.21450393257209716, "grad_norm": 0.4214060604572296, "learning_rate": 1.7813993076986733e-05, "loss": 0.2486, "step": 11880 }, { "epoch": 0.21459421200499787, "grad_norm": 0.27810266613960266, "learning_rate": 1.781222285833241e-05, "loss": 0.213, "step": 11885 }, { "epoch": 0.21468449143789858, "grad_norm": 0.4299297332763672, "learning_rate": 1.7810452011241554e-05, "loss": 0.2751, "step": 11890 }, { "epoch": 0.2147747708707993, "grad_norm": 0.4374350905418396, "learning_rate": 1.780868053585662e-05, "loss": 0.2703, "step": 11895 }, { "epoch": 0.2148650503037, "grad_norm": 0.39960458874702454, "learning_rate": 1.7806908432320113e-05, "loss": 0.3154, "step": 11900 }, { "epoch": 0.21495532973660073, "grad_norm": 0.3405298590660095, "learning_rate": 1.7805135700774588e-05, "loss": 0.4555, "step": 11905 }, { "epoch": 0.21504560916950144, "grad_norm": 0.6461913585662842, "learning_rate": 1.780336234136264e-05, "loss": 0.1844, "step": 11910 }, { "epoch": 0.21513588860240215, "grad_norm": 0.539243221282959, "learning_rate": 1.780158835422693e-05, "loss": 0.3345, "step": 11915 }, { "epoch": 0.21522616803530287, "grad_norm": 0.40096384286880493, "learning_rate": 1.779981373951016e-05, "loss": 0.3022, "step": 11920 }, { "epoch": 0.21531644746820358, "grad_norm": 0.5648438334465027, "learning_rate": 1.7798038497355082e-05, "loss": 0.3982, "step": 11925 }, { "epoch": 0.2154067269011043, "grad_norm": 0.3935416638851166, "learning_rate": 1.7796262627904504e-05, "loss": 0.2931, "step": 11930 }, { "epoch": 0.215497006334005, "grad_norm": 0.4855777621269226, "learning_rate": 1.7794486131301286e-05, "loss": 0.3144, "step": 11935 }, { "epoch": 0.21558728576690572, "grad_norm": 0.535211980342865, "learning_rate": 1.779270900768833e-05, "loss": 0.2997, "step": 11940 }, { "epoch": 0.21567756519980644, "grad_norm": 0.35269421339035034, "learning_rate": 1.779093125720859e-05, "loss": 0.2473, "step": 11945 }, { "epoch": 0.21576784463270715, "grad_norm": 0.33629870414733887, "learning_rate": 1.7789152880005075e-05, "loss": 0.2585, "step": 11950 }, { "epoch": 0.21585812406560786, "grad_norm": 0.27106332778930664, "learning_rate": 1.7787373876220847e-05, "loss": 0.2862, "step": 11955 }, { "epoch": 0.21594840349850858, "grad_norm": 0.6125961542129517, "learning_rate": 1.7785594245999007e-05, "loss": 0.2712, "step": 11960 }, { "epoch": 0.2160386829314093, "grad_norm": 0.3257152736186981, "learning_rate": 1.778381398948272e-05, "loss": 0.2519, "step": 11965 }, { "epoch": 0.21612896236431, "grad_norm": 0.3735145032405853, "learning_rate": 1.778203310681519e-05, "loss": 0.2877, "step": 11970 }, { "epoch": 0.21621924179721072, "grad_norm": 0.3832739293575287, "learning_rate": 1.7780251598139674e-05, "loss": 0.4543, "step": 11975 }, { "epoch": 0.21630952123011143, "grad_norm": 0.4531655013561249, "learning_rate": 1.777846946359949e-05, "loss": 0.3394, "step": 11980 }, { "epoch": 0.21639980066301215, "grad_norm": 0.46312928199768066, "learning_rate": 1.777668670333799e-05, "loss": 0.3099, "step": 11985 }, { "epoch": 0.21649008009591286, "grad_norm": 0.9675629734992981, "learning_rate": 1.7774903317498587e-05, "loss": 0.3223, "step": 11990 }, { "epoch": 0.21658035952881358, "grad_norm": 0.44589224457740784, "learning_rate": 1.777311930622474e-05, "loss": 0.3231, "step": 11995 }, { "epoch": 0.2166706389617143, "grad_norm": 0.34616875648498535, "learning_rate": 1.7771334669659962e-05, "loss": 0.2153, "step": 12000 }, { "epoch": 0.216760918394615, "grad_norm": 0.6218799948692322, "learning_rate": 1.7769549407947812e-05, "loss": 0.3489, "step": 12005 }, { "epoch": 0.21685119782751572, "grad_norm": 0.5074170231819153, "learning_rate": 1.7767763521231904e-05, "loss": 0.3349, "step": 12010 }, { "epoch": 0.21694147726041643, "grad_norm": 0.5704885125160217, "learning_rate": 1.7765977009655897e-05, "loss": 0.2668, "step": 12015 }, { "epoch": 0.21703175669331715, "grad_norm": 0.4201370179653168, "learning_rate": 1.7764189873363503e-05, "loss": 0.2427, "step": 12020 }, { "epoch": 0.21712203612621786, "grad_norm": 0.5044018030166626, "learning_rate": 1.7762402112498484e-05, "loss": 0.2947, "step": 12025 }, { "epoch": 0.21721231555911857, "grad_norm": 0.46398696303367615, "learning_rate": 1.7760613727204656e-05, "loss": 0.3881, "step": 12030 }, { "epoch": 0.2173025949920193, "grad_norm": 0.34880530834198, "learning_rate": 1.775882471762588e-05, "loss": 0.4522, "step": 12035 }, { "epoch": 0.21739287442492, "grad_norm": 0.6617854237556458, "learning_rate": 1.7757035083906063e-05, "loss": 0.2752, "step": 12040 }, { "epoch": 0.21748315385782072, "grad_norm": 0.4192894399166107, "learning_rate": 1.7755244826189173e-05, "loss": 0.3229, "step": 12045 }, { "epoch": 0.21757343329072143, "grad_norm": 0.4937689006328583, "learning_rate": 1.7753453944619227e-05, "loss": 0.2935, "step": 12050 }, { "epoch": 0.21766371272362214, "grad_norm": 0.42962369322776794, "learning_rate": 1.775166243934028e-05, "loss": 0.3625, "step": 12055 }, { "epoch": 0.21775399215652286, "grad_norm": 0.5614218711853027, "learning_rate": 1.774987031049645e-05, "loss": 0.2634, "step": 12060 }, { "epoch": 0.21784427158942357, "grad_norm": 0.3307231664657593, "learning_rate": 1.7748077558231905e-05, "loss": 0.228, "step": 12065 }, { "epoch": 0.21793455102232429, "grad_norm": 0.5150547027587891, "learning_rate": 1.774628418269085e-05, "loss": 0.2382, "step": 12070 }, { "epoch": 0.218024830455225, "grad_norm": 0.9636505246162415, "learning_rate": 1.7744490184017554e-05, "loss": 0.3017, "step": 12075 }, { "epoch": 0.2181151098881257, "grad_norm": 0.40887361764907837, "learning_rate": 1.7742695562356333e-05, "loss": 0.3922, "step": 12080 }, { "epoch": 0.21820538932102643, "grad_norm": 0.44358113408088684, "learning_rate": 1.7740900317851544e-05, "loss": 0.4168, "step": 12085 }, { "epoch": 0.21829566875392717, "grad_norm": 0.28855910897254944, "learning_rate": 1.7739104450647612e-05, "loss": 0.2614, "step": 12090 }, { "epoch": 0.21838594818682788, "grad_norm": 0.4369180202484131, "learning_rate": 1.773730796088899e-05, "loss": 0.3756, "step": 12095 }, { "epoch": 0.2184762276197286, "grad_norm": 0.6822949051856995, "learning_rate": 1.7735510848720204e-05, "loss": 0.3275, "step": 12100 }, { "epoch": 0.2185665070526293, "grad_norm": 0.3817140460014343, "learning_rate": 1.773371311428581e-05, "loss": 0.3013, "step": 12105 }, { "epoch": 0.21865678648553002, "grad_norm": 0.4731243848800659, "learning_rate": 1.7731914757730423e-05, "loss": 0.3143, "step": 12110 }, { "epoch": 0.21874706591843074, "grad_norm": 0.4177074432373047, "learning_rate": 1.7730115779198713e-05, "loss": 0.229, "step": 12115 }, { "epoch": 0.21883734535133145, "grad_norm": 0.2697894871234894, "learning_rate": 1.7728316178835392e-05, "loss": 0.3405, "step": 12120 }, { "epoch": 0.21892762478423217, "grad_norm": 0.6174373626708984, "learning_rate": 1.7726515956785224e-05, "loss": 0.2283, "step": 12125 }, { "epoch": 0.21901790421713288, "grad_norm": 0.27584612369537354, "learning_rate": 1.7724715113193025e-05, "loss": 0.2863, "step": 12130 }, { "epoch": 0.2191081836500336, "grad_norm": 0.5504199266433716, "learning_rate": 1.772291364820366e-05, "loss": 0.3787, "step": 12135 }, { "epoch": 0.2191984630829343, "grad_norm": 0.6736400127410889, "learning_rate": 1.7721111561962043e-05, "loss": 0.3155, "step": 12140 }, { "epoch": 0.21928874251583502, "grad_norm": 0.6010838150978088, "learning_rate": 1.7719308854613137e-05, "loss": 0.3521, "step": 12145 }, { "epoch": 0.21937902194873574, "grad_norm": 0.4695472717285156, "learning_rate": 1.7717505526301958e-05, "loss": 0.2443, "step": 12150 }, { "epoch": 0.21946930138163645, "grad_norm": 0.3113847076892853, "learning_rate": 1.7715701577173573e-05, "loss": 0.2631, "step": 12155 }, { "epoch": 0.21955958081453716, "grad_norm": 0.33892154693603516, "learning_rate": 1.771389700737309e-05, "loss": 0.3811, "step": 12160 }, { "epoch": 0.21964986024743788, "grad_norm": 0.6578267812728882, "learning_rate": 1.7712091817045685e-05, "loss": 0.3701, "step": 12165 }, { "epoch": 0.2197401396803386, "grad_norm": 0.32275834679603577, "learning_rate": 1.7710286006336566e-05, "loss": 0.2341, "step": 12170 }, { "epoch": 0.2198304191132393, "grad_norm": 0.5772724151611328, "learning_rate": 1.770847957539099e-05, "loss": 0.3192, "step": 12175 }, { "epoch": 0.21992069854614002, "grad_norm": 0.5542657971382141, "learning_rate": 1.7706672524354283e-05, "loss": 0.3476, "step": 12180 }, { "epoch": 0.22001097797904073, "grad_norm": 0.4902056157588959, "learning_rate": 1.7704864853371805e-05, "loss": 0.4199, "step": 12185 }, { "epoch": 0.22010125741194145, "grad_norm": 0.4195903539657593, "learning_rate": 1.7703056562588966e-05, "loss": 0.2742, "step": 12190 }, { "epoch": 0.22019153684484216, "grad_norm": 0.5244849324226379, "learning_rate": 1.770124765215124e-05, "loss": 0.3205, "step": 12195 }, { "epoch": 0.22028181627774288, "grad_norm": 0.35584715008735657, "learning_rate": 1.7699438122204126e-05, "loss": 0.2856, "step": 12200 }, { "epoch": 0.2203720957106436, "grad_norm": 0.34900498390197754, "learning_rate": 1.7697627972893195e-05, "loss": 0.2837, "step": 12205 }, { "epoch": 0.2204623751435443, "grad_norm": 0.3512410819530487, "learning_rate": 1.7695817204364067e-05, "loss": 0.2292, "step": 12210 }, { "epoch": 0.22055265457644502, "grad_norm": 0.29271936416625977, "learning_rate": 1.7694005816762393e-05, "loss": 0.2282, "step": 12215 }, { "epoch": 0.22064293400934573, "grad_norm": 0.520492672920227, "learning_rate": 1.7692193810233896e-05, "loss": 0.341, "step": 12220 }, { "epoch": 0.22073321344224645, "grad_norm": 0.4727719724178314, "learning_rate": 1.7690381184924335e-05, "loss": 0.3201, "step": 12225 }, { "epoch": 0.22082349287514716, "grad_norm": 0.7370137572288513, "learning_rate": 1.768856794097952e-05, "loss": 0.37, "step": 12230 }, { "epoch": 0.22091377230804787, "grad_norm": 0.8863621354103088, "learning_rate": 1.7686754078545317e-05, "loss": 0.2645, "step": 12235 }, { "epoch": 0.2210040517409486, "grad_norm": 0.2831362187862396, "learning_rate": 1.7684939597767635e-05, "loss": 0.2702, "step": 12240 }, { "epoch": 0.2210943311738493, "grad_norm": 0.8353209495544434, "learning_rate": 1.7683124498792436e-05, "loss": 0.2362, "step": 12245 }, { "epoch": 0.22118461060675002, "grad_norm": 0.5277425050735474, "learning_rate": 1.7681308781765736e-05, "loss": 0.252, "step": 12250 }, { "epoch": 0.22127489003965073, "grad_norm": 0.4916650950908661, "learning_rate": 1.7679492446833594e-05, "loss": 0.2815, "step": 12255 }, { "epoch": 0.22136516947255144, "grad_norm": 0.44698864221572876, "learning_rate": 1.7677675494142115e-05, "loss": 0.3273, "step": 12260 }, { "epoch": 0.22145544890545216, "grad_norm": 0.42281705141067505, "learning_rate": 1.7675857923837467e-05, "loss": 0.359, "step": 12265 }, { "epoch": 0.22154572833835287, "grad_norm": 0.3011576533317566, "learning_rate": 1.767403973606586e-05, "loss": 0.2526, "step": 12270 }, { "epoch": 0.22163600777125358, "grad_norm": 0.4375670850276947, "learning_rate": 1.7672220930973548e-05, "loss": 0.2751, "step": 12275 }, { "epoch": 0.2217262872041543, "grad_norm": 0.5508677363395691, "learning_rate": 1.767040150870685e-05, "loss": 0.3449, "step": 12280 }, { "epoch": 0.221816566637055, "grad_norm": 0.8546921610832214, "learning_rate": 1.7668581469412114e-05, "loss": 0.2519, "step": 12285 }, { "epoch": 0.22190684606995573, "grad_norm": 0.748357892036438, "learning_rate": 1.7666760813235756e-05, "loss": 0.2205, "step": 12290 }, { "epoch": 0.22199712550285644, "grad_norm": 0.6457489132881165, "learning_rate": 1.7664939540324236e-05, "loss": 0.311, "step": 12295 }, { "epoch": 0.22208740493575715, "grad_norm": 0.5729885697364807, "learning_rate": 1.7663117650824057e-05, "loss": 0.3258, "step": 12300 }, { "epoch": 0.22217768436865787, "grad_norm": 0.5058196783065796, "learning_rate": 1.766129514488178e-05, "loss": 0.3847, "step": 12305 }, { "epoch": 0.22226796380155858, "grad_norm": 0.5538528561592102, "learning_rate": 1.7659472022644012e-05, "loss": 0.2343, "step": 12310 }, { "epoch": 0.2223582432344593, "grad_norm": 0.34853270649909973, "learning_rate": 1.7657648284257406e-05, "loss": 0.3126, "step": 12315 }, { "epoch": 0.22244852266736, "grad_norm": 0.5706051588058472, "learning_rate": 1.7655823929868677e-05, "loss": 0.346, "step": 12320 }, { "epoch": 0.22253880210026072, "grad_norm": 0.6547861099243164, "learning_rate": 1.7653998959624573e-05, "loss": 0.4004, "step": 12325 }, { "epoch": 0.22262908153316144, "grad_norm": 0.5828825831413269, "learning_rate": 1.7652173373671905e-05, "loss": 0.32, "step": 12330 }, { "epoch": 0.22271936096606215, "grad_norm": 0.7183968424797058, "learning_rate": 1.7650347172157523e-05, "loss": 0.2042, "step": 12335 }, { "epoch": 0.22280964039896287, "grad_norm": 0.4506465494632721, "learning_rate": 1.7648520355228335e-05, "loss": 0.234, "step": 12340 }, { "epoch": 0.22289991983186358, "grad_norm": 0.40599194169044495, "learning_rate": 1.7646692923031298e-05, "loss": 0.3196, "step": 12345 }, { "epoch": 0.2229901992647643, "grad_norm": 0.6732317805290222, "learning_rate": 1.764486487571341e-05, "loss": 0.3119, "step": 12350 }, { "epoch": 0.223080478697665, "grad_norm": 0.5106104612350464, "learning_rate": 1.7643036213421724e-05, "loss": 0.3718, "step": 12355 }, { "epoch": 0.22317075813056572, "grad_norm": 0.29887324571609497, "learning_rate": 1.7641206936303348e-05, "loss": 0.214, "step": 12360 }, { "epoch": 0.22326103756346644, "grad_norm": 0.4310643970966339, "learning_rate": 1.763937704450543e-05, "loss": 0.2257, "step": 12365 }, { "epoch": 0.22335131699636715, "grad_norm": 0.5841691493988037, "learning_rate": 1.763754653817517e-05, "loss": 0.3455, "step": 12370 }, { "epoch": 0.22344159642926786, "grad_norm": 0.26802316308021545, "learning_rate": 1.7635715417459828e-05, "loss": 0.2342, "step": 12375 }, { "epoch": 0.22353187586216858, "grad_norm": 0.734042227268219, "learning_rate": 1.763388368250669e-05, "loss": 0.3666, "step": 12380 }, { "epoch": 0.2236221552950693, "grad_norm": 0.5031940937042236, "learning_rate": 1.763205133346312e-05, "loss": 0.3489, "step": 12385 }, { "epoch": 0.22371243472797, "grad_norm": 0.4350252151489258, "learning_rate": 1.7630218370476506e-05, "loss": 0.3381, "step": 12390 }, { "epoch": 0.22380271416087072, "grad_norm": 0.3086232542991638, "learning_rate": 1.7628384793694302e-05, "loss": 0.2875, "step": 12395 }, { "epoch": 0.22389299359377143, "grad_norm": 0.6226441860198975, "learning_rate": 1.7626550603264005e-05, "loss": 0.2564, "step": 12400 }, { "epoch": 0.22398327302667215, "grad_norm": 0.42126598954200745, "learning_rate": 1.7624715799333163e-05, "loss": 0.3476, "step": 12405 }, { "epoch": 0.22407355245957286, "grad_norm": 0.4345131814479828, "learning_rate": 1.762288038204937e-05, "loss": 0.3486, "step": 12410 }, { "epoch": 0.22416383189247358, "grad_norm": 0.41714054346084595, "learning_rate": 1.7621044351560276e-05, "loss": 0.2472, "step": 12415 }, { "epoch": 0.2242541113253743, "grad_norm": 0.3679058253765106, "learning_rate": 1.7619207708013575e-05, "loss": 0.3669, "step": 12420 }, { "epoch": 0.224344390758275, "grad_norm": 0.421440988779068, "learning_rate": 1.761737045155701e-05, "loss": 0.3501, "step": 12425 }, { "epoch": 0.22443467019117572, "grad_norm": 0.5317081809043884, "learning_rate": 1.761553258233838e-05, "loss": 0.3304, "step": 12430 }, { "epoch": 0.22452494962407643, "grad_norm": 0.4892217814922333, "learning_rate": 1.7613694100505515e-05, "loss": 0.3608, "step": 12435 }, { "epoch": 0.22461522905697714, "grad_norm": 0.590078592300415, "learning_rate": 1.7611855006206322e-05, "loss": 0.2907, "step": 12440 }, { "epoch": 0.22470550848987786, "grad_norm": 0.37204211950302124, "learning_rate": 1.7610015299588737e-05, "loss": 0.2403, "step": 12445 }, { "epoch": 0.22479578792277857, "grad_norm": 0.3007202446460724, "learning_rate": 1.7608174980800748e-05, "loss": 0.3343, "step": 12450 }, { "epoch": 0.2248860673556793, "grad_norm": 0.38273823261260986, "learning_rate": 1.7606334049990405e-05, "loss": 0.2856, "step": 12455 }, { "epoch": 0.22497634678858, "grad_norm": 0.8064092993736267, "learning_rate": 1.7604492507305788e-05, "loss": 0.2644, "step": 12460 }, { "epoch": 0.22506662622148071, "grad_norm": 0.44359609484672546, "learning_rate": 1.7602650352895037e-05, "loss": 0.3132, "step": 12465 }, { "epoch": 0.22515690565438143, "grad_norm": 0.3986911475658417, "learning_rate": 1.7600807586906343e-05, "loss": 0.2842, "step": 12470 }, { "epoch": 0.22524718508728214, "grad_norm": 0.4192790985107422, "learning_rate": 1.7598964209487945e-05, "loss": 0.2547, "step": 12475 }, { "epoch": 0.22533746452018286, "grad_norm": 0.4528927505016327, "learning_rate": 1.759712022078812e-05, "loss": 0.2174, "step": 12480 }, { "epoch": 0.2254277439530836, "grad_norm": 0.8167347311973572, "learning_rate": 1.759527562095521e-05, "loss": 0.42, "step": 12485 }, { "epoch": 0.2255180233859843, "grad_norm": 0.34179383516311646, "learning_rate": 1.7593430410137604e-05, "loss": 0.2672, "step": 12490 }, { "epoch": 0.22560830281888503, "grad_norm": 0.5470631122589111, "learning_rate": 1.759158458848373e-05, "loss": 0.3076, "step": 12495 }, { "epoch": 0.22569858225178574, "grad_norm": 0.38790619373321533, "learning_rate": 1.758973815614207e-05, "loss": 0.2712, "step": 12500 }, { "epoch": 0.22578886168468645, "grad_norm": 0.5486795902252197, "learning_rate": 1.7587891113261163e-05, "loss": 0.3046, "step": 12505 }, { "epoch": 0.22587914111758717, "grad_norm": 0.5758505463600159, "learning_rate": 1.758604345998958e-05, "loss": 0.2934, "step": 12510 }, { "epoch": 0.22596942055048788, "grad_norm": 0.5941413044929504, "learning_rate": 1.7584195196475956e-05, "loss": 0.2367, "step": 12515 }, { "epoch": 0.2260596999833886, "grad_norm": 0.4801849126815796, "learning_rate": 1.758234632286897e-05, "loss": 0.3754, "step": 12520 }, { "epoch": 0.2261499794162893, "grad_norm": 0.5735433101654053, "learning_rate": 1.758049683931736e-05, "loss": 0.3029, "step": 12525 }, { "epoch": 0.22624025884919002, "grad_norm": 0.35783183574676514, "learning_rate": 1.7578646745969886e-05, "loss": 0.2535, "step": 12530 }, { "epoch": 0.22633053828209074, "grad_norm": 0.38716718554496765, "learning_rate": 1.757679604297539e-05, "loss": 0.2672, "step": 12535 }, { "epoch": 0.22642081771499145, "grad_norm": 0.3761030435562134, "learning_rate": 1.7574944730482735e-05, "loss": 0.2515, "step": 12540 }, { "epoch": 0.22651109714789217, "grad_norm": 0.3976900279521942, "learning_rate": 1.757309280864085e-05, "loss": 0.4006, "step": 12545 }, { "epoch": 0.22660137658079288, "grad_norm": 0.46611571311950684, "learning_rate": 1.7571240277598716e-05, "loss": 0.2155, "step": 12550 }, { "epoch": 0.2266916560136936, "grad_norm": 0.44464078545570374, "learning_rate": 1.756938713750535e-05, "loss": 0.3367, "step": 12555 }, { "epoch": 0.2267819354465943, "grad_norm": 0.3445928394794464, "learning_rate": 1.756753338850982e-05, "loss": 0.3938, "step": 12560 }, { "epoch": 0.22687221487949502, "grad_norm": 0.5699447393417358, "learning_rate": 1.756567903076125e-05, "loss": 0.3994, "step": 12565 }, { "epoch": 0.22696249431239574, "grad_norm": 0.4590452015399933, "learning_rate": 1.7563824064408814e-05, "loss": 0.2082, "step": 12570 }, { "epoch": 0.22705277374529645, "grad_norm": 0.3316526412963867, "learning_rate": 1.7561968489601725e-05, "loss": 0.3287, "step": 12575 }, { "epoch": 0.22714305317819716, "grad_norm": 0.5490213632583618, "learning_rate": 1.756011230648925e-05, "loss": 0.3007, "step": 12580 }, { "epoch": 0.22723333261109788, "grad_norm": 0.6003979444503784, "learning_rate": 1.7558255515220707e-05, "loss": 0.2635, "step": 12585 }, { "epoch": 0.2273236120439986, "grad_norm": 0.5353501439094543, "learning_rate": 1.7556398115945463e-05, "loss": 0.3996, "step": 12590 }, { "epoch": 0.2274138914768993, "grad_norm": 0.3782620131969452, "learning_rate": 1.755454010881293e-05, "loss": 0.2767, "step": 12595 }, { "epoch": 0.22750417090980002, "grad_norm": 0.3071882128715515, "learning_rate": 1.7552681493972573e-05, "loss": 0.255, "step": 12600 }, { "epoch": 0.22759445034270073, "grad_norm": 0.3926979899406433, "learning_rate": 1.7550822271573903e-05, "loss": 0.1907, "step": 12605 }, { "epoch": 0.22768472977560145, "grad_norm": 0.44747939705848694, "learning_rate": 1.754896244176648e-05, "loss": 0.2919, "step": 12610 }, { "epoch": 0.22777500920850216, "grad_norm": 0.49764126539230347, "learning_rate": 1.7547102004699917e-05, "loss": 0.2617, "step": 12615 }, { "epoch": 0.22786528864140287, "grad_norm": 0.7542524337768555, "learning_rate": 1.7545240960523865e-05, "loss": 0.3394, "step": 12620 }, { "epoch": 0.2279555680743036, "grad_norm": 0.45615649223327637, "learning_rate": 1.7543379309388045e-05, "loss": 0.2543, "step": 12625 }, { "epoch": 0.2280458475072043, "grad_norm": 0.642106831073761, "learning_rate": 1.75415170514422e-05, "loss": 0.2593, "step": 12630 }, { "epoch": 0.22813612694010502, "grad_norm": 0.4519185721874237, "learning_rate": 1.753965418683614e-05, "loss": 0.2457, "step": 12635 }, { "epoch": 0.22822640637300573, "grad_norm": 0.9257187843322754, "learning_rate": 1.7537790715719723e-05, "loss": 0.3096, "step": 12640 }, { "epoch": 0.22831668580590644, "grad_norm": 0.34161514043807983, "learning_rate": 1.7535926638242843e-05, "loss": 0.2755, "step": 12645 }, { "epoch": 0.22840696523880716, "grad_norm": 0.5337136387825012, "learning_rate": 1.753406195455546e-05, "loss": 0.2925, "step": 12650 }, { "epoch": 0.22849724467170787, "grad_norm": 0.730758786201477, "learning_rate": 1.753219666480757e-05, "loss": 0.3478, "step": 12655 }, { "epoch": 0.22858752410460859, "grad_norm": 0.40018904209136963, "learning_rate": 1.7530330769149222e-05, "loss": 0.2867, "step": 12660 }, { "epoch": 0.2286778035375093, "grad_norm": 0.4671535789966583, "learning_rate": 1.7528464267730518e-05, "loss": 0.2723, "step": 12665 }, { "epoch": 0.22876808297041, "grad_norm": 0.5026867985725403, "learning_rate": 1.7526597160701596e-05, "loss": 0.3095, "step": 12670 }, { "epoch": 0.22885836240331073, "grad_norm": 0.53568035364151, "learning_rate": 1.752472944821266e-05, "loss": 0.282, "step": 12675 }, { "epoch": 0.22894864183621144, "grad_norm": 0.3272496163845062, "learning_rate": 1.7522861130413944e-05, "loss": 0.2907, "step": 12680 }, { "epoch": 0.22903892126911216, "grad_norm": 0.393658310174942, "learning_rate": 1.7520992207455752e-05, "loss": 0.3212, "step": 12685 }, { "epoch": 0.22912920070201287, "grad_norm": 0.35009339451789856, "learning_rate": 1.7519122679488417e-05, "loss": 0.252, "step": 12690 }, { "epoch": 0.22921948013491358, "grad_norm": 0.6266551613807678, "learning_rate": 1.7517252546662333e-05, "loss": 0.2781, "step": 12695 }, { "epoch": 0.2293097595678143, "grad_norm": 0.37133973836898804, "learning_rate": 1.7515381809127938e-05, "loss": 0.3435, "step": 12700 }, { "epoch": 0.229400039000715, "grad_norm": 0.46204811334609985, "learning_rate": 1.7513510467035717e-05, "loss": 0.2844, "step": 12705 }, { "epoch": 0.22949031843361573, "grad_norm": 0.3849553167819977, "learning_rate": 1.7511638520536208e-05, "loss": 0.2516, "step": 12710 }, { "epoch": 0.22958059786651644, "grad_norm": 0.4111543595790863, "learning_rate": 1.7509765969779996e-05, "loss": 0.2439, "step": 12715 }, { "epoch": 0.22967087729941715, "grad_norm": 0.3730176091194153, "learning_rate": 1.750789281491771e-05, "loss": 0.3372, "step": 12720 }, { "epoch": 0.22976115673231787, "grad_norm": 0.5229792594909668, "learning_rate": 1.750601905610004e-05, "loss": 0.3364, "step": 12725 }, { "epoch": 0.22985143616521858, "grad_norm": 0.43100789189338684, "learning_rate": 1.7504144693477706e-05, "loss": 0.2306, "step": 12730 }, { "epoch": 0.2299417155981193, "grad_norm": 0.5689313411712646, "learning_rate": 1.7502269727201494e-05, "loss": 0.2632, "step": 12735 }, { "epoch": 0.23003199503102, "grad_norm": 0.5734375715255737, "learning_rate": 1.7500394157422228e-05, "loss": 0.2136, "step": 12740 }, { "epoch": 0.23012227446392072, "grad_norm": 0.5006566643714905, "learning_rate": 1.7498517984290786e-05, "loss": 0.2878, "step": 12745 }, { "epoch": 0.23021255389682144, "grad_norm": 0.4980749487876892, "learning_rate": 1.749664120795809e-05, "loss": 0.3401, "step": 12750 }, { "epoch": 0.23030283332972215, "grad_norm": 0.8709957599639893, "learning_rate": 1.749476382857512e-05, "loss": 0.2772, "step": 12755 }, { "epoch": 0.23039311276262286, "grad_norm": 0.5566146373748779, "learning_rate": 1.7492885846292887e-05, "loss": 0.2313, "step": 12760 }, { "epoch": 0.23048339219552358, "grad_norm": 0.37837234139442444, "learning_rate": 1.7491007261262466e-05, "loss": 0.2771, "step": 12765 }, { "epoch": 0.2305736716284243, "grad_norm": 0.4560678005218506, "learning_rate": 1.7489128073634978e-05, "loss": 0.2826, "step": 12770 }, { "epoch": 0.230663951061325, "grad_norm": 0.4916342794895172, "learning_rate": 1.7487248283561588e-05, "loss": 0.339, "step": 12775 }, { "epoch": 0.23075423049422572, "grad_norm": 0.5683901906013489, "learning_rate": 1.748536789119351e-05, "loss": 0.3836, "step": 12780 }, { "epoch": 0.23084450992712643, "grad_norm": 0.31839752197265625, "learning_rate": 1.748348689668201e-05, "loss": 0.2103, "step": 12785 }, { "epoch": 0.23093478936002715, "grad_norm": 0.3599136769771576, "learning_rate": 1.74816053001784e-05, "loss": 0.3974, "step": 12790 }, { "epoch": 0.23102506879292786, "grad_norm": 0.594417154788971, "learning_rate": 1.747972310183404e-05, "loss": 0.2634, "step": 12795 }, { "epoch": 0.23111534822582858, "grad_norm": 0.7117186784744263, "learning_rate": 1.7477840301800336e-05, "loss": 0.3887, "step": 12800 }, { "epoch": 0.2312056276587293, "grad_norm": 0.3433570861816406, "learning_rate": 1.747595690022875e-05, "loss": 0.3705, "step": 12805 }, { "epoch": 0.23129590709163, "grad_norm": 0.4502484202384949, "learning_rate": 1.747407289727079e-05, "loss": 0.2893, "step": 12810 }, { "epoch": 0.23138618652453072, "grad_norm": 0.5645655989646912, "learning_rate": 1.7472188293078003e-05, "loss": 0.3901, "step": 12815 }, { "epoch": 0.23147646595743143, "grad_norm": 0.5908382534980774, "learning_rate": 1.7470303087801998e-05, "loss": 0.3332, "step": 12820 }, { "epoch": 0.23156674539033215, "grad_norm": 0.32872527837753296, "learning_rate": 1.7468417281594426e-05, "loss": 0.2765, "step": 12825 }, { "epoch": 0.23165702482323286, "grad_norm": 0.38247713446617126, "learning_rate": 1.746653087460698e-05, "loss": 0.2047, "step": 12830 }, { "epoch": 0.23174730425613357, "grad_norm": 0.435955673456192, "learning_rate": 1.746464386699141e-05, "loss": 0.2916, "step": 12835 }, { "epoch": 0.2318375836890343, "grad_norm": 0.6716747283935547, "learning_rate": 1.746275625889952e-05, "loss": 0.343, "step": 12840 }, { "epoch": 0.231927863121935, "grad_norm": 0.4624778926372528, "learning_rate": 1.746086805048315e-05, "loss": 0.1836, "step": 12845 }, { "epoch": 0.23201814255483572, "grad_norm": 0.39304831624031067, "learning_rate": 1.7458979241894188e-05, "loss": 0.3215, "step": 12850 }, { "epoch": 0.23210842198773643, "grad_norm": 0.5324734449386597, "learning_rate": 1.745708983328458e-05, "loss": 0.3876, "step": 12855 }, { "epoch": 0.23219870142063714, "grad_norm": 0.36722588539123535, "learning_rate": 1.7455199824806307e-05, "loss": 0.2532, "step": 12860 }, { "epoch": 0.23228898085353786, "grad_norm": 0.4719274342060089, "learning_rate": 1.745330921661142e-05, "loss": 0.3529, "step": 12865 }, { "epoch": 0.23237926028643857, "grad_norm": 0.6899310350418091, "learning_rate": 1.7451418008851992e-05, "loss": 0.4033, "step": 12870 }, { "epoch": 0.23246953971933929, "grad_norm": 0.36658185720443726, "learning_rate": 1.7449526201680165e-05, "loss": 0.2566, "step": 12875 }, { "epoch": 0.23255981915224003, "grad_norm": 0.6452036499977112, "learning_rate": 1.7447633795248117e-05, "loss": 0.2768, "step": 12880 }, { "epoch": 0.23265009858514074, "grad_norm": 0.7364159226417542, "learning_rate": 1.744574078970808e-05, "loss": 0.3431, "step": 12885 }, { "epoch": 0.23274037801804145, "grad_norm": 0.37923142313957214, "learning_rate": 1.7443847185212335e-05, "loss": 0.1545, "step": 12890 }, { "epoch": 0.23283065745094217, "grad_norm": 0.48003754019737244, "learning_rate": 1.7441952981913202e-05, "loss": 0.304, "step": 12895 }, { "epoch": 0.23292093688384288, "grad_norm": 0.4077073037624359, "learning_rate": 1.744005817996306e-05, "loss": 0.2717, "step": 12900 }, { "epoch": 0.2330112163167436, "grad_norm": 0.3822029232978821, "learning_rate": 1.7438162779514333e-05, "loss": 0.2555, "step": 12905 }, { "epoch": 0.2331014957496443, "grad_norm": 0.40967702865600586, "learning_rate": 1.743626678071949e-05, "loss": 0.3063, "step": 12910 }, { "epoch": 0.23319177518254502, "grad_norm": 0.8108331561088562, "learning_rate": 1.7434370183731057e-05, "loss": 0.2122, "step": 12915 }, { "epoch": 0.23328205461544574, "grad_norm": 0.599997878074646, "learning_rate": 1.743247298870159e-05, "loss": 0.2751, "step": 12920 }, { "epoch": 0.23337233404834645, "grad_norm": 0.32683104276657104, "learning_rate": 1.743057519578371e-05, "loss": 0.2915, "step": 12925 }, { "epoch": 0.23346261348124717, "grad_norm": 0.650092363357544, "learning_rate": 1.7428676805130084e-05, "loss": 0.3193, "step": 12930 }, { "epoch": 0.23355289291414788, "grad_norm": 0.7250920534133911, "learning_rate": 1.742677781689342e-05, "loss": 0.3154, "step": 12935 }, { "epoch": 0.2336431723470486, "grad_norm": 0.5595054626464844, "learning_rate": 1.7424878231226476e-05, "loss": 0.2339, "step": 12940 }, { "epoch": 0.2337334517799493, "grad_norm": 0.6523693203926086, "learning_rate": 1.7422978048282066e-05, "loss": 0.3467, "step": 12945 }, { "epoch": 0.23382373121285002, "grad_norm": 0.7626236081123352, "learning_rate": 1.742107726821304e-05, "loss": 0.3687, "step": 12950 }, { "epoch": 0.23391401064575074, "grad_norm": 0.570476770401001, "learning_rate": 1.7419175891172308e-05, "loss": 0.3433, "step": 12955 }, { "epoch": 0.23400429007865145, "grad_norm": 0.5262225866317749, "learning_rate": 1.7417273917312812e-05, "loss": 0.2786, "step": 12960 }, { "epoch": 0.23409456951155216, "grad_norm": 0.5009921789169312, "learning_rate": 1.7415371346787564e-05, "loss": 0.2433, "step": 12965 }, { "epoch": 0.23418484894445288, "grad_norm": 0.4326619803905487, "learning_rate": 1.74134681797496e-05, "loss": 0.3382, "step": 12970 }, { "epoch": 0.2342751283773536, "grad_norm": 0.7674878239631653, "learning_rate": 1.7411564416352028e-05, "loss": 0.2951, "step": 12975 }, { "epoch": 0.2343654078102543, "grad_norm": 0.45992231369018555, "learning_rate": 1.7409660056747983e-05, "loss": 0.3278, "step": 12980 }, { "epoch": 0.23445568724315502, "grad_norm": 0.5807640552520752, "learning_rate": 1.740775510109066e-05, "loss": 0.2749, "step": 12985 }, { "epoch": 0.23454596667605573, "grad_norm": 0.8449848294258118, "learning_rate": 1.74058495495333e-05, "loss": 0.3264, "step": 12990 }, { "epoch": 0.23463624610895645, "grad_norm": 0.336150199174881, "learning_rate": 1.7403943402229186e-05, "loss": 0.3483, "step": 12995 }, { "epoch": 0.23472652554185716, "grad_norm": 0.4193001985549927, "learning_rate": 1.740203665933166e-05, "loss": 0.3628, "step": 13000 }, { "epoch": 0.23481680497475788, "grad_norm": 0.2537536919116974, "learning_rate": 1.7400129320994105e-05, "loss": 0.2823, "step": 13005 }, { "epoch": 0.2349070844076586, "grad_norm": 0.5223426222801208, "learning_rate": 1.7398221387369945e-05, "loss": 0.272, "step": 13010 }, { "epoch": 0.2349973638405593, "grad_norm": 0.4388841390609741, "learning_rate": 1.7396312858612668e-05, "loss": 0.2198, "step": 13015 }, { "epoch": 0.23508764327346002, "grad_norm": 0.7844226360321045, "learning_rate": 1.73944037348758e-05, "loss": 0.3239, "step": 13020 }, { "epoch": 0.23517792270636073, "grad_norm": 0.3475538492202759, "learning_rate": 1.739249401631291e-05, "loss": 0.27, "step": 13025 }, { "epoch": 0.23526820213926145, "grad_norm": 0.5528688430786133, "learning_rate": 1.7390583703077627e-05, "loss": 0.3892, "step": 13030 }, { "epoch": 0.23535848157216216, "grad_norm": 0.3805859386920929, "learning_rate": 1.7388672795323617e-05, "loss": 0.2687, "step": 13035 }, { "epoch": 0.23544876100506287, "grad_norm": 0.4538038372993469, "learning_rate": 1.7386761293204606e-05, "loss": 0.2865, "step": 13040 }, { "epoch": 0.2355390404379636, "grad_norm": 0.4108891487121582, "learning_rate": 1.738484919687435e-05, "loss": 0.2868, "step": 13045 }, { "epoch": 0.2356293198708643, "grad_norm": 0.35251399874687195, "learning_rate": 1.7382936506486676e-05, "loss": 0.201, "step": 13050 }, { "epoch": 0.23571959930376502, "grad_norm": 0.4963095784187317, "learning_rate": 1.7381023222195436e-05, "loss": 0.2389, "step": 13055 }, { "epoch": 0.23580987873666573, "grad_norm": 1.053669810295105, "learning_rate": 1.737910934415454e-05, "loss": 0.1928, "step": 13060 }, { "epoch": 0.23590015816956644, "grad_norm": 0.34278637170791626, "learning_rate": 1.7377194872517953e-05, "loss": 0.2457, "step": 13065 }, { "epoch": 0.23599043760246716, "grad_norm": 0.35964125394821167, "learning_rate": 1.7375279807439675e-05, "loss": 0.2703, "step": 13070 }, { "epoch": 0.23608071703536787, "grad_norm": 0.773762583732605, "learning_rate": 1.7373364149073757e-05, "loss": 0.239, "step": 13075 }, { "epoch": 0.23617099646826858, "grad_norm": 0.504447877407074, "learning_rate": 1.7371447897574305e-05, "loss": 0.2171, "step": 13080 }, { "epoch": 0.2362612759011693, "grad_norm": 0.4185939431190491, "learning_rate": 1.7369531053095466e-05, "loss": 0.3082, "step": 13085 }, { "epoch": 0.23635155533407, "grad_norm": 0.4185703694820404, "learning_rate": 1.7367613615791437e-05, "loss": 0.1746, "step": 13090 }, { "epoch": 0.23644183476697073, "grad_norm": 0.5308434963226318, "learning_rate": 1.736569558581646e-05, "loss": 0.3001, "step": 13095 }, { "epoch": 0.23653211419987144, "grad_norm": 0.5180681347846985, "learning_rate": 1.736377696332482e-05, "loss": 0.2886, "step": 13100 }, { "epoch": 0.23662239363277215, "grad_norm": 0.4812846779823303, "learning_rate": 1.736185774847087e-05, "loss": 0.2846, "step": 13105 }, { "epoch": 0.23671267306567287, "grad_norm": 0.5751941204071045, "learning_rate": 1.7359937941408986e-05, "loss": 0.4113, "step": 13110 }, { "epoch": 0.23680295249857358, "grad_norm": 0.546187162399292, "learning_rate": 1.735801754229361e-05, "loss": 0.273, "step": 13115 }, { "epoch": 0.2368932319314743, "grad_norm": 0.3179701864719391, "learning_rate": 1.735609655127922e-05, "loss": 0.3454, "step": 13120 }, { "epoch": 0.236983511364375, "grad_norm": 0.39066609740257263, "learning_rate": 1.7354174968520345e-05, "loss": 0.2601, "step": 13125 }, { "epoch": 0.23707379079727572, "grad_norm": 0.494882732629776, "learning_rate": 1.7352252794171564e-05, "loss": 0.3156, "step": 13130 }, { "epoch": 0.23716407023017644, "grad_norm": 0.37384721636772156, "learning_rate": 1.7350330028387504e-05, "loss": 0.2408, "step": 13135 }, { "epoch": 0.23725434966307715, "grad_norm": 0.40844231843948364, "learning_rate": 1.734840667132283e-05, "loss": 0.1769, "step": 13140 }, { "epoch": 0.23734462909597787, "grad_norm": 0.4068164825439453, "learning_rate": 1.7346482723132273e-05, "loss": 0.2484, "step": 13145 }, { "epoch": 0.23743490852887858, "grad_norm": 1.2531694173812866, "learning_rate": 1.734455818397059e-05, "loss": 0.3211, "step": 13150 }, { "epoch": 0.2375251879617793, "grad_norm": 0.2892794907093048, "learning_rate": 1.7342633053992607e-05, "loss": 0.331, "step": 13155 }, { "epoch": 0.23761546739468, "grad_norm": 0.6131587028503418, "learning_rate": 1.7340707333353178e-05, "loss": 0.2376, "step": 13160 }, { "epoch": 0.23770574682758072, "grad_norm": 0.5176225900650024, "learning_rate": 1.7338781022207217e-05, "loss": 0.2529, "step": 13165 }, { "epoch": 0.23779602626048144, "grad_norm": 0.38245654106140137, "learning_rate": 1.733685412070968e-05, "loss": 0.2604, "step": 13170 }, { "epoch": 0.23788630569338215, "grad_norm": 1.9461857080459595, "learning_rate": 1.7334926629015574e-05, "loss": 0.3495, "step": 13175 }, { "epoch": 0.23797658512628286, "grad_norm": 0.39455553889274597, "learning_rate": 1.733299854727995e-05, "loss": 0.287, "step": 13180 }, { "epoch": 0.23806686455918358, "grad_norm": 0.5157811045646667, "learning_rate": 1.733106987565791e-05, "loss": 0.2726, "step": 13185 }, { "epoch": 0.2381571439920843, "grad_norm": 0.5824441909790039, "learning_rate": 1.73291406143046e-05, "loss": 0.3267, "step": 13190 }, { "epoch": 0.238247423424985, "grad_norm": 0.5017169117927551, "learning_rate": 1.7327210763375213e-05, "loss": 0.3701, "step": 13195 }, { "epoch": 0.23833770285788572, "grad_norm": 0.47364509105682373, "learning_rate": 1.7325280323024997e-05, "loss": 0.3399, "step": 13200 }, { "epoch": 0.23842798229078643, "grad_norm": 0.3858482241630554, "learning_rate": 1.7323349293409236e-05, "loss": 0.3277, "step": 13205 }, { "epoch": 0.23851826172368715, "grad_norm": 0.4815135598182678, "learning_rate": 1.732141767468327e-05, "loss": 0.2735, "step": 13210 }, { "epoch": 0.23860854115658786, "grad_norm": 0.5121126174926758, "learning_rate": 1.731948546700249e-05, "loss": 0.3464, "step": 13215 }, { "epoch": 0.23869882058948858, "grad_norm": 0.4351355731487274, "learning_rate": 1.7317552670522317e-05, "loss": 0.1957, "step": 13220 }, { "epoch": 0.2387891000223893, "grad_norm": 0.38218650221824646, "learning_rate": 1.7315619285398234e-05, "loss": 0.2746, "step": 13225 }, { "epoch": 0.23887937945529, "grad_norm": 0.34511899948120117, "learning_rate": 1.7313685311785776e-05, "loss": 0.3125, "step": 13230 }, { "epoch": 0.23896965888819072, "grad_norm": 0.44880589842796326, "learning_rate": 1.7311750749840503e-05, "loss": 0.393, "step": 13235 }, { "epoch": 0.23905993832109143, "grad_norm": 0.4408525824546814, "learning_rate": 1.730981559971805e-05, "loss": 0.3846, "step": 13240 }, { "epoch": 0.23915021775399214, "grad_norm": 0.4854593873023987, "learning_rate": 1.7307879861574074e-05, "loss": 0.2986, "step": 13245 }, { "epoch": 0.23924049718689286, "grad_norm": 0.51319420337677, "learning_rate": 1.73059435355643e-05, "loss": 0.3189, "step": 13250 }, { "epoch": 0.23933077661979357, "grad_norm": 0.6964123249053955, "learning_rate": 1.7304006621844487e-05, "loss": 0.2609, "step": 13255 }, { "epoch": 0.2394210560526943, "grad_norm": 0.6647042036056519, "learning_rate": 1.7302069120570448e-05, "loss": 0.3141, "step": 13260 }, { "epoch": 0.239511335485595, "grad_norm": 0.4176599085330963, "learning_rate": 1.7300131031898036e-05, "loss": 0.2714, "step": 13265 }, { "epoch": 0.23960161491849571, "grad_norm": 0.3075650930404663, "learning_rate": 1.729819235598316e-05, "loss": 0.2895, "step": 13270 }, { "epoch": 0.23969189435139643, "grad_norm": 0.48430925607681274, "learning_rate": 1.7296253092981775e-05, "loss": 0.3174, "step": 13275 }, { "epoch": 0.23978217378429717, "grad_norm": 0.45159584283828735, "learning_rate": 1.7294313243049874e-05, "loss": 0.2679, "step": 13280 }, { "epoch": 0.23987245321719788, "grad_norm": 0.332610547542572, "learning_rate": 1.7292372806343508e-05, "loss": 0.282, "step": 13285 }, { "epoch": 0.2399627326500986, "grad_norm": 0.39457467198371887, "learning_rate": 1.7290431783018772e-05, "loss": 0.2021, "step": 13290 }, { "epoch": 0.2400530120829993, "grad_norm": 0.31859877705574036, "learning_rate": 1.7288490173231805e-05, "loss": 0.336, "step": 13295 }, { "epoch": 0.24014329151590003, "grad_norm": 0.36984288692474365, "learning_rate": 1.7286547977138797e-05, "loss": 0.2781, "step": 13300 }, { "epoch": 0.24023357094880074, "grad_norm": 0.3430190682411194, "learning_rate": 1.7284605194895983e-05, "loss": 0.331, "step": 13305 }, { "epoch": 0.24032385038170145, "grad_norm": 0.41883084177970886, "learning_rate": 1.7282661826659643e-05, "loss": 0.2829, "step": 13310 }, { "epoch": 0.24041412981460217, "grad_norm": 0.4166741669178009, "learning_rate": 1.7280717872586107e-05, "loss": 0.2906, "step": 13315 }, { "epoch": 0.24050440924750288, "grad_norm": 0.6059514880180359, "learning_rate": 1.7278773332831758e-05, "loss": 0.2372, "step": 13320 }, { "epoch": 0.2405946886804036, "grad_norm": 0.4602368175983429, "learning_rate": 1.7276828207553017e-05, "loss": 0.2972, "step": 13325 }, { "epoch": 0.2406849681133043, "grad_norm": 0.4222080409526825, "learning_rate": 1.7274882496906353e-05, "loss": 0.3206, "step": 13330 }, { "epoch": 0.24077524754620502, "grad_norm": 0.4080156981945038, "learning_rate": 1.7272936201048285e-05, "loss": 0.3971, "step": 13335 }, { "epoch": 0.24086552697910574, "grad_norm": 0.6896274089813232, "learning_rate": 1.727098932013538e-05, "loss": 0.3359, "step": 13340 }, { "epoch": 0.24095580641200645, "grad_norm": 0.40516936779022217, "learning_rate": 1.726904185432425e-05, "loss": 0.2301, "step": 13345 }, { "epoch": 0.24104608584490717, "grad_norm": 0.4957811236381531, "learning_rate": 1.726709380377155e-05, "loss": 0.3243, "step": 13350 }, { "epoch": 0.24113636527780788, "grad_norm": 0.6360941529273987, "learning_rate": 1.7265145168633992e-05, "loss": 0.343, "step": 13355 }, { "epoch": 0.2412266447107086, "grad_norm": 0.5391273498535156, "learning_rate": 1.726319594906833e-05, "loss": 0.3611, "step": 13360 }, { "epoch": 0.2413169241436093, "grad_norm": 0.4269796907901764, "learning_rate": 1.726124614523136e-05, "loss": 0.2855, "step": 13365 }, { "epoch": 0.24140720357651002, "grad_norm": 0.4511514902114868, "learning_rate": 1.7259295757279936e-05, "loss": 0.3093, "step": 13370 }, { "epoch": 0.24149748300941073, "grad_norm": 0.5537800788879395, "learning_rate": 1.7257344785370943e-05, "loss": 0.294, "step": 13375 }, { "epoch": 0.24158776244231145, "grad_norm": 0.36378413438796997, "learning_rate": 1.7255393229661334e-05, "loss": 0.2353, "step": 13380 }, { "epoch": 0.24167804187521216, "grad_norm": 0.30874359607696533, "learning_rate": 1.7253441090308087e-05, "loss": 0.33, "step": 13385 }, { "epoch": 0.24176832130811288, "grad_norm": 0.5672562718391418, "learning_rate": 1.7251488367468242e-05, "loss": 0.2755, "step": 13390 }, { "epoch": 0.2418586007410136, "grad_norm": 0.6343515515327454, "learning_rate": 1.7249535061298884e-05, "loss": 0.3293, "step": 13395 }, { "epoch": 0.2419488801739143, "grad_norm": 0.3627150356769562, "learning_rate": 1.7247581171957136e-05, "loss": 0.3181, "step": 13400 }, { "epoch": 0.24203915960681502, "grad_norm": 0.4247315526008606, "learning_rate": 1.724562669960018e-05, "loss": 0.2795, "step": 13405 }, { "epoch": 0.24212943903971573, "grad_norm": 0.37419256567955017, "learning_rate": 1.7243671644385237e-05, "loss": 0.2257, "step": 13410 }, { "epoch": 0.24221971847261645, "grad_norm": 0.391397088766098, "learning_rate": 1.7241716006469575e-05, "loss": 0.2604, "step": 13415 }, { "epoch": 0.24230999790551716, "grad_norm": 0.2934398055076599, "learning_rate": 1.7239759786010513e-05, "loss": 0.2096, "step": 13420 }, { "epoch": 0.24240027733841787, "grad_norm": 0.4060172438621521, "learning_rate": 1.7237802983165418e-05, "loss": 0.3578, "step": 13425 }, { "epoch": 0.2424905567713186, "grad_norm": 0.6043990850448608, "learning_rate": 1.7235845598091694e-05, "loss": 0.3541, "step": 13430 }, { "epoch": 0.2425808362042193, "grad_norm": 0.3376462161540985, "learning_rate": 1.72338876309468e-05, "loss": 0.223, "step": 13435 }, { "epoch": 0.24267111563712002, "grad_norm": 0.4019452929496765, "learning_rate": 1.7231929081888245e-05, "loss": 0.2463, "step": 13440 }, { "epoch": 0.24276139507002073, "grad_norm": 0.4395068883895874, "learning_rate": 1.7229969951073572e-05, "loss": 0.2632, "step": 13445 }, { "epoch": 0.24285167450292144, "grad_norm": 0.5303803086280823, "learning_rate": 1.7228010238660385e-05, "loss": 0.3481, "step": 13450 }, { "epoch": 0.24294195393582216, "grad_norm": 0.43933194875717163, "learning_rate": 1.722604994480633e-05, "loss": 0.3125, "step": 13455 }, { "epoch": 0.24303223336872287, "grad_norm": 0.42659083008766174, "learning_rate": 1.7224089069669093e-05, "loss": 0.2887, "step": 13460 }, { "epoch": 0.24312251280162359, "grad_norm": 0.5992361307144165, "learning_rate": 1.7222127613406417e-05, "loss": 0.3262, "step": 13465 }, { "epoch": 0.2432127922345243, "grad_norm": 0.31876376271247864, "learning_rate": 1.7220165576176082e-05, "loss": 0.3356, "step": 13470 }, { "epoch": 0.243303071667425, "grad_norm": 0.40272951126098633, "learning_rate": 1.721820295813592e-05, "loss": 0.3032, "step": 13475 }, { "epoch": 0.24339335110032573, "grad_norm": 0.46437716484069824, "learning_rate": 1.7216239759443814e-05, "loss": 0.2207, "step": 13480 }, { "epoch": 0.24348363053322644, "grad_norm": 0.48192399740219116, "learning_rate": 1.7214275980257688e-05, "loss": 0.2173, "step": 13485 }, { "epoch": 0.24357390996612716, "grad_norm": 0.4291000962257385, "learning_rate": 1.721231162073551e-05, "loss": 0.3343, "step": 13490 }, { "epoch": 0.24366418939902787, "grad_norm": 0.2683699429035187, "learning_rate": 1.7210346681035302e-05, "loss": 0.2025, "step": 13495 }, { "epoch": 0.24375446883192858, "grad_norm": 0.3970652222633362, "learning_rate": 1.7208381161315125e-05, "loss": 0.2634, "step": 13500 }, { "epoch": 0.2438447482648293, "grad_norm": 0.5615600943565369, "learning_rate": 1.7206415061733096e-05, "loss": 0.2508, "step": 13505 }, { "epoch": 0.24393502769773, "grad_norm": 0.3180173337459564, "learning_rate": 1.7204448382447375e-05, "loss": 0.3716, "step": 13510 }, { "epoch": 0.24402530713063073, "grad_norm": 0.7189514636993408, "learning_rate": 1.7202481123616156e-05, "loss": 0.2565, "step": 13515 }, { "epoch": 0.24411558656353144, "grad_norm": 0.3452807366847992, "learning_rate": 1.72005132853977e-05, "loss": 0.2418, "step": 13520 }, { "epoch": 0.24420586599643215, "grad_norm": 0.4875531494617462, "learning_rate": 1.7198544867950306e-05, "loss": 0.2748, "step": 13525 }, { "epoch": 0.24429614542933287, "grad_norm": 0.3451898694038391, "learning_rate": 1.7196575871432313e-05, "loss": 0.2704, "step": 13530 }, { "epoch": 0.24438642486223358, "grad_norm": 0.3998042047023773, "learning_rate": 1.7194606296002114e-05, "loss": 0.3006, "step": 13535 }, { "epoch": 0.2444767042951343, "grad_norm": 0.5872548818588257, "learning_rate": 1.719263614181815e-05, "loss": 0.3101, "step": 13540 }, { "epoch": 0.244566983728035, "grad_norm": 0.5717666149139404, "learning_rate": 1.7190665409038904e-05, "loss": 0.2535, "step": 13545 }, { "epoch": 0.24465726316093572, "grad_norm": 0.38812255859375, "learning_rate": 1.7188694097822907e-05, "loss": 0.327, "step": 13550 }, { "epoch": 0.24474754259383644, "grad_norm": 0.532560408115387, "learning_rate": 1.7186722208328735e-05, "loss": 0.3805, "step": 13555 }, { "epoch": 0.24483782202673715, "grad_norm": 0.5711003541946411, "learning_rate": 1.7184749740715017e-05, "loss": 0.2442, "step": 13560 }, { "epoch": 0.24492810145963786, "grad_norm": 0.4644785225391388, "learning_rate": 1.7182776695140416e-05, "loss": 0.2651, "step": 13565 }, { "epoch": 0.24501838089253858, "grad_norm": 0.3790689706802368, "learning_rate": 1.718080307176366e-05, "loss": 0.1993, "step": 13570 }, { "epoch": 0.2451086603254393, "grad_norm": 0.36735817790031433, "learning_rate": 1.7178828870743502e-05, "loss": 0.2514, "step": 13575 }, { "epoch": 0.24519893975834, "grad_norm": 0.4536728858947754, "learning_rate": 1.7176854092238756e-05, "loss": 0.232, "step": 13580 }, { "epoch": 0.24528921919124072, "grad_norm": 0.4569501578807831, "learning_rate": 1.7174878736408283e-05, "loss": 0.29, "step": 13585 }, { "epoch": 0.24537949862414143, "grad_norm": 0.4566640853881836, "learning_rate": 1.717290280341098e-05, "loss": 0.3562, "step": 13590 }, { "epoch": 0.24546977805704215, "grad_norm": 0.295539528131485, "learning_rate": 1.7170926293405795e-05, "loss": 0.2617, "step": 13595 }, { "epoch": 0.24556005748994286, "grad_norm": 0.7204494476318359, "learning_rate": 1.716894920655173e-05, "loss": 0.3241, "step": 13600 }, { "epoch": 0.24565033692284358, "grad_norm": 0.5700797438621521, "learning_rate": 1.7166971543007826e-05, "loss": 0.3685, "step": 13605 }, { "epoch": 0.2457406163557443, "grad_norm": 0.3525966703891754, "learning_rate": 1.716499330293317e-05, "loss": 0.2742, "step": 13610 }, { "epoch": 0.245830895788645, "grad_norm": 0.6157240867614746, "learning_rate": 1.71630144864869e-05, "loss": 0.4179, "step": 13615 }, { "epoch": 0.24592117522154572, "grad_norm": 0.37160009145736694, "learning_rate": 1.716103509382819e-05, "loss": 0.2981, "step": 13620 }, { "epoch": 0.24601145465444643, "grad_norm": 0.4578617215156555, "learning_rate": 1.7159055125116273e-05, "loss": 0.3848, "step": 13625 }, { "epoch": 0.24610173408734715, "grad_norm": 0.36614182591438293, "learning_rate": 1.7157074580510423e-05, "loss": 0.3155, "step": 13630 }, { "epoch": 0.24619201352024786, "grad_norm": 0.6067430973052979, "learning_rate": 1.715509346016996e-05, "loss": 0.224, "step": 13635 }, { "epoch": 0.24628229295314857, "grad_norm": 0.31626978516578674, "learning_rate": 1.715311176425425e-05, "loss": 0.2568, "step": 13640 }, { "epoch": 0.2463725723860493, "grad_norm": 0.3033553659915924, "learning_rate": 1.7151129492922708e-05, "loss": 0.2641, "step": 13645 }, { "epoch": 0.24646285181895, "grad_norm": 0.38154780864715576, "learning_rate": 1.7149146646334793e-05, "loss": 0.2752, "step": 13650 }, { "epoch": 0.24655313125185072, "grad_norm": 0.3966079652309418, "learning_rate": 1.7147163224650007e-05, "loss": 0.3328, "step": 13655 }, { "epoch": 0.24664341068475143, "grad_norm": 0.36520111560821533, "learning_rate": 1.7145179228027906e-05, "loss": 0.213, "step": 13660 }, { "epoch": 0.24673369011765214, "grad_norm": 0.4331825375556946, "learning_rate": 1.7143194656628088e-05, "loss": 0.3735, "step": 13665 }, { "epoch": 0.24682396955055286, "grad_norm": 0.453413188457489, "learning_rate": 1.7141209510610193e-05, "loss": 0.3221, "step": 13670 }, { "epoch": 0.2469142489834536, "grad_norm": 0.4321773946285248, "learning_rate": 1.7139223790133915e-05, "loss": 0.3019, "step": 13675 }, { "epoch": 0.2470045284163543, "grad_norm": 1.5241320133209229, "learning_rate": 1.7137237495358993e-05, "loss": 0.2381, "step": 13680 }, { "epoch": 0.24709480784925503, "grad_norm": 0.3903169631958008, "learning_rate": 1.7135250626445207e-05, "loss": 0.299, "step": 13685 }, { "epoch": 0.24718508728215574, "grad_norm": 0.3825934827327728, "learning_rate": 1.713326318355238e-05, "loss": 0.3858, "step": 13690 }, { "epoch": 0.24727536671505645, "grad_norm": 0.2576453685760498, "learning_rate": 1.7131275166840403e-05, "loss": 0.2984, "step": 13695 }, { "epoch": 0.24736564614795717, "grad_norm": 1.0343531370162964, "learning_rate": 1.712928657646918e-05, "loss": 0.3072, "step": 13700 }, { "epoch": 0.24745592558085788, "grad_norm": 0.45805513858795166, "learning_rate": 1.7127297412598693e-05, "loss": 0.2331, "step": 13705 }, { "epoch": 0.2475462050137586, "grad_norm": 0.20073974132537842, "learning_rate": 1.7125307675388947e-05, "loss": 0.2494, "step": 13710 }, { "epoch": 0.2476364844466593, "grad_norm": 0.3512426018714905, "learning_rate": 1.7123317365000004e-05, "loss": 0.3414, "step": 13715 }, { "epoch": 0.24772676387956002, "grad_norm": 0.25139570236206055, "learning_rate": 1.7121326481591972e-05, "loss": 0.2823, "step": 13720 }, { "epoch": 0.24781704331246074, "grad_norm": 0.5818011164665222, "learning_rate": 1.7119335025325e-05, "loss": 0.3291, "step": 13725 }, { "epoch": 0.24790732274536145, "grad_norm": 0.4541693329811096, "learning_rate": 1.711734299635929e-05, "loss": 0.2056, "step": 13730 }, { "epoch": 0.24799760217826217, "grad_norm": 0.3896014094352722, "learning_rate": 1.7115350394855083e-05, "loss": 0.2645, "step": 13735 }, { "epoch": 0.24808788161116288, "grad_norm": 0.42318210005760193, "learning_rate": 1.7113357220972668e-05, "loss": 0.2799, "step": 13740 }, { "epoch": 0.2481781610440636, "grad_norm": 0.48783060908317566, "learning_rate": 1.7111363474872385e-05, "loss": 0.2642, "step": 13745 }, { "epoch": 0.2482684404769643, "grad_norm": 0.4491179287433624, "learning_rate": 1.710936915671462e-05, "loss": 0.4675, "step": 13750 }, { "epoch": 0.24835871990986502, "grad_norm": 0.40137410163879395, "learning_rate": 1.710737426665979e-05, "loss": 0.2452, "step": 13755 }, { "epoch": 0.24844899934276574, "grad_norm": 0.5091173052787781, "learning_rate": 1.710537880486838e-05, "loss": 0.2596, "step": 13760 }, { "epoch": 0.24853927877566645, "grad_norm": 0.43215659260749817, "learning_rate": 1.71033827715009e-05, "loss": 0.2368, "step": 13765 }, { "epoch": 0.24862955820856716, "grad_norm": 0.5628123879432678, "learning_rate": 1.7101386166717928e-05, "loss": 0.2523, "step": 13770 }, { "epoch": 0.24871983764146788, "grad_norm": 0.376011461019516, "learning_rate": 1.709938899068007e-05, "loss": 0.3567, "step": 13775 }, { "epoch": 0.2488101170743686, "grad_norm": 0.4350323975086212, "learning_rate": 1.7097391243547985e-05, "loss": 0.2366, "step": 13780 }, { "epoch": 0.2489003965072693, "grad_norm": 0.49537816643714905, "learning_rate": 1.7095392925482378e-05, "loss": 0.252, "step": 13785 }, { "epoch": 0.24899067594017002, "grad_norm": 0.42540234327316284, "learning_rate": 1.7093394036644e-05, "loss": 0.2173, "step": 13790 }, { "epoch": 0.24908095537307073, "grad_norm": 0.4067361652851105, "learning_rate": 1.709139457719364e-05, "loss": 0.271, "step": 13795 }, { "epoch": 0.24917123480597145, "grad_norm": 0.5016900897026062, "learning_rate": 1.708939454729215e-05, "loss": 0.362, "step": 13800 }, { "epoch": 0.24926151423887216, "grad_norm": 0.5326197743415833, "learning_rate": 1.7087393947100413e-05, "loss": 0.3433, "step": 13805 }, { "epoch": 0.24935179367177288, "grad_norm": 0.4544302225112915, "learning_rate": 1.7085392776779363e-05, "loss": 0.3027, "step": 13810 }, { "epoch": 0.2494420731046736, "grad_norm": 0.5088821053504944, "learning_rate": 1.708339103648998e-05, "loss": 0.3039, "step": 13815 }, { "epoch": 0.2495323525375743, "grad_norm": 0.2824195921421051, "learning_rate": 1.708138872639329e-05, "loss": 0.1874, "step": 13820 }, { "epoch": 0.24962263197047502, "grad_norm": 0.39786961674690247, "learning_rate": 1.7079385846650366e-05, "loss": 0.2766, "step": 13825 }, { "epoch": 0.24971291140337573, "grad_norm": 0.5772325396537781, "learning_rate": 1.7077382397422317e-05, "loss": 0.2935, "step": 13830 }, { "epoch": 0.24980319083627645, "grad_norm": 0.39464205503463745, "learning_rate": 1.7075378378870315e-05, "loss": 0.2754, "step": 13835 }, { "epoch": 0.24989347026917716, "grad_norm": 0.4776792824268341, "learning_rate": 1.7073373791155564e-05, "loss": 0.3107, "step": 13840 }, { "epoch": 0.24998374970207787, "grad_norm": 0.7751340866088867, "learning_rate": 1.7071368634439322e-05, "loss": 0.3653, "step": 13845 }, { "epoch": 0.25007402913497856, "grad_norm": 0.3707751929759979, "learning_rate": 1.7069362908882885e-05, "loss": 0.2124, "step": 13850 }, { "epoch": 0.2501643085678793, "grad_norm": 0.48276251554489136, "learning_rate": 1.7067356614647603e-05, "loss": 0.2934, "step": 13855 }, { "epoch": 0.25025458800078, "grad_norm": 0.7727112174034119, "learning_rate": 1.7065349751894865e-05, "loss": 0.3284, "step": 13860 }, { "epoch": 0.25034486743368073, "grad_norm": 0.8128131628036499, "learning_rate": 1.706334232078611e-05, "loss": 0.3005, "step": 13865 }, { "epoch": 0.25043514686658147, "grad_norm": 0.20768576860427856, "learning_rate": 1.7061334321482822e-05, "loss": 0.2003, "step": 13870 }, { "epoch": 0.25052542629948216, "grad_norm": 0.3125922977924347, "learning_rate": 1.7059325754146528e-05, "loss": 0.2798, "step": 13875 }, { "epoch": 0.2506157057323829, "grad_norm": 0.5686619281768799, "learning_rate": 1.7057316618938803e-05, "loss": 0.3689, "step": 13880 }, { "epoch": 0.2507059851652836, "grad_norm": 0.5619451999664307, "learning_rate": 1.7055306916021266e-05, "loss": 0.2803, "step": 13885 }, { "epoch": 0.2507962645981843, "grad_norm": 0.5215336680412292, "learning_rate": 1.7053296645555587e-05, "loss": 0.3849, "step": 13890 }, { "epoch": 0.250886544031085, "grad_norm": 0.38564562797546387, "learning_rate": 1.7051285807703472e-05, "loss": 0.4052, "step": 13895 }, { "epoch": 0.25097682346398575, "grad_norm": 0.5514028668403625, "learning_rate": 1.7049274402626685e-05, "loss": 0.2089, "step": 13900 }, { "epoch": 0.25106710289688644, "grad_norm": 0.538511335849762, "learning_rate": 1.7047262430487024e-05, "loss": 0.2185, "step": 13905 }, { "epoch": 0.2511573823297872, "grad_norm": 0.736876368522644, "learning_rate": 1.7045249891446342e-05, "loss": 0.4343, "step": 13910 }, { "epoch": 0.25124766176268787, "grad_norm": 0.481503427028656, "learning_rate": 1.7043236785666523e-05, "loss": 0.254, "step": 13915 }, { "epoch": 0.2513379411955886, "grad_norm": 0.5032512545585632, "learning_rate": 1.704122311330952e-05, "loss": 0.2532, "step": 13920 }, { "epoch": 0.2514282206284893, "grad_norm": 0.41925984621047974, "learning_rate": 1.7039208874537313e-05, "loss": 0.3603, "step": 13925 }, { "epoch": 0.25151850006139004, "grad_norm": 0.5571647882461548, "learning_rate": 1.7037194069511925e-05, "loss": 0.2823, "step": 13930 }, { "epoch": 0.2516087794942907, "grad_norm": 0.5781228542327881, "learning_rate": 1.7035178698395445e-05, "loss": 0.379, "step": 13935 }, { "epoch": 0.25169905892719147, "grad_norm": 0.48722633719444275, "learning_rate": 1.7033162761349992e-05, "loss": 0.2829, "step": 13940 }, { "epoch": 0.25178933836009215, "grad_norm": 0.3715059161186218, "learning_rate": 1.7031146258537725e-05, "loss": 0.3258, "step": 13945 }, { "epoch": 0.2518796177929929, "grad_norm": 0.5888335704803467, "learning_rate": 1.7029129190120867e-05, "loss": 0.2693, "step": 13950 }, { "epoch": 0.2519698972258936, "grad_norm": 0.33220377564430237, "learning_rate": 1.702711155626167e-05, "loss": 0.227, "step": 13955 }, { "epoch": 0.2520601766587943, "grad_norm": 0.6103527545928955, "learning_rate": 1.702509335712244e-05, "loss": 0.2299, "step": 13960 }, { "epoch": 0.252150456091695, "grad_norm": 0.38191163539886475, "learning_rate": 1.7023074592865528e-05, "loss": 0.2695, "step": 13965 }, { "epoch": 0.25224073552459575, "grad_norm": 0.5467475652694702, "learning_rate": 1.702105526365333e-05, "loss": 0.3741, "step": 13970 }, { "epoch": 0.25233101495749644, "grad_norm": 0.6334359049797058, "learning_rate": 1.7019035369648278e-05, "loss": 0.2445, "step": 13975 }, { "epoch": 0.2524212943903972, "grad_norm": 0.4496508836746216, "learning_rate": 1.7017014911012868e-05, "loss": 0.242, "step": 13980 }, { "epoch": 0.25251157382329786, "grad_norm": 0.5502260327339172, "learning_rate": 1.7014993887909625e-05, "loss": 0.292, "step": 13985 }, { "epoch": 0.2526018532561986, "grad_norm": 0.42615535855293274, "learning_rate": 1.7012972300501128e-05, "loss": 0.2112, "step": 13990 }, { "epoch": 0.2526921326890993, "grad_norm": 0.32613807916641235, "learning_rate": 1.7010950148950002e-05, "loss": 0.2911, "step": 13995 }, { "epoch": 0.25278241212200003, "grad_norm": 0.3294132351875305, "learning_rate": 1.700892743341891e-05, "loss": 0.3252, "step": 14000 }, { "epoch": 0.2528726915549007, "grad_norm": 0.5091342329978943, "learning_rate": 1.7006904154070562e-05, "loss": 0.1856, "step": 14005 }, { "epoch": 0.25296297098780146, "grad_norm": 0.4071289300918579, "learning_rate": 1.7004880311067723e-05, "loss": 0.2937, "step": 14010 }, { "epoch": 0.25305325042070215, "grad_norm": 0.3675556480884552, "learning_rate": 1.7002855904573193e-05, "loss": 0.3034, "step": 14015 }, { "epoch": 0.2531435298536029, "grad_norm": 0.6735295057296753, "learning_rate": 1.700083093474982e-05, "loss": 0.2503, "step": 14020 }, { "epoch": 0.2532338092865036, "grad_norm": 0.7142743468284607, "learning_rate": 1.69988054017605e-05, "loss": 0.242, "step": 14025 }, { "epoch": 0.2533240887194043, "grad_norm": 0.32563072443008423, "learning_rate": 1.699677930576817e-05, "loss": 0.168, "step": 14030 }, { "epoch": 0.253414368152305, "grad_norm": 0.38879600167274475, "learning_rate": 1.699475264693582e-05, "loss": 0.3649, "step": 14035 }, { "epoch": 0.25350464758520574, "grad_norm": 0.5603982210159302, "learning_rate": 1.6992725425426474e-05, "loss": 0.3281, "step": 14040 }, { "epoch": 0.25359492701810643, "grad_norm": 0.7267544269561768, "learning_rate": 1.6990697641403212e-05, "loss": 0.3527, "step": 14045 }, { "epoch": 0.2536852064510072, "grad_norm": 0.4394630491733551, "learning_rate": 1.698866929502915e-05, "loss": 0.4412, "step": 14050 }, { "epoch": 0.25377548588390786, "grad_norm": 0.2933726906776428, "learning_rate": 1.6986640386467456e-05, "loss": 0.3036, "step": 14055 }, { "epoch": 0.2538657653168086, "grad_norm": 0.8265781402587891, "learning_rate": 1.6984610915881338e-05, "loss": 0.2785, "step": 14060 }, { "epoch": 0.2539560447497093, "grad_norm": 0.33802053332328796, "learning_rate": 1.698258088343406e-05, "loss": 0.3516, "step": 14065 }, { "epoch": 0.25404632418261003, "grad_norm": 0.29324543476104736, "learning_rate": 1.6980550289288915e-05, "loss": 0.2806, "step": 14070 }, { "epoch": 0.2541366036155107, "grad_norm": 0.41427239775657654, "learning_rate": 1.697851913360925e-05, "loss": 0.2985, "step": 14075 }, { "epoch": 0.25422688304841146, "grad_norm": 0.4136227071285248, "learning_rate": 1.6976487416558464e-05, "loss": 0.2698, "step": 14080 }, { "epoch": 0.25431716248131214, "grad_norm": 0.2818601727485657, "learning_rate": 1.6974455138299987e-05, "loss": 0.2504, "step": 14085 }, { "epoch": 0.2544074419142129, "grad_norm": 0.3512054979801178, "learning_rate": 1.6972422298997305e-05, "loss": 0.1858, "step": 14090 }, { "epoch": 0.25449772134711357, "grad_norm": 0.3316943943500519, "learning_rate": 1.6970388898813945e-05, "loss": 0.3162, "step": 14095 }, { "epoch": 0.2545880007800143, "grad_norm": 0.3768051266670227, "learning_rate": 1.6968354937913476e-05, "loss": 0.2573, "step": 14100 }, { "epoch": 0.254678280212915, "grad_norm": 0.40512460470199585, "learning_rate": 1.6966320416459515e-05, "loss": 0.2814, "step": 14105 }, { "epoch": 0.25476855964581574, "grad_norm": 0.3147158920764923, "learning_rate": 1.696428533461573e-05, "loss": 0.3551, "step": 14110 }, { "epoch": 0.2548588390787164, "grad_norm": 0.3642359972000122, "learning_rate": 1.696224969254582e-05, "loss": 0.2817, "step": 14115 }, { "epoch": 0.25494911851161717, "grad_norm": 0.5084564089775085, "learning_rate": 1.6960213490413546e-05, "loss": 0.2969, "step": 14120 }, { "epoch": 0.25503939794451785, "grad_norm": 0.4527064859867096, "learning_rate": 1.6958176728382704e-05, "loss": 0.3291, "step": 14125 }, { "epoch": 0.2551296773774186, "grad_norm": 0.5112528204917908, "learning_rate": 1.695613940661713e-05, "loss": 0.308, "step": 14130 }, { "epoch": 0.2552199568103193, "grad_norm": 0.4065954387187958, "learning_rate": 1.6954101525280723e-05, "loss": 0.3138, "step": 14135 }, { "epoch": 0.25531023624322, "grad_norm": 0.33163678646087646, "learning_rate": 1.6952063084537407e-05, "loss": 0.2584, "step": 14140 }, { "epoch": 0.2554005156761207, "grad_norm": 0.5004115104675293, "learning_rate": 1.6950024084551162e-05, "loss": 0.33, "step": 14145 }, { "epoch": 0.25549079510902145, "grad_norm": 0.5348025560379028, "learning_rate": 1.694798452548601e-05, "loss": 0.2925, "step": 14150 }, { "epoch": 0.25558107454192214, "grad_norm": 0.4292595088481903, "learning_rate": 1.694594440750602e-05, "loss": 0.3471, "step": 14155 }, { "epoch": 0.2556713539748229, "grad_norm": 0.39084112644195557, "learning_rate": 1.6943903730775307e-05, "loss": 0.2318, "step": 14160 }, { "epoch": 0.25576163340772357, "grad_norm": 0.5658642053604126, "learning_rate": 1.694186249545802e-05, "loss": 0.4035, "step": 14165 }, { "epoch": 0.2558519128406243, "grad_norm": 0.3004071116447449, "learning_rate": 1.693982070171837e-05, "loss": 0.3158, "step": 14170 }, { "epoch": 0.255942192273525, "grad_norm": 0.40406376123428345, "learning_rate": 1.6937778349720604e-05, "loss": 0.2249, "step": 14175 }, { "epoch": 0.25603247170642573, "grad_norm": 0.47171875834465027, "learning_rate": 1.6935735439629012e-05, "loss": 0.3313, "step": 14180 }, { "epoch": 0.2561227511393264, "grad_norm": 0.6781940460205078, "learning_rate": 1.693369197160793e-05, "loss": 0.2692, "step": 14185 }, { "epoch": 0.25621303057222716, "grad_norm": 0.9075122475624084, "learning_rate": 1.6931647945821743e-05, "loss": 0.3161, "step": 14190 }, { "epoch": 0.25630331000512785, "grad_norm": 0.34464186429977417, "learning_rate": 1.6929603362434878e-05, "loss": 0.2726, "step": 14195 }, { "epoch": 0.2563935894380286, "grad_norm": 0.7514514923095703, "learning_rate": 1.69275582216118e-05, "loss": 0.2552, "step": 14200 }, { "epoch": 0.2564838688709293, "grad_norm": 0.40465232729911804, "learning_rate": 1.692551252351704e-05, "loss": 0.3369, "step": 14205 }, { "epoch": 0.25657414830383, "grad_norm": 0.43503934144973755, "learning_rate": 1.6923466268315144e-05, "loss": 0.2949, "step": 14210 }, { "epoch": 0.2566644277367307, "grad_norm": 0.5278799533843994, "learning_rate": 1.6921419456170728e-05, "loss": 0.2964, "step": 14215 }, { "epoch": 0.25675470716963145, "grad_norm": 0.3669053316116333, "learning_rate": 1.691937208724844e-05, "loss": 0.2675, "step": 14220 }, { "epoch": 0.25684498660253213, "grad_norm": 0.4335341453552246, "learning_rate": 1.6917324161712973e-05, "loss": 0.3035, "step": 14225 }, { "epoch": 0.2569352660354329, "grad_norm": 0.45222485065460205, "learning_rate": 1.6915275679729075e-05, "loss": 0.2902, "step": 14230 }, { "epoch": 0.25702554546833356, "grad_norm": 0.46283671259880066, "learning_rate": 1.6913226641461528e-05, "loss": 0.3303, "step": 14235 }, { "epoch": 0.2571158249012343, "grad_norm": 0.6368562579154968, "learning_rate": 1.691117704707516e-05, "loss": 0.2445, "step": 14240 }, { "epoch": 0.257206104334135, "grad_norm": 0.44071435928344727, "learning_rate": 1.6909126896734847e-05, "loss": 0.2725, "step": 14245 }, { "epoch": 0.25729638376703573, "grad_norm": 0.40742605924606323, "learning_rate": 1.6907076190605507e-05, "loss": 0.3206, "step": 14250 }, { "epoch": 0.2573866631999364, "grad_norm": 0.5893548130989075, "learning_rate": 1.6905024928852112e-05, "loss": 0.2501, "step": 14255 }, { "epoch": 0.25747694263283716, "grad_norm": 0.3587595224380493, "learning_rate": 1.690297311163966e-05, "loss": 0.3273, "step": 14260 }, { "epoch": 0.25756722206573784, "grad_norm": 0.3252449035644531, "learning_rate": 1.690092073913322e-05, "loss": 0.2712, "step": 14265 }, { "epoch": 0.2576575014986386, "grad_norm": 0.5918602347373962, "learning_rate": 1.6898867811497874e-05, "loss": 0.1865, "step": 14270 }, { "epoch": 0.2577477809315393, "grad_norm": 1.2052277326583862, "learning_rate": 1.6896814328898772e-05, "loss": 0.2629, "step": 14275 }, { "epoch": 0.25783806036444, "grad_norm": 0.3375871181488037, "learning_rate": 1.6894760291501103e-05, "loss": 0.2149, "step": 14280 }, { "epoch": 0.25792833979734076, "grad_norm": 0.5106332302093506, "learning_rate": 1.6892705699470096e-05, "loss": 0.2918, "step": 14285 }, { "epoch": 0.25801861923024144, "grad_norm": 0.4305230975151062, "learning_rate": 1.6890650552971034e-05, "loss": 0.255, "step": 14290 }, { "epoch": 0.2581088986631422, "grad_norm": 0.5305595993995667, "learning_rate": 1.6888594852169236e-05, "loss": 0.4565, "step": 14295 }, { "epoch": 0.25819917809604287, "grad_norm": 0.4331403076648712, "learning_rate": 1.6886538597230062e-05, "loss": 0.286, "step": 14300 }, { "epoch": 0.2582894575289436, "grad_norm": 0.3630242347717285, "learning_rate": 1.6884481788318933e-05, "loss": 0.2657, "step": 14305 }, { "epoch": 0.2583797369618443, "grad_norm": 0.3561517298221588, "learning_rate": 1.6882424425601295e-05, "loss": 0.2423, "step": 14310 }, { "epoch": 0.25847001639474504, "grad_norm": 0.7556095719337463, "learning_rate": 1.6880366509242658e-05, "loss": 0.3607, "step": 14315 }, { "epoch": 0.2585602958276457, "grad_norm": 0.4049709737300873, "learning_rate": 1.6878308039408553e-05, "loss": 0.2307, "step": 14320 }, { "epoch": 0.25865057526054647, "grad_norm": 0.3996032774448395, "learning_rate": 1.687624901626458e-05, "loss": 0.3906, "step": 14325 }, { "epoch": 0.25874085469344715, "grad_norm": 0.47113409638404846, "learning_rate": 1.6874189439976372e-05, "loss": 0.3392, "step": 14330 }, { "epoch": 0.2588311341263479, "grad_norm": 0.6860496997833252, "learning_rate": 1.6872129310709598e-05, "loss": 0.3603, "step": 14335 }, { "epoch": 0.2589214135592486, "grad_norm": 0.3963136076927185, "learning_rate": 1.6870068628629986e-05, "loss": 0.4027, "step": 14340 }, { "epoch": 0.2590116929921493, "grad_norm": 0.5941218733787537, "learning_rate": 1.6868007393903308e-05, "loss": 0.3303, "step": 14345 }, { "epoch": 0.25910197242505, "grad_norm": 0.46553128957748413, "learning_rate": 1.6865945606695365e-05, "loss": 0.3103, "step": 14350 }, { "epoch": 0.25919225185795075, "grad_norm": 0.44235265254974365, "learning_rate": 1.6863883267172023e-05, "loss": 0.2916, "step": 14355 }, { "epoch": 0.25928253129085144, "grad_norm": 0.3624558448791504, "learning_rate": 1.6861820375499175e-05, "loss": 0.1915, "step": 14360 }, { "epoch": 0.2593728107237522, "grad_norm": 0.3028622269630432, "learning_rate": 1.685975693184277e-05, "loss": 0.2714, "step": 14365 }, { "epoch": 0.25946309015665286, "grad_norm": 0.5631296038627625, "learning_rate": 1.6857692936368795e-05, "loss": 0.2846, "step": 14370 }, { "epoch": 0.2595533695895536, "grad_norm": 0.4793113172054291, "learning_rate": 1.685562838924328e-05, "loss": 0.3616, "step": 14375 }, { "epoch": 0.2596436490224543, "grad_norm": 0.5509387850761414, "learning_rate": 1.6853563290632308e-05, "loss": 0.317, "step": 14380 }, { "epoch": 0.25973392845535503, "grad_norm": 0.29282665252685547, "learning_rate": 1.6851497640702e-05, "loss": 0.3121, "step": 14385 }, { "epoch": 0.2598242078882557, "grad_norm": 0.5319273471832275, "learning_rate": 1.6849431439618522e-05, "loss": 0.3166, "step": 14390 }, { "epoch": 0.25991448732115646, "grad_norm": 0.7315474152565002, "learning_rate": 1.6847364687548084e-05, "loss": 0.3886, "step": 14395 }, { "epoch": 0.26000476675405715, "grad_norm": 0.391114741563797, "learning_rate": 1.6845297384656944e-05, "loss": 0.2796, "step": 14400 }, { "epoch": 0.2600950461869579, "grad_norm": 0.403923898935318, "learning_rate": 1.6843229531111397e-05, "loss": 0.267, "step": 14405 }, { "epoch": 0.2601853256198586, "grad_norm": 0.5094287991523743, "learning_rate": 1.6841161127077792e-05, "loss": 0.261, "step": 14410 }, { "epoch": 0.2602756050527593, "grad_norm": 0.8942059874534607, "learning_rate": 1.6839092172722514e-05, "loss": 0.2961, "step": 14415 }, { "epoch": 0.26036588448566, "grad_norm": 0.40858590602874756, "learning_rate": 1.6837022668211995e-05, "loss": 0.3393, "step": 14420 }, { "epoch": 0.26045616391856075, "grad_norm": 0.4373575747013092, "learning_rate": 1.683495261371271e-05, "loss": 0.3228, "step": 14425 }, { "epoch": 0.26054644335146143, "grad_norm": 0.33809569478034973, "learning_rate": 1.6832882009391187e-05, "loss": 0.2772, "step": 14430 }, { "epoch": 0.2606367227843622, "grad_norm": 0.3225255310535431, "learning_rate": 1.683081085541398e-05, "loss": 0.2417, "step": 14435 }, { "epoch": 0.26072700221726286, "grad_norm": 0.523601770401001, "learning_rate": 1.682873915194771e-05, "loss": 0.2947, "step": 14440 }, { "epoch": 0.2608172816501636, "grad_norm": 0.44271430373191833, "learning_rate": 1.6826666899159022e-05, "loss": 0.2128, "step": 14445 }, { "epoch": 0.2609075610830643, "grad_norm": 0.4109349846839905, "learning_rate": 1.682459409721462e-05, "loss": 0.3225, "step": 14450 }, { "epoch": 0.26099784051596503, "grad_norm": 0.8876522183418274, "learning_rate": 1.682252074628124e-05, "loss": 0.2871, "step": 14455 }, { "epoch": 0.2610881199488657, "grad_norm": 1.0981746912002563, "learning_rate": 1.6820446846525668e-05, "loss": 0.3108, "step": 14460 }, { "epoch": 0.26117839938176646, "grad_norm": 0.3024000823497772, "learning_rate": 1.6818372398114736e-05, "loss": 0.3046, "step": 14465 }, { "epoch": 0.26126867881466714, "grad_norm": 0.9253686666488647, "learning_rate": 1.6816297401215328e-05, "loss": 0.2024, "step": 14470 }, { "epoch": 0.2613589582475679, "grad_norm": 0.42614516615867615, "learning_rate": 1.6814221855994342e-05, "loss": 0.3555, "step": 14475 }, { "epoch": 0.26144923768046857, "grad_norm": 0.38271841406822205, "learning_rate": 1.681214576261876e-05, "loss": 0.2707, "step": 14480 }, { "epoch": 0.2615395171133693, "grad_norm": 0.3673293888568878, "learning_rate": 1.6810069121255577e-05, "loss": 0.2125, "step": 14485 }, { "epoch": 0.26162979654627, "grad_norm": 0.398209810256958, "learning_rate": 1.680799193207185e-05, "loss": 0.2979, "step": 14490 }, { "epoch": 0.26172007597917074, "grad_norm": 0.5076440572738647, "learning_rate": 1.6805914195234666e-05, "loss": 0.2757, "step": 14495 }, { "epoch": 0.2618103554120714, "grad_norm": 0.9557774066925049, "learning_rate": 1.680383591091117e-05, "loss": 0.2924, "step": 14500 }, { "epoch": 0.26190063484497217, "grad_norm": 0.6554763913154602, "learning_rate": 1.6801757079268546e-05, "loss": 0.2519, "step": 14505 }, { "epoch": 0.26199091427787286, "grad_norm": 0.6285964250564575, "learning_rate": 1.679967770047402e-05, "loss": 0.337, "step": 14510 }, { "epoch": 0.2620811937107736, "grad_norm": 0.40318724513053894, "learning_rate": 1.679759777469486e-05, "loss": 0.2813, "step": 14515 }, { "epoch": 0.2621714731436743, "grad_norm": 0.503894031047821, "learning_rate": 1.6795517302098382e-05, "loss": 0.3493, "step": 14520 }, { "epoch": 0.262261752576575, "grad_norm": 0.37055426836013794, "learning_rate": 1.6793436282851944e-05, "loss": 0.2142, "step": 14525 }, { "epoch": 0.2623520320094757, "grad_norm": 0.47502291202545166, "learning_rate": 1.679135471712295e-05, "loss": 0.2984, "step": 14530 }, { "epoch": 0.26244231144237645, "grad_norm": 0.4711866080760956, "learning_rate": 1.678927260507885e-05, "loss": 0.2904, "step": 14535 }, { "epoch": 0.26253259087527714, "grad_norm": 0.33698809146881104, "learning_rate": 1.6787189946887128e-05, "loss": 0.3327, "step": 14540 }, { "epoch": 0.2626228703081779, "grad_norm": 0.759178638458252, "learning_rate": 1.6785106742715323e-05, "loss": 0.3373, "step": 14545 }, { "epoch": 0.26271314974107857, "grad_norm": 0.3459857702255249, "learning_rate": 1.6783022992731015e-05, "loss": 0.3514, "step": 14550 }, { "epoch": 0.2628034291739793, "grad_norm": 0.4096636176109314, "learning_rate": 1.678093869710182e-05, "loss": 0.2506, "step": 14555 }, { "epoch": 0.26289370860688, "grad_norm": 0.49789562821388245, "learning_rate": 1.6778853855995417e-05, "loss": 0.3218, "step": 14560 }, { "epoch": 0.26298398803978074, "grad_norm": 0.6544811725616455, "learning_rate": 1.6776768469579502e-05, "loss": 0.3461, "step": 14565 }, { "epoch": 0.2630742674726814, "grad_norm": 0.61392742395401, "learning_rate": 1.6774682538021834e-05, "loss": 0.3235, "step": 14570 }, { "epoch": 0.26316454690558216, "grad_norm": 0.24093428254127502, "learning_rate": 1.677259606149022e-05, "loss": 0.2255, "step": 14575 }, { "epoch": 0.26325482633848285, "grad_norm": 0.36252260208129883, "learning_rate": 1.677050904015249e-05, "loss": 0.2101, "step": 14580 }, { "epoch": 0.2633451057713836, "grad_norm": 0.6923762559890747, "learning_rate": 1.6768421474176533e-05, "loss": 0.2716, "step": 14585 }, { "epoch": 0.2634353852042843, "grad_norm": 0.4598599970340729, "learning_rate": 1.6766333363730282e-05, "loss": 0.2299, "step": 14590 }, { "epoch": 0.263525664637185, "grad_norm": 0.24030975997447968, "learning_rate": 1.6764244708981706e-05, "loss": 0.2504, "step": 14595 }, { "epoch": 0.2636159440700857, "grad_norm": 0.6802058815956116, "learning_rate": 1.6762155510098827e-05, "loss": 0.2059, "step": 14600 }, { "epoch": 0.26370622350298645, "grad_norm": 0.4267932176589966, "learning_rate": 1.6760065767249698e-05, "loss": 0.2668, "step": 14605 }, { "epoch": 0.26379650293588713, "grad_norm": 0.45004796981811523, "learning_rate": 1.6757975480602435e-05, "loss": 0.273, "step": 14610 }, { "epoch": 0.2638867823687879, "grad_norm": 0.44872379302978516, "learning_rate": 1.6755884650325174e-05, "loss": 0.3135, "step": 14615 }, { "epoch": 0.26397706180168856, "grad_norm": 0.4489907920360565, "learning_rate": 1.6753793276586113e-05, "loss": 0.2998, "step": 14620 }, { "epoch": 0.2640673412345893, "grad_norm": 0.7032406330108643, "learning_rate": 1.675170135955349e-05, "loss": 0.2633, "step": 14625 }, { "epoch": 0.26415762066749, "grad_norm": 0.3216649889945984, "learning_rate": 1.674960889939558e-05, "loss": 0.2874, "step": 14630 }, { "epoch": 0.26424790010039073, "grad_norm": 0.45787546038627625, "learning_rate": 1.674751589628071e-05, "loss": 0.2568, "step": 14635 }, { "epoch": 0.2643381795332914, "grad_norm": 0.3306974470615387, "learning_rate": 1.6745422350377246e-05, "loss": 0.192, "step": 14640 }, { "epoch": 0.26442845896619216, "grad_norm": 0.559463620185852, "learning_rate": 1.67433282618536e-05, "loss": 0.3781, "step": 14645 }, { "epoch": 0.26451873839909285, "grad_norm": 0.35545146465301514, "learning_rate": 1.6741233630878218e-05, "loss": 0.2881, "step": 14650 }, { "epoch": 0.2646090178319936, "grad_norm": 0.47090232372283936, "learning_rate": 1.6739138457619607e-05, "loss": 0.2371, "step": 14655 }, { "epoch": 0.2646992972648943, "grad_norm": 0.5291762351989746, "learning_rate": 1.673704274224631e-05, "loss": 0.3067, "step": 14660 }, { "epoch": 0.264789576697795, "grad_norm": 0.522714376449585, "learning_rate": 1.67349464849269e-05, "loss": 0.3224, "step": 14665 }, { "epoch": 0.26487985613069576, "grad_norm": 0.36193498969078064, "learning_rate": 1.673284968583002e-05, "loss": 0.2753, "step": 14670 }, { "epoch": 0.26497013556359644, "grad_norm": 0.4829557240009308, "learning_rate": 1.6730752345124328e-05, "loss": 0.2871, "step": 14675 }, { "epoch": 0.2650604149964972, "grad_norm": 0.4847275912761688, "learning_rate": 1.672865446297855e-05, "loss": 0.3003, "step": 14680 }, { "epoch": 0.26515069442939787, "grad_norm": 0.5189873576164246, "learning_rate": 1.6726556039561446e-05, "loss": 0.2667, "step": 14685 }, { "epoch": 0.2652409738622986, "grad_norm": 0.32874205708503723, "learning_rate": 1.6724457075041815e-05, "loss": 0.2473, "step": 14690 }, { "epoch": 0.2653312532951993, "grad_norm": 0.3475160300731659, "learning_rate": 1.6722357569588498e-05, "loss": 0.3136, "step": 14695 }, { "epoch": 0.26542153272810004, "grad_norm": 0.40261968970298767, "learning_rate": 1.6720257523370395e-05, "loss": 0.3522, "step": 14700 }, { "epoch": 0.2655118121610007, "grad_norm": 0.5326187610626221, "learning_rate": 1.6718156936556434e-05, "loss": 0.2753, "step": 14705 }, { "epoch": 0.26560209159390147, "grad_norm": 0.3311368227005005, "learning_rate": 1.6716055809315594e-05, "loss": 0.3193, "step": 14710 }, { "epoch": 0.26569237102680215, "grad_norm": 0.2994149327278137, "learning_rate": 1.6713954141816894e-05, "loss": 0.308, "step": 14715 }, { "epoch": 0.2657826504597029, "grad_norm": 0.581963300704956, "learning_rate": 1.67118519342294e-05, "loss": 0.2923, "step": 14720 }, { "epoch": 0.2658729298926036, "grad_norm": 0.7782924175262451, "learning_rate": 1.6709749186722214e-05, "loss": 0.2698, "step": 14725 }, { "epoch": 0.2659632093255043, "grad_norm": 0.3711308240890503, "learning_rate": 1.6707645899464492e-05, "loss": 0.2136, "step": 14730 }, { "epoch": 0.266053488758405, "grad_norm": 0.2079039067029953, "learning_rate": 1.6705542072625428e-05, "loss": 0.2666, "step": 14735 }, { "epoch": 0.26614376819130575, "grad_norm": 0.6214173436164856, "learning_rate": 1.670343770637426e-05, "loss": 0.2074, "step": 14740 }, { "epoch": 0.26623404762420644, "grad_norm": 0.4272468388080597, "learning_rate": 1.6701332800880266e-05, "loss": 0.2835, "step": 14745 }, { "epoch": 0.2663243270571072, "grad_norm": 0.23270417749881744, "learning_rate": 1.669922735631277e-05, "loss": 0.2348, "step": 14750 }, { "epoch": 0.26641460649000787, "grad_norm": 0.35107874870300293, "learning_rate": 1.669712137284114e-05, "loss": 0.3177, "step": 14755 }, { "epoch": 0.2665048859229086, "grad_norm": 0.6691100001335144, "learning_rate": 1.6695014850634792e-05, "loss": 0.3361, "step": 14760 }, { "epoch": 0.2665951653558093, "grad_norm": 0.39851701259613037, "learning_rate": 1.6692907789863173e-05, "loss": 0.3069, "step": 14765 }, { "epoch": 0.26668544478871004, "grad_norm": 0.5575686097145081, "learning_rate": 1.6690800190695786e-05, "loss": 0.2871, "step": 14770 }, { "epoch": 0.2667757242216107, "grad_norm": 0.34226763248443604, "learning_rate": 1.668869205330217e-05, "loss": 0.3051, "step": 14775 }, { "epoch": 0.26686600365451146, "grad_norm": 0.4824385941028595, "learning_rate": 1.668658337785191e-05, "loss": 0.2723, "step": 14780 }, { "epoch": 0.26695628308741215, "grad_norm": 0.49718043208122253, "learning_rate": 1.6684474164514637e-05, "loss": 0.2672, "step": 14785 }, { "epoch": 0.2670465625203129, "grad_norm": 0.37770745158195496, "learning_rate": 1.6682364413460014e-05, "loss": 0.2864, "step": 14790 }, { "epoch": 0.2671368419532136, "grad_norm": 0.7862424254417419, "learning_rate": 1.6680254124857763e-05, "loss": 0.3358, "step": 14795 }, { "epoch": 0.2672271213861143, "grad_norm": 0.5095865726470947, "learning_rate": 1.6678143298877635e-05, "loss": 0.3336, "step": 14800 }, { "epoch": 0.267317400819015, "grad_norm": 0.4720807373523712, "learning_rate": 1.6676031935689435e-05, "loss": 0.24, "step": 14805 }, { "epoch": 0.26740768025191575, "grad_norm": 0.4367506206035614, "learning_rate": 1.6673920035463004e-05, "loss": 0.2783, "step": 14810 }, { "epoch": 0.26749795968481643, "grad_norm": 0.36520060896873474, "learning_rate": 1.667180759836823e-05, "loss": 0.2842, "step": 14815 }, { "epoch": 0.2675882391177172, "grad_norm": 0.3673902451992035, "learning_rate": 1.6669694624575042e-05, "loss": 0.2936, "step": 14820 }, { "epoch": 0.26767851855061786, "grad_norm": 0.4801824390888214, "learning_rate": 1.6667581114253416e-05, "loss": 0.349, "step": 14825 }, { "epoch": 0.2677687979835186, "grad_norm": 0.5937851071357727, "learning_rate": 1.666546706757337e-05, "loss": 0.2555, "step": 14830 }, { "epoch": 0.2678590774164193, "grad_norm": 0.22917994856834412, "learning_rate": 1.666335248470496e-05, "loss": 0.2769, "step": 14835 }, { "epoch": 0.26794935684932003, "grad_norm": 0.46926772594451904, "learning_rate": 1.6661237365818287e-05, "loss": 0.2867, "step": 14840 }, { "epoch": 0.2680396362822207, "grad_norm": 0.475140243768692, "learning_rate": 1.6659121711083504e-05, "loss": 0.3331, "step": 14845 }, { "epoch": 0.26812991571512146, "grad_norm": 0.38355955481529236, "learning_rate": 1.6657005520670794e-05, "loss": 0.2744, "step": 14850 }, { "epoch": 0.26822019514802214, "grad_norm": 0.4811035990715027, "learning_rate": 1.665488879475039e-05, "loss": 0.2919, "step": 14855 }, { "epoch": 0.2683104745809229, "grad_norm": 0.6225593090057373, "learning_rate": 1.665277153349257e-05, "loss": 0.2979, "step": 14860 }, { "epoch": 0.2684007540138236, "grad_norm": 0.5459991097450256, "learning_rate": 1.6650653737067648e-05, "loss": 0.3335, "step": 14865 }, { "epoch": 0.2684910334467243, "grad_norm": 0.7860650420188904, "learning_rate": 1.664853540564599e-05, "loss": 0.2629, "step": 14870 }, { "epoch": 0.268581312879625, "grad_norm": 0.5207154154777527, "learning_rate": 1.6646416539397998e-05, "loss": 0.1395, "step": 14875 }, { "epoch": 0.26867159231252574, "grad_norm": 0.295149028301239, "learning_rate": 1.664429713849412e-05, "loss": 0.3182, "step": 14880 }, { "epoch": 0.26876187174542643, "grad_norm": 0.6800479292869568, "learning_rate": 1.664217720310484e-05, "loss": 0.2211, "step": 14885 }, { "epoch": 0.26885215117832717, "grad_norm": 0.4096933603286743, "learning_rate": 1.6640056733400706e-05, "loss": 0.3122, "step": 14890 }, { "epoch": 0.26894243061122786, "grad_norm": 0.38134050369262695, "learning_rate": 1.663793572955228e-05, "loss": 0.359, "step": 14895 }, { "epoch": 0.2690327100441286, "grad_norm": 0.7740076184272766, "learning_rate": 1.663581419173019e-05, "loss": 0.2086, "step": 14900 }, { "epoch": 0.2691229894770293, "grad_norm": 0.27558088302612305, "learning_rate": 1.6633692120105092e-05, "loss": 0.2562, "step": 14905 }, { "epoch": 0.26921326890993, "grad_norm": 0.8536859154701233, "learning_rate": 1.6631569514847696e-05, "loss": 0.2939, "step": 14910 }, { "epoch": 0.2693035483428307, "grad_norm": 0.4222334325313568, "learning_rate": 1.662944637612875e-05, "loss": 0.4318, "step": 14915 }, { "epoch": 0.26939382777573145, "grad_norm": 0.34165582060813904, "learning_rate": 1.662732270411904e-05, "loss": 0.2882, "step": 14920 }, { "epoch": 0.26948410720863214, "grad_norm": 0.43178972601890564, "learning_rate": 1.662519849898941e-05, "loss": 0.1692, "step": 14925 }, { "epoch": 0.2695743866415329, "grad_norm": 0.5931707620620728, "learning_rate": 1.6623073760910726e-05, "loss": 0.3197, "step": 14930 }, { "epoch": 0.26966466607443357, "grad_norm": 0.4385814070701599, "learning_rate": 1.6620948490053914e-05, "loss": 0.2658, "step": 14935 }, { "epoch": 0.2697549455073343, "grad_norm": 0.2507951259613037, "learning_rate": 1.6618822686589936e-05, "loss": 0.2285, "step": 14940 }, { "epoch": 0.269845224940235, "grad_norm": 0.38211968541145325, "learning_rate": 1.6616696350689792e-05, "loss": 0.3515, "step": 14945 }, { "epoch": 0.26993550437313574, "grad_norm": 0.4873267710208893, "learning_rate": 1.661456948252454e-05, "loss": 0.3296, "step": 14950 }, { "epoch": 0.2700257838060364, "grad_norm": 0.7359420657157898, "learning_rate": 1.6612442082265265e-05, "loss": 0.3376, "step": 14955 }, { "epoch": 0.27011606323893717, "grad_norm": 0.5286779403686523, "learning_rate": 1.6610314150083096e-05, "loss": 0.2761, "step": 14960 }, { "epoch": 0.27020634267183785, "grad_norm": 0.3935490846633911, "learning_rate": 1.660818568614922e-05, "loss": 0.3092, "step": 14965 }, { "epoch": 0.2702966221047386, "grad_norm": 0.6011040210723877, "learning_rate": 1.6606056690634852e-05, "loss": 0.3448, "step": 14970 }, { "epoch": 0.2703869015376393, "grad_norm": 0.6015837788581848, "learning_rate": 1.6603927163711254e-05, "loss": 0.2432, "step": 14975 }, { "epoch": 0.27047718097054, "grad_norm": 0.5535976886749268, "learning_rate": 1.6601797105549727e-05, "loss": 0.2987, "step": 14980 }, { "epoch": 0.2705674604034407, "grad_norm": 0.5601411461830139, "learning_rate": 1.6599666516321625e-05, "loss": 0.3679, "step": 14985 }, { "epoch": 0.27065773983634145, "grad_norm": 0.5071304440498352, "learning_rate": 1.6597535396198337e-05, "loss": 0.2826, "step": 14990 }, { "epoch": 0.27074801926924214, "grad_norm": 0.3708009123802185, "learning_rate": 1.6595403745351296e-05, "loss": 0.2435, "step": 14995 }, { "epoch": 0.2708382987021429, "grad_norm": 0.47595512866973877, "learning_rate": 1.6593271563951975e-05, "loss": 0.3054, "step": 15000 }, { "epoch": 0.27092857813504356, "grad_norm": 0.318886399269104, "learning_rate": 1.659113885217189e-05, "loss": 0.343, "step": 15005 }, { "epoch": 0.2710188575679443, "grad_norm": 2.0091025829315186, "learning_rate": 1.6589005610182613e-05, "loss": 0.3664, "step": 15010 }, { "epoch": 0.271109137000845, "grad_norm": 0.5164380073547363, "learning_rate": 1.658687183815574e-05, "loss": 0.3674, "step": 15015 }, { "epoch": 0.27119941643374573, "grad_norm": 0.41829022765159607, "learning_rate": 1.6584737536262917e-05, "loss": 0.3181, "step": 15020 }, { "epoch": 0.2712896958666464, "grad_norm": 0.6270329356193542, "learning_rate": 1.6582602704675838e-05, "loss": 0.3107, "step": 15025 }, { "epoch": 0.27137997529954716, "grad_norm": 0.5446771383285522, "learning_rate": 1.6580467343566228e-05, "loss": 0.2632, "step": 15030 }, { "epoch": 0.27147025473244785, "grad_norm": 0.4855729043483734, "learning_rate": 1.6578331453105867e-05, "loss": 0.258, "step": 15035 }, { "epoch": 0.2715605341653486, "grad_norm": 0.4179638922214508, "learning_rate": 1.657619503346657e-05, "loss": 0.2696, "step": 15040 }, { "epoch": 0.2716508135982493, "grad_norm": 0.6478289365768433, "learning_rate": 1.6574058084820195e-05, "loss": 0.2576, "step": 15045 }, { "epoch": 0.27174109303115, "grad_norm": 0.5988754034042358, "learning_rate": 1.6571920607338647e-05, "loss": 0.3109, "step": 15050 }, { "epoch": 0.2718313724640507, "grad_norm": 0.3570278286933899, "learning_rate": 1.6569782601193865e-05, "loss": 0.2561, "step": 15055 }, { "epoch": 0.27192165189695144, "grad_norm": 0.36204129457473755, "learning_rate": 1.656764406655784e-05, "loss": 0.3706, "step": 15060 }, { "epoch": 0.2720119313298522, "grad_norm": 0.3621353209018707, "learning_rate": 1.6565505003602607e-05, "loss": 0.3523, "step": 15065 }, { "epoch": 0.27210221076275287, "grad_norm": 0.4771013557910919, "learning_rate": 1.6563365412500226e-05, "loss": 0.2619, "step": 15070 }, { "epoch": 0.2721924901956536, "grad_norm": 0.6217138171195984, "learning_rate": 1.656122529342282e-05, "loss": 0.257, "step": 15075 }, { "epoch": 0.2722827696285543, "grad_norm": 0.48531246185302734, "learning_rate": 1.6559084646542544e-05, "loss": 0.2432, "step": 15080 }, { "epoch": 0.27237304906145504, "grad_norm": 0.48256370425224304, "learning_rate": 1.65569434720316e-05, "loss": 0.3274, "step": 15085 }, { "epoch": 0.2724633284943557, "grad_norm": 0.3124198317527771, "learning_rate": 1.6554801770062222e-05, "loss": 0.328, "step": 15090 }, { "epoch": 0.27255360792725647, "grad_norm": 0.46326014399528503, "learning_rate": 1.65526595408067e-05, "loss": 0.3229, "step": 15095 }, { "epoch": 0.27264388736015716, "grad_norm": 0.39263132214546204, "learning_rate": 1.6550516784437366e-05, "loss": 0.3548, "step": 15100 }, { "epoch": 0.2727341667930579, "grad_norm": 0.4550118148326874, "learning_rate": 1.6548373501126577e-05, "loss": 0.2756, "step": 15105 }, { "epoch": 0.2728244462259586, "grad_norm": 0.4505438804626465, "learning_rate": 1.6546229691046755e-05, "loss": 0.3097, "step": 15110 }, { "epoch": 0.2729147256588593, "grad_norm": 0.3754425346851349, "learning_rate": 1.654408535437035e-05, "loss": 0.2004, "step": 15115 }, { "epoch": 0.27300500509176, "grad_norm": 0.6223003268241882, "learning_rate": 1.6541940491269857e-05, "loss": 0.2288, "step": 15120 }, { "epoch": 0.27309528452466075, "grad_norm": 0.44022637605667114, "learning_rate": 1.6539795101917816e-05, "loss": 0.2658, "step": 15125 }, { "epoch": 0.27318556395756144, "grad_norm": 0.4375525414943695, "learning_rate": 1.653764918648681e-05, "loss": 0.1978, "step": 15130 }, { "epoch": 0.2732758433904622, "grad_norm": 0.9861794710159302, "learning_rate": 1.6535502745149456e-05, "loss": 0.3321, "step": 15135 }, { "epoch": 0.27336612282336287, "grad_norm": 1.1904473304748535, "learning_rate": 1.6533355778078428e-05, "loss": 0.2882, "step": 15140 }, { "epoch": 0.2734564022562636, "grad_norm": 0.3916078507900238, "learning_rate": 1.6531208285446428e-05, "loss": 0.2744, "step": 15145 }, { "epoch": 0.2735466816891643, "grad_norm": 0.5370838642120361, "learning_rate": 1.652906026742621e-05, "loss": 0.2788, "step": 15150 }, { "epoch": 0.27363696112206504, "grad_norm": 0.5554995536804199, "learning_rate": 1.6526911724190564e-05, "loss": 0.3579, "step": 15155 }, { "epoch": 0.2737272405549657, "grad_norm": 0.43957996368408203, "learning_rate": 1.6524762655912323e-05, "loss": 0.3007, "step": 15160 }, { "epoch": 0.27381751998786646, "grad_norm": 0.5079452991485596, "learning_rate": 1.6522613062764364e-05, "loss": 0.2514, "step": 15165 }, { "epoch": 0.27390779942076715, "grad_norm": 0.39836010336875916, "learning_rate": 1.652046294491961e-05, "loss": 0.3092, "step": 15170 }, { "epoch": 0.2739980788536679, "grad_norm": 0.5519921183586121, "learning_rate": 1.6518312302551024e-05, "loss": 0.2886, "step": 15175 }, { "epoch": 0.2740883582865686, "grad_norm": 0.3608863949775696, "learning_rate": 1.6516161135831602e-05, "loss": 0.3595, "step": 15180 }, { "epoch": 0.2741786377194693, "grad_norm": 0.33774057030677795, "learning_rate": 1.6514009444934392e-05, "loss": 0.3462, "step": 15185 }, { "epoch": 0.27426891715237, "grad_norm": 0.6961424946784973, "learning_rate": 1.6511857230032487e-05, "loss": 0.3188, "step": 15190 }, { "epoch": 0.27435919658527075, "grad_norm": 0.5834682583808899, "learning_rate": 1.6509704491299014e-05, "loss": 0.2196, "step": 15195 }, { "epoch": 0.27444947601817143, "grad_norm": 0.26620805263519287, "learning_rate": 1.6507551228907144e-05, "loss": 0.2567, "step": 15200 }, { "epoch": 0.2745397554510722, "grad_norm": 0.6540822386741638, "learning_rate": 1.6505397443030095e-05, "loss": 0.3038, "step": 15205 }, { "epoch": 0.27463003488397286, "grad_norm": 0.3851265609264374, "learning_rate": 1.6503243133841115e-05, "loss": 0.2346, "step": 15210 }, { "epoch": 0.2747203143168736, "grad_norm": 0.5700597763061523, "learning_rate": 1.6501088301513508e-05, "loss": 0.2433, "step": 15215 }, { "epoch": 0.2748105937497743, "grad_norm": 0.5008401870727539, "learning_rate": 1.6498932946220617e-05, "loss": 0.2142, "step": 15220 }, { "epoch": 0.27490087318267503, "grad_norm": 0.5411930680274963, "learning_rate": 1.6496777068135826e-05, "loss": 0.334, "step": 15225 }, { "epoch": 0.2749911526155757, "grad_norm": 0.4610949456691742, "learning_rate": 1.6494620667432548e-05, "loss": 0.2834, "step": 15230 }, { "epoch": 0.27508143204847646, "grad_norm": 0.6483005881309509, "learning_rate": 1.6492463744284263e-05, "loss": 0.3432, "step": 15235 }, { "epoch": 0.27517171148137715, "grad_norm": 3.1367764472961426, "learning_rate": 1.6490306298864473e-05, "loss": 0.2535, "step": 15240 }, { "epoch": 0.2752619909142779, "grad_norm": 0.31624963879585266, "learning_rate": 1.6488148331346733e-05, "loss": 0.1753, "step": 15245 }, { "epoch": 0.2753522703471786, "grad_norm": 0.472771555185318, "learning_rate": 1.648598984190463e-05, "loss": 0.3184, "step": 15250 }, { "epoch": 0.2754425497800793, "grad_norm": 0.4292396903038025, "learning_rate": 1.6483830830711803e-05, "loss": 0.3006, "step": 15255 }, { "epoch": 0.27553282921298, "grad_norm": 0.32948029041290283, "learning_rate": 1.6481671297941927e-05, "loss": 0.2824, "step": 15260 }, { "epoch": 0.27562310864588074, "grad_norm": 0.4458978474140167, "learning_rate": 1.6479511243768724e-05, "loss": 0.3051, "step": 15265 }, { "epoch": 0.27571338807878143, "grad_norm": 0.3954915702342987, "learning_rate": 1.647735066836595e-05, "loss": 0.3075, "step": 15270 }, { "epoch": 0.27580366751168217, "grad_norm": 0.3933243453502655, "learning_rate": 1.6475189571907408e-05, "loss": 0.2309, "step": 15275 }, { "epoch": 0.27589394694458286, "grad_norm": 0.3683823049068451, "learning_rate": 1.6473027954566948e-05, "loss": 0.3026, "step": 15280 }, { "epoch": 0.2759842263774836, "grad_norm": 0.6095166802406311, "learning_rate": 1.647086581651845e-05, "loss": 0.289, "step": 15285 }, { "epoch": 0.2760745058103843, "grad_norm": 0.47206076979637146, "learning_rate": 1.6468703157935844e-05, "loss": 0.3066, "step": 15290 }, { "epoch": 0.276164785243285, "grad_norm": 0.44620397686958313, "learning_rate": 1.64665399789931e-05, "loss": 0.335, "step": 15295 }, { "epoch": 0.2762550646761857, "grad_norm": 0.49999502301216125, "learning_rate": 1.6464376279864235e-05, "loss": 0.2846, "step": 15300 }, { "epoch": 0.27634534410908645, "grad_norm": 0.486945778131485, "learning_rate": 1.6462212060723297e-05, "loss": 0.2658, "step": 15305 }, { "epoch": 0.27643562354198714, "grad_norm": 0.5022004246711731, "learning_rate": 1.6460047321744384e-05, "loss": 0.3887, "step": 15310 }, { "epoch": 0.2765259029748879, "grad_norm": 0.5504619479179382, "learning_rate": 1.6457882063101635e-05, "loss": 0.3519, "step": 15315 }, { "epoch": 0.27661618240778857, "grad_norm": 0.22563856840133667, "learning_rate": 1.6455716284969222e-05, "loss": 0.2256, "step": 15320 }, { "epoch": 0.2767064618406893, "grad_norm": 0.40650132298469543, "learning_rate": 1.6453549987521376e-05, "loss": 0.2261, "step": 15325 }, { "epoch": 0.27679674127359, "grad_norm": 0.5303844809532166, "learning_rate": 1.645138317093235e-05, "loss": 0.271, "step": 15330 }, { "epoch": 0.27688702070649074, "grad_norm": 0.4735105335712433, "learning_rate": 1.6449215835376462e-05, "loss": 0.2691, "step": 15335 }, { "epoch": 0.2769773001393914, "grad_norm": 0.4318624436855316, "learning_rate": 1.6447047981028042e-05, "loss": 0.2865, "step": 15340 }, { "epoch": 0.27706757957229217, "grad_norm": 0.5579233765602112, "learning_rate": 1.644487960806149e-05, "loss": 0.2117, "step": 15345 }, { "epoch": 0.27715785900519285, "grad_norm": 0.47623273730278015, "learning_rate": 1.6442710716651234e-05, "loss": 0.3592, "step": 15350 }, { "epoch": 0.2772481384380936, "grad_norm": 0.6098501086235046, "learning_rate": 1.6440541306971745e-05, "loss": 0.2889, "step": 15355 }, { "epoch": 0.2773384178709943, "grad_norm": 0.41777414083480835, "learning_rate": 1.6438371379197534e-05, "loss": 0.262, "step": 15360 }, { "epoch": 0.277428697303895, "grad_norm": 0.4153043031692505, "learning_rate": 1.6436200933503156e-05, "loss": 0.2855, "step": 15365 }, { "epoch": 0.2775189767367957, "grad_norm": 0.3103995621204376, "learning_rate": 1.6434029970063207e-05, "loss": 0.2276, "step": 15370 }, { "epoch": 0.27760925616969645, "grad_norm": 0.5243020057678223, "learning_rate": 1.6431858489052327e-05, "loss": 0.3601, "step": 15375 }, { "epoch": 0.27769953560259714, "grad_norm": 0.8446954488754272, "learning_rate": 1.6429686490645198e-05, "loss": 0.2727, "step": 15380 }, { "epoch": 0.2777898150354979, "grad_norm": 0.4470043182373047, "learning_rate": 1.642751397501654e-05, "loss": 0.2341, "step": 15385 }, { "epoch": 0.27788009446839856, "grad_norm": 0.3866370916366577, "learning_rate": 1.642534094234111e-05, "loss": 0.3108, "step": 15390 }, { "epoch": 0.2779703739012993, "grad_norm": 0.4725272059440613, "learning_rate": 1.6423167392793723e-05, "loss": 0.2605, "step": 15395 }, { "epoch": 0.2780606533342, "grad_norm": 0.8187476396560669, "learning_rate": 1.642099332654922e-05, "loss": 0.2855, "step": 15400 }, { "epoch": 0.27815093276710073, "grad_norm": 0.3882555663585663, "learning_rate": 1.641881874378249e-05, "loss": 0.3275, "step": 15405 }, { "epoch": 0.2782412122000014, "grad_norm": 0.4930070638656616, "learning_rate": 1.6416643644668454e-05, "loss": 0.318, "step": 15410 }, { "epoch": 0.27833149163290216, "grad_norm": 0.5323049426078796, "learning_rate": 1.6414468029382095e-05, "loss": 0.2475, "step": 15415 }, { "epoch": 0.27842177106580285, "grad_norm": 0.3736749291419983, "learning_rate": 1.6412291898098423e-05, "loss": 0.3491, "step": 15420 }, { "epoch": 0.2785120504987036, "grad_norm": 0.33540913462638855, "learning_rate": 1.641011525099249e-05, "loss": 0.2865, "step": 15425 }, { "epoch": 0.2786023299316043, "grad_norm": 0.42792782187461853, "learning_rate": 1.640793808823939e-05, "loss": 0.2633, "step": 15430 }, { "epoch": 0.278692609364505, "grad_norm": 0.602237343788147, "learning_rate": 1.640576041001426e-05, "loss": 0.3158, "step": 15435 }, { "epoch": 0.2787828887974057, "grad_norm": 0.43899089097976685, "learning_rate": 1.6403582216492274e-05, "loss": 0.4468, "step": 15440 }, { "epoch": 0.27887316823030645, "grad_norm": 0.5117487907409668, "learning_rate": 1.6401403507848663e-05, "loss": 0.2577, "step": 15445 }, { "epoch": 0.27896344766320713, "grad_norm": 0.524939775466919, "learning_rate": 1.6399224284258683e-05, "loss": 0.3625, "step": 15450 }, { "epoch": 0.2790537270961079, "grad_norm": 0.36036819219589233, "learning_rate": 1.6397044545897636e-05, "loss": 0.2324, "step": 15455 }, { "epoch": 0.2791440065290086, "grad_norm": 0.3167564868927002, "learning_rate": 1.6394864292940866e-05, "loss": 0.3252, "step": 15460 }, { "epoch": 0.2792342859619093, "grad_norm": 0.45076414942741394, "learning_rate": 1.6392683525563757e-05, "loss": 0.3069, "step": 15465 }, { "epoch": 0.27932456539481004, "grad_norm": 0.3625401556491852, "learning_rate": 1.639050224394174e-05, "loss": 0.2986, "step": 15470 }, { "epoch": 0.27941484482771073, "grad_norm": 0.4333385229110718, "learning_rate": 1.6388320448250278e-05, "loss": 0.294, "step": 15475 }, { "epoch": 0.27950512426061147, "grad_norm": 0.5874447822570801, "learning_rate": 1.6386138138664883e-05, "loss": 0.2447, "step": 15480 }, { "epoch": 0.27959540369351216, "grad_norm": 0.36848005652427673, "learning_rate": 1.638395531536111e-05, "loss": 0.1787, "step": 15485 }, { "epoch": 0.2796856831264129, "grad_norm": 0.5163210034370422, "learning_rate": 1.6381771978514548e-05, "loss": 0.278, "step": 15490 }, { "epoch": 0.2797759625593136, "grad_norm": 0.26837676763534546, "learning_rate": 1.6379588128300828e-05, "loss": 0.3203, "step": 15495 }, { "epoch": 0.2798662419922143, "grad_norm": 0.2180904746055603, "learning_rate": 1.637740376489563e-05, "loss": 0.261, "step": 15500 }, { "epoch": 0.279956521425115, "grad_norm": 0.40104246139526367, "learning_rate": 1.6375218888474666e-05, "loss": 0.3079, "step": 15505 }, { "epoch": 0.28004680085801575, "grad_norm": 0.4926336109638214, "learning_rate": 1.637303349921369e-05, "loss": 0.3486, "step": 15510 }, { "epoch": 0.28013708029091644, "grad_norm": 0.38141384720802307, "learning_rate": 1.637084759728851e-05, "loss": 0.2401, "step": 15515 }, { "epoch": 0.2802273597238172, "grad_norm": 0.31682920455932617, "learning_rate": 1.6368661182874964e-05, "loss": 0.2688, "step": 15520 }, { "epoch": 0.28031763915671787, "grad_norm": 0.6005726456642151, "learning_rate": 1.6366474256148925e-05, "loss": 0.2366, "step": 15525 }, { "epoch": 0.2804079185896186, "grad_norm": 0.33243298530578613, "learning_rate": 1.6364286817286323e-05, "loss": 0.2397, "step": 15530 }, { "epoch": 0.2804981980225193, "grad_norm": 0.38761410117149353, "learning_rate": 1.6362098866463122e-05, "loss": 0.2715, "step": 15535 }, { "epoch": 0.28058847745542004, "grad_norm": 0.5326482653617859, "learning_rate": 1.635991040385532e-05, "loss": 0.3462, "step": 15540 }, { "epoch": 0.2806787568883207, "grad_norm": 0.5653488636016846, "learning_rate": 1.635772142963897e-05, "loss": 0.3557, "step": 15545 }, { "epoch": 0.28076903632122147, "grad_norm": 0.4962272047996521, "learning_rate": 1.6355531943990153e-05, "loss": 0.266, "step": 15550 }, { "epoch": 0.28085931575412215, "grad_norm": 0.5806422233581543, "learning_rate": 1.6353341947085e-05, "loss": 0.2875, "step": 15555 }, { "epoch": 0.2809495951870229, "grad_norm": 0.429801344871521, "learning_rate": 1.6351151439099684e-05, "loss": 0.2699, "step": 15560 }, { "epoch": 0.2810398746199236, "grad_norm": 1.272374153137207, "learning_rate": 1.634896042021041e-05, "loss": 0.2556, "step": 15565 }, { "epoch": 0.2811301540528243, "grad_norm": 0.4203578233718872, "learning_rate": 1.6346768890593432e-05, "loss": 0.2648, "step": 15570 }, { "epoch": 0.281220433485725, "grad_norm": 0.6440940499305725, "learning_rate": 1.6344576850425044e-05, "loss": 0.2569, "step": 15575 }, { "epoch": 0.28131071291862575, "grad_norm": 0.40680187940597534, "learning_rate": 1.6342384299881574e-05, "loss": 0.352, "step": 15580 }, { "epoch": 0.28140099235152644, "grad_norm": 1.5566149950027466, "learning_rate": 1.6340191239139405e-05, "loss": 0.3613, "step": 15585 }, { "epoch": 0.2814912717844272, "grad_norm": 0.35331594944000244, "learning_rate": 1.633799766837495e-05, "loss": 0.2324, "step": 15590 }, { "epoch": 0.28158155121732786, "grad_norm": 0.38838991522789, "learning_rate": 1.6335803587764655e-05, "loss": 0.2894, "step": 15595 }, { "epoch": 0.2816718306502286, "grad_norm": 0.6177527904510498, "learning_rate": 1.6333608997485034e-05, "loss": 0.2121, "step": 15600 }, { "epoch": 0.2817621100831293, "grad_norm": 0.3072573244571686, "learning_rate": 1.633141389771262e-05, "loss": 0.1905, "step": 15605 }, { "epoch": 0.28185238951603003, "grad_norm": 0.5108826756477356, "learning_rate": 1.632921828862399e-05, "loss": 0.2624, "step": 15610 }, { "epoch": 0.2819426689489307, "grad_norm": 0.8060066103935242, "learning_rate": 1.6327022170395763e-05, "loss": 0.2973, "step": 15615 }, { "epoch": 0.28203294838183146, "grad_norm": 0.4345701336860657, "learning_rate": 1.632482554320461e-05, "loss": 0.2287, "step": 15620 }, { "epoch": 0.28212322781473215, "grad_norm": 0.47276219725608826, "learning_rate": 1.6322628407227226e-05, "loss": 0.2187, "step": 15625 }, { "epoch": 0.2822135072476329, "grad_norm": 0.6881159543991089, "learning_rate": 1.6320430762640357e-05, "loss": 0.3437, "step": 15630 }, { "epoch": 0.2823037866805336, "grad_norm": 0.47420379519462585, "learning_rate": 1.6318232609620783e-05, "loss": 0.262, "step": 15635 }, { "epoch": 0.2823940661134343, "grad_norm": 0.48898845911026, "learning_rate": 1.6316033948345336e-05, "loss": 0.4279, "step": 15640 }, { "epoch": 0.282484345546335, "grad_norm": 0.516494631767273, "learning_rate": 1.631383477899088e-05, "loss": 0.2792, "step": 15645 }, { "epoch": 0.28257462497923574, "grad_norm": 0.9257849454879761, "learning_rate": 1.6311635101734326e-05, "loss": 0.248, "step": 15650 }, { "epoch": 0.28266490441213643, "grad_norm": 0.41677549481391907, "learning_rate": 1.6309434916752612e-05, "loss": 0.291, "step": 15655 }, { "epoch": 0.2827551838450372, "grad_norm": 0.4060181975364685, "learning_rate": 1.6307234224222735e-05, "loss": 0.3158, "step": 15660 }, { "epoch": 0.28284546327793786, "grad_norm": 0.7808871269226074, "learning_rate": 1.6305033024321725e-05, "loss": 0.2654, "step": 15665 }, { "epoch": 0.2829357427108386, "grad_norm": 0.6203485131263733, "learning_rate": 1.630283131722665e-05, "loss": 0.2618, "step": 15670 }, { "epoch": 0.2830260221437393, "grad_norm": 0.42930033802986145, "learning_rate": 1.6300629103114617e-05, "loss": 0.3103, "step": 15675 }, { "epoch": 0.28311630157664003, "grad_norm": 0.4635891616344452, "learning_rate": 1.6298426382162788e-05, "loss": 0.3125, "step": 15680 }, { "epoch": 0.2832065810095407, "grad_norm": 0.6747693419456482, "learning_rate": 1.629622315454835e-05, "loss": 0.3134, "step": 15685 }, { "epoch": 0.28329686044244146, "grad_norm": 0.4666113257408142, "learning_rate": 1.6294019420448533e-05, "loss": 0.2255, "step": 15690 }, { "epoch": 0.28338713987534214, "grad_norm": 1.8684427738189697, "learning_rate": 1.6291815180040617e-05, "loss": 0.245, "step": 15695 }, { "epoch": 0.2834774193082429, "grad_norm": 0.4526993930339813, "learning_rate": 1.6289610433501916e-05, "loss": 0.2927, "step": 15700 }, { "epoch": 0.28356769874114357, "grad_norm": 0.44100838899612427, "learning_rate": 1.628740518100979e-05, "loss": 0.2808, "step": 15705 }, { "epoch": 0.2836579781740443, "grad_norm": 0.2732015550136566, "learning_rate": 1.6285199422741627e-05, "loss": 0.2947, "step": 15710 }, { "epoch": 0.283748257606945, "grad_norm": 0.48613548278808594, "learning_rate": 1.6282993158874867e-05, "loss": 0.3734, "step": 15715 }, { "epoch": 0.28383853703984574, "grad_norm": 0.7183789610862732, "learning_rate": 1.628078638958699e-05, "loss": 0.3986, "step": 15720 }, { "epoch": 0.2839288164727464, "grad_norm": 0.3593280613422394, "learning_rate": 1.6278579115055514e-05, "loss": 0.2467, "step": 15725 }, { "epoch": 0.28401909590564717, "grad_norm": 0.47348156571388245, "learning_rate": 1.6276371335458e-05, "loss": 0.2409, "step": 15730 }, { "epoch": 0.28410937533854785, "grad_norm": 0.7523562908172607, "learning_rate": 1.6274163050972044e-05, "loss": 0.3164, "step": 15735 }, { "epoch": 0.2841996547714486, "grad_norm": 0.41706204414367676, "learning_rate": 1.6271954261775285e-05, "loss": 0.2297, "step": 15740 }, { "epoch": 0.2842899342043493, "grad_norm": 0.4382179081439972, "learning_rate": 1.6269744968045412e-05, "loss": 0.2788, "step": 15745 }, { "epoch": 0.28438021363725, "grad_norm": 0.4931817054748535, "learning_rate": 1.626753516996014e-05, "loss": 0.315, "step": 15750 }, { "epoch": 0.2844704930701507, "grad_norm": 0.5786130428314209, "learning_rate": 1.626532486769723e-05, "loss": 0.2401, "step": 15755 }, { "epoch": 0.28456077250305145, "grad_norm": 0.5008528828620911, "learning_rate": 1.6263114061434492e-05, "loss": 0.3156, "step": 15760 }, { "epoch": 0.28465105193595214, "grad_norm": 0.24782541394233704, "learning_rate": 1.6260902751349762e-05, "loss": 0.2095, "step": 15765 }, { "epoch": 0.2847413313688529, "grad_norm": 0.3510185480117798, "learning_rate": 1.6258690937620932e-05, "loss": 0.2288, "step": 15770 }, { "epoch": 0.28483161080175357, "grad_norm": 0.5392242670059204, "learning_rate": 1.6256478620425914e-05, "loss": 0.3031, "step": 15775 }, { "epoch": 0.2849218902346543, "grad_norm": 0.4234905540943146, "learning_rate": 1.6254265799942685e-05, "loss": 0.2683, "step": 15780 }, { "epoch": 0.285012169667555, "grad_norm": 0.26604676246643066, "learning_rate": 1.6252052476349244e-05, "loss": 0.271, "step": 15785 }, { "epoch": 0.28510244910045573, "grad_norm": 0.6017569899559021, "learning_rate": 1.624983864982364e-05, "loss": 0.217, "step": 15790 }, { "epoch": 0.2851927285333564, "grad_norm": 0.41519540548324585, "learning_rate": 1.6247624320543955e-05, "loss": 0.245, "step": 15795 }, { "epoch": 0.28528300796625716, "grad_norm": 0.4786364734172821, "learning_rate": 1.6245409488688317e-05, "loss": 0.1956, "step": 15800 }, { "epoch": 0.28537328739915785, "grad_norm": 0.30143222212791443, "learning_rate": 1.6243194154434897e-05, "loss": 0.1802, "step": 15805 }, { "epoch": 0.2854635668320586, "grad_norm": 0.48613789677619934, "learning_rate": 1.6240978317961897e-05, "loss": 0.2628, "step": 15810 }, { "epoch": 0.2855538462649593, "grad_norm": 0.35156938433647156, "learning_rate": 1.623876197944757e-05, "loss": 0.2647, "step": 15815 }, { "epoch": 0.28564412569786, "grad_norm": 0.5738462805747986, "learning_rate": 1.62365451390702e-05, "loss": 0.3109, "step": 15820 }, { "epoch": 0.2857344051307607, "grad_norm": 0.489198237657547, "learning_rate": 1.623432779700812e-05, "loss": 0.3053, "step": 15825 }, { "epoch": 0.28582468456366145, "grad_norm": 0.3832317888736725, "learning_rate": 1.623210995343969e-05, "loss": 0.3054, "step": 15830 }, { "epoch": 0.28591496399656213, "grad_norm": 0.5632326006889343, "learning_rate": 1.6229891608543334e-05, "loss": 0.2865, "step": 15835 }, { "epoch": 0.2860052434294629, "grad_norm": 0.56822270154953, "learning_rate": 1.622767276249749e-05, "loss": 0.2577, "step": 15840 }, { "epoch": 0.28609552286236356, "grad_norm": 0.5073590874671936, "learning_rate": 1.6225453415480652e-05, "loss": 0.2407, "step": 15845 }, { "epoch": 0.2861858022952643, "grad_norm": 0.42480191588401794, "learning_rate": 1.6223233567671352e-05, "loss": 0.2864, "step": 15850 }, { "epoch": 0.28627608172816504, "grad_norm": 0.40834692120552063, "learning_rate": 1.6221013219248158e-05, "loss": 0.3367, "step": 15855 }, { "epoch": 0.28636636116106573, "grad_norm": 0.5145226120948792, "learning_rate": 1.621879237038968e-05, "loss": 0.3203, "step": 15860 }, { "epoch": 0.28645664059396647, "grad_norm": 0.3466629385948181, "learning_rate": 1.621657102127457e-05, "loss": 0.193, "step": 15865 }, { "epoch": 0.28654692002686716, "grad_norm": 0.5159338712692261, "learning_rate": 1.6214349172081525e-05, "loss": 0.2962, "step": 15870 }, { "epoch": 0.2866371994597679, "grad_norm": 0.48120617866516113, "learning_rate": 1.6212126822989268e-05, "loss": 0.3539, "step": 15875 }, { "epoch": 0.2867274788926686, "grad_norm": 0.5824990272521973, "learning_rate": 1.6209903974176574e-05, "loss": 0.1397, "step": 15880 }, { "epoch": 0.2868177583255693, "grad_norm": 0.3986798822879791, "learning_rate": 1.620768062582226e-05, "loss": 0.2509, "step": 15885 }, { "epoch": 0.28690803775847, "grad_norm": 0.35834360122680664, "learning_rate": 1.6205456778105172e-05, "loss": 0.3183, "step": 15890 }, { "epoch": 0.28699831719137076, "grad_norm": 0.4624342620372772, "learning_rate": 1.6203232431204204e-05, "loss": 0.3156, "step": 15895 }, { "epoch": 0.28708859662427144, "grad_norm": 0.6071195006370544, "learning_rate": 1.6201007585298288e-05, "loss": 0.3294, "step": 15900 }, { "epoch": 0.2871788760571722, "grad_norm": 0.4487033486366272, "learning_rate": 1.6198782240566395e-05, "loss": 0.3327, "step": 15905 }, { "epoch": 0.28726915549007287, "grad_norm": 0.30451515316963196, "learning_rate": 1.6196556397187545e-05, "loss": 0.2031, "step": 15910 }, { "epoch": 0.2873594349229736, "grad_norm": 0.4486877918243408, "learning_rate": 1.619433005534078e-05, "loss": 0.29, "step": 15915 }, { "epoch": 0.2874497143558743, "grad_norm": 0.40688467025756836, "learning_rate": 1.6192103215205205e-05, "loss": 0.2412, "step": 15920 }, { "epoch": 0.28753999378877504, "grad_norm": 0.5863103866577148, "learning_rate": 1.6189875876959947e-05, "loss": 0.2982, "step": 15925 }, { "epoch": 0.2876302732216757, "grad_norm": 0.6324847936630249, "learning_rate": 1.6187648040784178e-05, "loss": 0.3914, "step": 15930 }, { "epoch": 0.28772055265457647, "grad_norm": 0.4159594476222992, "learning_rate": 1.618541970685711e-05, "loss": 0.213, "step": 15935 }, { "epoch": 0.28781083208747715, "grad_norm": 0.4259512722492218, "learning_rate": 1.6183190875358e-05, "loss": 0.1838, "step": 15940 }, { "epoch": 0.2879011115203779, "grad_norm": 0.41506460309028625, "learning_rate": 1.6180961546466145e-05, "loss": 0.2815, "step": 15945 }, { "epoch": 0.2879913909532786, "grad_norm": 0.5842285752296448, "learning_rate": 1.617873172036087e-05, "loss": 0.2814, "step": 15950 }, { "epoch": 0.2880816703861793, "grad_norm": 0.26104700565338135, "learning_rate": 1.617650139722155e-05, "loss": 0.3148, "step": 15955 }, { "epoch": 0.28817194981908, "grad_norm": 0.4021354615688324, "learning_rate": 1.6174270577227603e-05, "loss": 0.3118, "step": 15960 }, { "epoch": 0.28826222925198075, "grad_norm": 0.44796377420425415, "learning_rate": 1.617203926055848e-05, "loss": 0.2368, "step": 15965 }, { "epoch": 0.28835250868488144, "grad_norm": 0.37265872955322266, "learning_rate": 1.616980744739367e-05, "loss": 0.2213, "step": 15970 }, { "epoch": 0.2884427881177822, "grad_norm": 0.47346892952919006, "learning_rate": 1.616757513791271e-05, "loss": 0.4529, "step": 15975 }, { "epoch": 0.28853306755068286, "grad_norm": 0.5431389212608337, "learning_rate": 1.6165342332295176e-05, "loss": 0.2396, "step": 15980 }, { "epoch": 0.2886233469835836, "grad_norm": 0.3073919415473938, "learning_rate": 1.6163109030720678e-05, "loss": 0.3849, "step": 15985 }, { "epoch": 0.2887136264164843, "grad_norm": 0.6999151110649109, "learning_rate": 1.6160875233368868e-05, "loss": 0.3738, "step": 15990 }, { "epoch": 0.28880390584938503, "grad_norm": 0.36075490713119507, "learning_rate": 1.6158640940419435e-05, "loss": 0.3154, "step": 15995 }, { "epoch": 0.2888941852822857, "grad_norm": 0.39156901836395264, "learning_rate": 1.615640615205212e-05, "loss": 0.3754, "step": 16000 }, { "epoch": 0.28898446471518646, "grad_norm": 0.5082807540893555, "learning_rate": 1.6154170868446692e-05, "loss": 0.2995, "step": 16005 }, { "epoch": 0.28907474414808715, "grad_norm": 0.5425965785980225, "learning_rate": 1.615193508978296e-05, "loss": 0.3891, "step": 16010 }, { "epoch": 0.2891650235809879, "grad_norm": 0.532785177230835, "learning_rate": 1.6149698816240782e-05, "loss": 0.245, "step": 16015 }, { "epoch": 0.2892553030138886, "grad_norm": 0.5923757553100586, "learning_rate": 1.6147462048000043e-05, "loss": 0.2548, "step": 16020 }, { "epoch": 0.2893455824467893, "grad_norm": 0.49339884519577026, "learning_rate": 1.6145224785240682e-05, "loss": 0.257, "step": 16025 }, { "epoch": 0.28943586187969, "grad_norm": 0.31761792302131653, "learning_rate": 1.614298702814267e-05, "loss": 0.3083, "step": 16030 }, { "epoch": 0.28952614131259075, "grad_norm": 1.4089819192886353, "learning_rate": 1.614074877688601e-05, "loss": 0.391, "step": 16035 }, { "epoch": 0.28961642074549143, "grad_norm": 0.5526888370513916, "learning_rate": 1.6138510031650757e-05, "loss": 0.2956, "step": 16040 }, { "epoch": 0.2897067001783922, "grad_norm": 0.39860251545906067, "learning_rate": 1.6136270792617008e-05, "loss": 0.2583, "step": 16045 }, { "epoch": 0.28979697961129286, "grad_norm": 0.3737854063510895, "learning_rate": 1.6134031059964884e-05, "loss": 0.2668, "step": 16050 }, { "epoch": 0.2898872590441936, "grad_norm": 0.47919437289237976, "learning_rate": 1.6131790833874564e-05, "loss": 0.277, "step": 16055 }, { "epoch": 0.2899775384770943, "grad_norm": 0.3381844460964203, "learning_rate": 1.612955011452625e-05, "loss": 0.3252, "step": 16060 }, { "epoch": 0.29006781790999503, "grad_norm": 0.881658673286438, "learning_rate": 1.61273089021002e-05, "loss": 0.3333, "step": 16065 }, { "epoch": 0.2901580973428957, "grad_norm": 0.4923168420791626, "learning_rate": 1.61250671967767e-05, "loss": 0.2895, "step": 16070 }, { "epoch": 0.29024837677579646, "grad_norm": 0.44712138175964355, "learning_rate": 1.6122824998736074e-05, "loss": 0.3409, "step": 16075 }, { "epoch": 0.29033865620869714, "grad_norm": 0.4037761092185974, "learning_rate": 1.612058230815869e-05, "loss": 0.2853, "step": 16080 }, { "epoch": 0.2904289356415979, "grad_norm": 0.3157300651073456, "learning_rate": 1.6118339125224966e-05, "loss": 0.2477, "step": 16085 }, { "epoch": 0.29051921507449857, "grad_norm": 0.3346655070781708, "learning_rate": 1.6116095450115344e-05, "loss": 0.2159, "step": 16090 }, { "epoch": 0.2906094945073993, "grad_norm": 0.4581669569015503, "learning_rate": 1.611385128301031e-05, "loss": 0.3279, "step": 16095 }, { "epoch": 0.2906997739403, "grad_norm": 0.5271456241607666, "learning_rate": 1.6111606624090392e-05, "loss": 0.2819, "step": 16100 }, { "epoch": 0.29079005337320074, "grad_norm": 0.4685978293418884, "learning_rate": 1.6109361473536156e-05, "loss": 0.2761, "step": 16105 }, { "epoch": 0.2908803328061014, "grad_norm": 0.536946713924408, "learning_rate": 1.610711583152821e-05, "loss": 0.3384, "step": 16110 }, { "epoch": 0.29097061223900217, "grad_norm": 1.053295373916626, "learning_rate": 1.61048696982472e-05, "loss": 0.3751, "step": 16115 }, { "epoch": 0.29106089167190285, "grad_norm": 0.5262178182601929, "learning_rate": 1.6102623073873806e-05, "loss": 0.2472, "step": 16120 }, { "epoch": 0.2911511711048036, "grad_norm": 0.5010016560554504, "learning_rate": 1.610037595858876e-05, "loss": 0.2458, "step": 16125 }, { "epoch": 0.2912414505377043, "grad_norm": 0.5782332420349121, "learning_rate": 1.6098128352572817e-05, "loss": 0.3115, "step": 16130 }, { "epoch": 0.291331729970605, "grad_norm": 0.4458324909210205, "learning_rate": 1.609588025600679e-05, "loss": 0.3693, "step": 16135 }, { "epoch": 0.2914220094035057, "grad_norm": 0.6408203840255737, "learning_rate": 1.6093631669071514e-05, "loss": 0.2183, "step": 16140 }, { "epoch": 0.29151228883640645, "grad_norm": 0.5350770354270935, "learning_rate": 1.6091382591947878e-05, "loss": 0.3975, "step": 16145 }, { "epoch": 0.29160256826930714, "grad_norm": 0.31011661887168884, "learning_rate": 1.60891330248168e-05, "loss": 0.2466, "step": 16150 }, { "epoch": 0.2916928477022079, "grad_norm": 0.44612425565719604, "learning_rate": 1.6086882967859244e-05, "loss": 0.2606, "step": 16155 }, { "epoch": 0.29178312713510857, "grad_norm": 0.8416286110877991, "learning_rate": 1.6084632421256206e-05, "loss": 0.3034, "step": 16160 }, { "epoch": 0.2918734065680093, "grad_norm": 0.840193510055542, "learning_rate": 1.6082381385188736e-05, "loss": 0.3395, "step": 16165 }, { "epoch": 0.29196368600091, "grad_norm": 0.5311060547828674, "learning_rate": 1.60801298598379e-05, "loss": 0.2897, "step": 16170 }, { "epoch": 0.29205396543381074, "grad_norm": 0.4871434271335602, "learning_rate": 1.6077877845384826e-05, "loss": 0.2197, "step": 16175 }, { "epoch": 0.2921442448667114, "grad_norm": 0.3499642014503479, "learning_rate": 1.607562534201067e-05, "loss": 0.2727, "step": 16180 }, { "epoch": 0.29223452429961216, "grad_norm": 0.47589465975761414, "learning_rate": 1.6073372349896626e-05, "loss": 0.3038, "step": 16185 }, { "epoch": 0.29232480373251285, "grad_norm": 0.4539521038532257, "learning_rate": 1.6071118869223942e-05, "loss": 0.2644, "step": 16190 }, { "epoch": 0.2924150831654136, "grad_norm": 0.39266568422317505, "learning_rate": 1.606886490017388e-05, "loss": 0.2343, "step": 16195 }, { "epoch": 0.2925053625983143, "grad_norm": 0.5503832697868347, "learning_rate": 1.6066610442927762e-05, "loss": 0.3114, "step": 16200 }, { "epoch": 0.292595642031215, "grad_norm": 0.3184185028076172, "learning_rate": 1.6064355497666942e-05, "loss": 0.17, "step": 16205 }, { "epoch": 0.2926859214641157, "grad_norm": 0.6293808221817017, "learning_rate": 1.6062100064572818e-05, "loss": 0.3187, "step": 16210 }, { "epoch": 0.29277620089701645, "grad_norm": 0.5355658531188965, "learning_rate": 1.6059844143826816e-05, "loss": 0.2458, "step": 16215 }, { "epoch": 0.29286648032991713, "grad_norm": 0.3671532869338989, "learning_rate": 1.6057587735610413e-05, "loss": 0.2434, "step": 16220 }, { "epoch": 0.2929567597628179, "grad_norm": 0.39543014764785767, "learning_rate": 1.605533084010512e-05, "loss": 0.333, "step": 16225 }, { "epoch": 0.29304703919571856, "grad_norm": 0.4987691342830658, "learning_rate": 1.6053073457492486e-05, "loss": 0.2069, "step": 16230 }, { "epoch": 0.2931373186286193, "grad_norm": 0.4521399736404419, "learning_rate": 1.6050815587954103e-05, "loss": 0.2345, "step": 16235 }, { "epoch": 0.29322759806152, "grad_norm": 0.35981765389442444, "learning_rate": 1.6048557231671602e-05, "loss": 0.2338, "step": 16240 }, { "epoch": 0.29331787749442073, "grad_norm": 0.6158979535102844, "learning_rate": 1.6046298388826646e-05, "loss": 0.2522, "step": 16245 }, { "epoch": 0.2934081569273215, "grad_norm": 0.36089736223220825, "learning_rate": 1.6044039059600946e-05, "loss": 0.2624, "step": 16250 }, { "epoch": 0.29349843636022216, "grad_norm": 0.7971797585487366, "learning_rate": 1.6041779244176253e-05, "loss": 0.2781, "step": 16255 }, { "epoch": 0.2935887157931229, "grad_norm": 0.44186967611312866, "learning_rate": 1.603951894273434e-05, "loss": 0.3122, "step": 16260 }, { "epoch": 0.2936789952260236, "grad_norm": 0.6414748430252075, "learning_rate": 1.6037258155457046e-05, "loss": 0.3285, "step": 16265 }, { "epoch": 0.29376927465892433, "grad_norm": 0.611411452293396, "learning_rate": 1.6034996882526224e-05, "loss": 0.2994, "step": 16270 }, { "epoch": 0.293859554091825, "grad_norm": 0.8820737600326538, "learning_rate": 1.6032735124123783e-05, "loss": 0.225, "step": 16275 }, { "epoch": 0.29394983352472576, "grad_norm": 0.45774945616722107, "learning_rate": 1.6030472880431665e-05, "loss": 0.2805, "step": 16280 }, { "epoch": 0.29404011295762644, "grad_norm": 0.2715279459953308, "learning_rate": 1.6028210151631848e-05, "loss": 0.2294, "step": 16285 }, { "epoch": 0.2941303923905272, "grad_norm": 0.4004227817058563, "learning_rate": 1.6025946937906353e-05, "loss": 0.2253, "step": 16290 }, { "epoch": 0.29422067182342787, "grad_norm": 0.6729216575622559, "learning_rate": 1.602368323943724e-05, "loss": 0.2271, "step": 16295 }, { "epoch": 0.2943109512563286, "grad_norm": 0.6926440000534058, "learning_rate": 1.6021419056406608e-05, "loss": 0.2381, "step": 16300 }, { "epoch": 0.2944012306892293, "grad_norm": 0.3082001805305481, "learning_rate": 1.601915438899659e-05, "loss": 0.2876, "step": 16305 }, { "epoch": 0.29449151012213004, "grad_norm": 0.3748997449874878, "learning_rate": 1.6016889237389368e-05, "loss": 0.3011, "step": 16310 }, { "epoch": 0.2945817895550307, "grad_norm": 0.31639403104782104, "learning_rate": 1.6014623601767155e-05, "loss": 0.2779, "step": 16315 }, { "epoch": 0.29467206898793147, "grad_norm": 0.3767625689506531, "learning_rate": 1.6012357482312197e-05, "loss": 0.3008, "step": 16320 }, { "epoch": 0.29476234842083215, "grad_norm": 1.117903232574463, "learning_rate": 1.60100908792068e-05, "loss": 0.3156, "step": 16325 }, { "epoch": 0.2948526278537329, "grad_norm": 0.7283892631530762, "learning_rate": 1.600782379263329e-05, "loss": 0.2838, "step": 16330 }, { "epoch": 0.2949429072866336, "grad_norm": 0.5270529985427856, "learning_rate": 1.6005556222774033e-05, "loss": 0.2027, "step": 16335 }, { "epoch": 0.2950331867195343, "grad_norm": 0.3388364613056183, "learning_rate": 1.6003288169811444e-05, "loss": 0.282, "step": 16340 }, { "epoch": 0.295123466152435, "grad_norm": 0.32637423276901245, "learning_rate": 1.600101963392797e-05, "loss": 0.2616, "step": 16345 }, { "epoch": 0.29521374558533575, "grad_norm": 0.4189916253089905, "learning_rate": 1.5998750615306102e-05, "loss": 0.2784, "step": 16350 }, { "epoch": 0.29530402501823644, "grad_norm": 0.41963228583335876, "learning_rate": 1.5996481114128355e-05, "loss": 0.2659, "step": 16355 }, { "epoch": 0.2953943044511372, "grad_norm": 0.7162477374076843, "learning_rate": 1.5994211130577305e-05, "loss": 0.2969, "step": 16360 }, { "epoch": 0.29548458388403787, "grad_norm": 0.43092843890190125, "learning_rate": 1.599194066483555e-05, "loss": 0.369, "step": 16365 }, { "epoch": 0.2955748633169386, "grad_norm": 0.41079744696617126, "learning_rate": 1.5989669717085736e-05, "loss": 0.28, "step": 16370 }, { "epoch": 0.2956651427498393, "grad_norm": 0.4858797788619995, "learning_rate": 1.598739828751054e-05, "loss": 0.2702, "step": 16375 }, { "epoch": 0.29575542218274004, "grad_norm": 0.61838698387146, "learning_rate": 1.5985126376292684e-05, "loss": 0.3351, "step": 16380 }, { "epoch": 0.2958457016156407, "grad_norm": 0.42351454496383667, "learning_rate": 1.598285398361493e-05, "loss": 0.2322, "step": 16385 }, { "epoch": 0.29593598104854146, "grad_norm": 0.42310693860054016, "learning_rate": 1.5980581109660072e-05, "loss": 0.3411, "step": 16390 }, { "epoch": 0.29602626048144215, "grad_norm": 0.4089198410511017, "learning_rate": 1.5978307754610948e-05, "loss": 0.2977, "step": 16395 }, { "epoch": 0.2961165399143429, "grad_norm": 0.7078156471252441, "learning_rate": 1.5976033918650428e-05, "loss": 0.2277, "step": 16400 }, { "epoch": 0.2962068193472436, "grad_norm": 1.6692484617233276, "learning_rate": 1.597375960196143e-05, "loss": 0.2924, "step": 16405 }, { "epoch": 0.2962970987801443, "grad_norm": 0.5945263504981995, "learning_rate": 1.5971484804726908e-05, "loss": 0.1733, "step": 16410 }, { "epoch": 0.296387378213045, "grad_norm": 0.4478415250778198, "learning_rate": 1.5969209527129847e-05, "loss": 0.3286, "step": 16415 }, { "epoch": 0.29647765764594575, "grad_norm": 0.42879509925842285, "learning_rate": 1.5966933769353282e-05, "loss": 0.2969, "step": 16420 }, { "epoch": 0.29656793707884643, "grad_norm": 0.43934568762779236, "learning_rate": 1.596465753158028e-05, "loss": 0.3345, "step": 16425 }, { "epoch": 0.2966582165117472, "grad_norm": 0.3786970376968384, "learning_rate": 1.5962380813993946e-05, "loss": 0.3804, "step": 16430 }, { "epoch": 0.29674849594464786, "grad_norm": 0.5921241044998169, "learning_rate": 1.5960103616777423e-05, "loss": 0.2281, "step": 16435 }, { "epoch": 0.2968387753775486, "grad_norm": 0.842401921749115, "learning_rate": 1.5957825940113903e-05, "loss": 0.3482, "step": 16440 }, { "epoch": 0.2969290548104493, "grad_norm": 0.5948904752731323, "learning_rate": 1.59555477841866e-05, "loss": 0.2591, "step": 16445 }, { "epoch": 0.29701933424335003, "grad_norm": 0.6085169911384583, "learning_rate": 1.595326914917878e-05, "loss": 0.3126, "step": 16450 }, { "epoch": 0.2971096136762507, "grad_norm": 0.3494928181171417, "learning_rate": 1.5950990035273745e-05, "loss": 0.3576, "step": 16455 }, { "epoch": 0.29719989310915146, "grad_norm": 0.5723707675933838, "learning_rate": 1.5948710442654823e-05, "loss": 0.302, "step": 16460 }, { "epoch": 0.29729017254205214, "grad_norm": 0.5860376954078674, "learning_rate": 1.5946430371505404e-05, "loss": 0.2754, "step": 16465 }, { "epoch": 0.2973804519749529, "grad_norm": 0.5320484042167664, "learning_rate": 1.5944149822008893e-05, "loss": 0.3047, "step": 16470 }, { "epoch": 0.2974707314078536, "grad_norm": 0.3392791152000427, "learning_rate": 1.5941868794348745e-05, "loss": 0.2641, "step": 16475 }, { "epoch": 0.2975610108407543, "grad_norm": 0.3529278039932251, "learning_rate": 1.593958728870846e-05, "loss": 0.2366, "step": 16480 }, { "epoch": 0.297651290273655, "grad_norm": 0.4152165353298187, "learning_rate": 1.5937305305271556e-05, "loss": 0.2023, "step": 16485 }, { "epoch": 0.29774156970655574, "grad_norm": 0.3920888900756836, "learning_rate": 1.593502284422161e-05, "loss": 0.2125, "step": 16490 }, { "epoch": 0.29783184913945643, "grad_norm": 0.4801064431667328, "learning_rate": 1.5932739905742228e-05, "loss": 0.3785, "step": 16495 }, { "epoch": 0.29792212857235717, "grad_norm": 0.9531915187835693, "learning_rate": 1.593045649001706e-05, "loss": 0.1912, "step": 16500 }, { "epoch": 0.29801240800525786, "grad_norm": 0.35262203216552734, "learning_rate": 1.592817259722978e-05, "loss": 0.2331, "step": 16505 }, { "epoch": 0.2981026874381586, "grad_norm": 0.3981858491897583, "learning_rate": 1.592588822756412e-05, "loss": 0.326, "step": 16510 }, { "epoch": 0.2981929668710593, "grad_norm": 0.45648452639579773, "learning_rate": 1.5923603381203837e-05, "loss": 0.3099, "step": 16515 }, { "epoch": 0.29828324630396, "grad_norm": 0.3206727206707001, "learning_rate": 1.592131805833273e-05, "loss": 0.2139, "step": 16520 }, { "epoch": 0.2983735257368607, "grad_norm": 0.22646181285381317, "learning_rate": 1.5919032259134638e-05, "loss": 0.3353, "step": 16525 }, { "epoch": 0.29846380516976145, "grad_norm": 0.5294449925422668, "learning_rate": 1.5916745983793436e-05, "loss": 0.2825, "step": 16530 }, { "epoch": 0.29855408460266214, "grad_norm": 0.576151430606842, "learning_rate": 1.591445923249304e-05, "loss": 0.3866, "step": 16535 }, { "epoch": 0.2986443640355629, "grad_norm": 0.4805523157119751, "learning_rate": 1.5912172005417402e-05, "loss": 0.202, "step": 16540 }, { "epoch": 0.29873464346846357, "grad_norm": 0.873942494392395, "learning_rate": 1.590988430275051e-05, "loss": 0.3125, "step": 16545 }, { "epoch": 0.2988249229013643, "grad_norm": 0.37433138489723206, "learning_rate": 1.590759612467639e-05, "loss": 0.2575, "step": 16550 }, { "epoch": 0.298915202334265, "grad_norm": 0.5413210391998291, "learning_rate": 1.5905307471379118e-05, "loss": 0.2877, "step": 16555 }, { "epoch": 0.29900548176716574, "grad_norm": 0.3787118196487427, "learning_rate": 1.5903018343042797e-05, "loss": 0.2922, "step": 16560 }, { "epoch": 0.2990957612000664, "grad_norm": 0.3813949525356293, "learning_rate": 1.5900728739851568e-05, "loss": 0.2428, "step": 16565 }, { "epoch": 0.29918604063296717, "grad_norm": 0.6623268127441406, "learning_rate": 1.5898438661989615e-05, "loss": 0.3732, "step": 16570 }, { "epoch": 0.29927632006586785, "grad_norm": 0.5135146975517273, "learning_rate": 1.5896148109641157e-05, "loss": 0.2872, "step": 16575 }, { "epoch": 0.2993665994987686, "grad_norm": 0.36938363313674927, "learning_rate": 1.5893857082990455e-05, "loss": 0.312, "step": 16580 }, { "epoch": 0.2994568789316693, "grad_norm": 0.338520884513855, "learning_rate": 1.58915655822218e-05, "loss": 0.257, "step": 16585 }, { "epoch": 0.29954715836457, "grad_norm": 0.272137850522995, "learning_rate": 1.5889273607519535e-05, "loss": 0.2715, "step": 16590 }, { "epoch": 0.2996374377974707, "grad_norm": 0.46022340655326843, "learning_rate": 1.588698115906802e-05, "loss": 0.2981, "step": 16595 }, { "epoch": 0.29972771723037145, "grad_norm": 0.6358815431594849, "learning_rate": 1.5884688237051675e-05, "loss": 0.3019, "step": 16600 }, { "epoch": 0.29981799666327213, "grad_norm": 1.5296499729156494, "learning_rate": 1.588239484165495e-05, "loss": 0.2328, "step": 16605 }, { "epoch": 0.2999082760961729, "grad_norm": 0.49699029326438904, "learning_rate": 1.5880100973062326e-05, "loss": 0.2118, "step": 16610 }, { "epoch": 0.29999855552907356, "grad_norm": 0.5322579741477966, "learning_rate": 1.5877806631458334e-05, "loss": 0.4579, "step": 16615 }, { "epoch": 0.3000888349619743, "grad_norm": 0.356161504983902, "learning_rate": 1.5875511817027534e-05, "loss": 0.2937, "step": 16620 }, { "epoch": 0.300179114394875, "grad_norm": 0.45161619782447815, "learning_rate": 1.5873216529954528e-05, "loss": 0.2722, "step": 16625 }, { "epoch": 0.30026939382777573, "grad_norm": 1.0573439598083496, "learning_rate": 1.5870920770423956e-05, "loss": 0.3443, "step": 16630 }, { "epoch": 0.3003596732606764, "grad_norm": 0.6455829739570618, "learning_rate": 1.586862453862049e-05, "loss": 0.221, "step": 16635 }, { "epoch": 0.30044995269357716, "grad_norm": 0.332810640335083, "learning_rate": 1.5866327834728846e-05, "loss": 0.1939, "step": 16640 }, { "epoch": 0.3005402321264779, "grad_norm": 0.3890936076641083, "learning_rate": 1.5864030658933785e-05, "loss": 0.2534, "step": 16645 }, { "epoch": 0.3006305115593786, "grad_norm": 0.9418467283248901, "learning_rate": 1.5861733011420092e-05, "loss": 0.296, "step": 16650 }, { "epoch": 0.30072079099227933, "grad_norm": 0.5516576170921326, "learning_rate": 1.5859434892372597e-05, "loss": 0.316, "step": 16655 }, { "epoch": 0.30081107042518, "grad_norm": 0.5210039615631104, "learning_rate": 1.585713630197617e-05, "loss": 0.3231, "step": 16660 }, { "epoch": 0.30090134985808076, "grad_norm": 0.45455679297447205, "learning_rate": 1.5854837240415707e-05, "loss": 0.1983, "step": 16665 }, { "epoch": 0.30099162929098144, "grad_norm": 0.35978347063064575, "learning_rate": 1.5852537707876163e-05, "loss": 0.3051, "step": 16670 }, { "epoch": 0.3010819087238822, "grad_norm": 0.39413386583328247, "learning_rate": 1.5850237704542508e-05, "loss": 0.3355, "step": 16675 }, { "epoch": 0.30117218815678287, "grad_norm": 0.4914714992046356, "learning_rate": 1.584793723059977e-05, "loss": 0.2743, "step": 16680 }, { "epoch": 0.3012624675896836, "grad_norm": 0.41727781295776367, "learning_rate": 1.5845636286232993e-05, "loss": 0.274, "step": 16685 }, { "epoch": 0.3013527470225843, "grad_norm": 0.5112030506134033, "learning_rate": 1.5843334871627285e-05, "loss": 0.2437, "step": 16690 }, { "epoch": 0.30144302645548504, "grad_norm": 0.4653795063495636, "learning_rate": 1.5841032986967768e-05, "loss": 0.4123, "step": 16695 }, { "epoch": 0.3015333058883857, "grad_norm": 0.2604282796382904, "learning_rate": 1.583873063243962e-05, "loss": 0.2101, "step": 16700 }, { "epoch": 0.30162358532128647, "grad_norm": 0.3000047206878662, "learning_rate": 1.5836427808228037e-05, "loss": 0.2573, "step": 16705 }, { "epoch": 0.30171386475418716, "grad_norm": 0.5702049136161804, "learning_rate": 1.5834124514518275e-05, "loss": 0.3305, "step": 16710 }, { "epoch": 0.3018041441870879, "grad_norm": 0.7986447215080261, "learning_rate": 1.5831820751495618e-05, "loss": 0.2979, "step": 16715 }, { "epoch": 0.3018944236199886, "grad_norm": 0.5973787307739258, "learning_rate": 1.5829516519345382e-05, "loss": 0.2826, "step": 16720 }, { "epoch": 0.3019847030528893, "grad_norm": 0.3181029260158539, "learning_rate": 1.5827211818252922e-05, "loss": 0.2041, "step": 16725 }, { "epoch": 0.30207498248579, "grad_norm": 0.3650952875614166, "learning_rate": 1.5824906648403645e-05, "loss": 0.1462, "step": 16730 }, { "epoch": 0.30216526191869075, "grad_norm": 0.4607314169406891, "learning_rate": 1.5822601009982977e-05, "loss": 0.2993, "step": 16735 }, { "epoch": 0.30225554135159144, "grad_norm": 0.8629481792449951, "learning_rate": 1.582029490317639e-05, "loss": 0.3386, "step": 16740 }, { "epoch": 0.3023458207844922, "grad_norm": 0.8705028295516968, "learning_rate": 1.5817988328169397e-05, "loss": 0.2005, "step": 16745 }, { "epoch": 0.30243610021739287, "grad_norm": 0.5850492715835571, "learning_rate": 1.5815681285147543e-05, "loss": 0.2795, "step": 16750 }, { "epoch": 0.3025263796502936, "grad_norm": 0.43750256299972534, "learning_rate": 1.5813373774296415e-05, "loss": 0.3561, "step": 16755 }, { "epoch": 0.3026166590831943, "grad_norm": 0.4892030656337738, "learning_rate": 1.5811065795801633e-05, "loss": 0.3041, "step": 16760 }, { "epoch": 0.30270693851609504, "grad_norm": 0.7717058658599854, "learning_rate": 1.580875734984886e-05, "loss": 0.2017, "step": 16765 }, { "epoch": 0.3027972179489957, "grad_norm": 0.4515610635280609, "learning_rate": 1.580644843662379e-05, "loss": 0.2682, "step": 16770 }, { "epoch": 0.30288749738189646, "grad_norm": 0.5138480067253113, "learning_rate": 1.580413905631216e-05, "loss": 0.2458, "step": 16775 }, { "epoch": 0.30297777681479715, "grad_norm": 0.40732017159461975, "learning_rate": 1.5801829209099747e-05, "loss": 0.2704, "step": 16780 }, { "epoch": 0.3030680562476979, "grad_norm": 0.4371160566806793, "learning_rate": 1.5799518895172353e-05, "loss": 0.1914, "step": 16785 }, { "epoch": 0.3031583356805986, "grad_norm": 0.4059670567512512, "learning_rate": 1.5797208114715834e-05, "loss": 0.2064, "step": 16790 }, { "epoch": 0.3032486151134993, "grad_norm": 0.46853598952293396, "learning_rate": 1.579489686791607e-05, "loss": 0.3301, "step": 16795 }, { "epoch": 0.3033388945464, "grad_norm": 0.4065183699131012, "learning_rate": 1.5792585154958983e-05, "loss": 0.3527, "step": 16800 }, { "epoch": 0.30342917397930075, "grad_norm": 0.5132343173027039, "learning_rate": 1.579027297603054e-05, "loss": 0.2933, "step": 16805 }, { "epoch": 0.30351945341220143, "grad_norm": 0.5387590527534485, "learning_rate": 1.5787960331316737e-05, "loss": 0.2777, "step": 16810 }, { "epoch": 0.3036097328451022, "grad_norm": 0.451153963804245, "learning_rate": 1.5785647221003607e-05, "loss": 0.2794, "step": 16815 }, { "epoch": 0.30370001227800286, "grad_norm": 0.4658501446247101, "learning_rate": 1.578333364527722e-05, "loss": 0.2532, "step": 16820 }, { "epoch": 0.3037902917109036, "grad_norm": 0.6236963868141174, "learning_rate": 1.5781019604323695e-05, "loss": 0.3406, "step": 16825 }, { "epoch": 0.3038805711438043, "grad_norm": 0.5373722314834595, "learning_rate": 1.5778705098329174e-05, "loss": 0.2902, "step": 16830 }, { "epoch": 0.30397085057670503, "grad_norm": 0.5444965958595276, "learning_rate": 1.577639012747984e-05, "loss": 0.2481, "step": 16835 }, { "epoch": 0.3040611300096057, "grad_norm": 0.5736184120178223, "learning_rate": 1.577407469196192e-05, "loss": 0.2832, "step": 16840 }, { "epoch": 0.30415140944250646, "grad_norm": 0.3974739611148834, "learning_rate": 1.5771758791961676e-05, "loss": 0.2287, "step": 16845 }, { "epoch": 0.30424168887540715, "grad_norm": 0.3344295024871826, "learning_rate": 1.5769442427665397e-05, "loss": 0.2858, "step": 16850 }, { "epoch": 0.3043319683083079, "grad_norm": 0.3950170874595642, "learning_rate": 1.5767125599259424e-05, "loss": 0.347, "step": 16855 }, { "epoch": 0.3044222477412086, "grad_norm": 0.527312159538269, "learning_rate": 1.5764808306930127e-05, "loss": 0.2666, "step": 16860 }, { "epoch": 0.3045125271741093, "grad_norm": 0.46841877698898315, "learning_rate": 1.576249055086392e-05, "loss": 0.2881, "step": 16865 }, { "epoch": 0.30460280660701, "grad_norm": 1.189628005027771, "learning_rate": 1.5760172331247242e-05, "loss": 0.257, "step": 16870 }, { "epoch": 0.30469308603991074, "grad_norm": 0.5873405337333679, "learning_rate": 1.575785364826658e-05, "loss": 0.2372, "step": 16875 }, { "epoch": 0.30478336547281143, "grad_norm": 0.5122183561325073, "learning_rate": 1.5755534502108458e-05, "loss": 0.3921, "step": 16880 }, { "epoch": 0.30487364490571217, "grad_norm": 0.3151512145996094, "learning_rate": 1.575321489295943e-05, "loss": 0.1668, "step": 16885 }, { "epoch": 0.30496392433861286, "grad_norm": 0.4796990156173706, "learning_rate": 1.5750894821006094e-05, "loss": 0.2686, "step": 16890 }, { "epoch": 0.3050542037715136, "grad_norm": 0.44149377942085266, "learning_rate": 1.5748574286435083e-05, "loss": 0.3099, "step": 16895 }, { "epoch": 0.3051444832044143, "grad_norm": 0.4739304780960083, "learning_rate": 1.574625328943307e-05, "loss": 0.3619, "step": 16900 }, { "epoch": 0.305234762637315, "grad_norm": 0.47793883085250854, "learning_rate": 1.5743931830186754e-05, "loss": 0.3208, "step": 16905 }, { "epoch": 0.3053250420702157, "grad_norm": 0.29709920287132263, "learning_rate": 1.574160990888289e-05, "loss": 0.2187, "step": 16910 }, { "epoch": 0.30541532150311645, "grad_norm": 2.004121780395508, "learning_rate": 1.573928752570825e-05, "loss": 0.299, "step": 16915 }, { "epoch": 0.30550560093601714, "grad_norm": 0.721603274345398, "learning_rate": 1.573696468084966e-05, "loss": 0.278, "step": 16920 }, { "epoch": 0.3055958803689179, "grad_norm": 0.3206048905849457, "learning_rate": 1.573464137449397e-05, "loss": 0.27, "step": 16925 }, { "epoch": 0.30568615980181857, "grad_norm": 0.910554826259613, "learning_rate": 1.573231760682808e-05, "loss": 0.3167, "step": 16930 }, { "epoch": 0.3057764392347193, "grad_norm": 0.4243769943714142, "learning_rate": 1.5729993378038913e-05, "loss": 0.2846, "step": 16935 }, { "epoch": 0.30586671866762, "grad_norm": 0.4143887460231781, "learning_rate": 1.572766868831344e-05, "loss": 0.2467, "step": 16940 }, { "epoch": 0.30595699810052074, "grad_norm": 0.35781130194664, "learning_rate": 1.572534353783867e-05, "loss": 0.3657, "step": 16945 }, { "epoch": 0.3060472775334214, "grad_norm": 0.4417208135128021, "learning_rate": 1.5723017926801633e-05, "loss": 0.2778, "step": 16950 }, { "epoch": 0.30613755696632217, "grad_norm": 0.3955446481704712, "learning_rate": 1.5720691855389417e-05, "loss": 0.296, "step": 16955 }, { "epoch": 0.30622783639922285, "grad_norm": 0.34164705872535706, "learning_rate": 1.5718365323789135e-05, "loss": 0.2581, "step": 16960 }, { "epoch": 0.3063181158321236, "grad_norm": 0.3188674747943878, "learning_rate": 1.571603833218794e-05, "loss": 0.2536, "step": 16965 }, { "epoch": 0.3064083952650243, "grad_norm": 0.36604100465774536, "learning_rate": 1.571371088077302e-05, "loss": 0.193, "step": 16970 }, { "epoch": 0.306498674697925, "grad_norm": 0.46219056844711304, "learning_rate": 1.5711382969731603e-05, "loss": 0.3393, "step": 16975 }, { "epoch": 0.3065889541308257, "grad_norm": 0.4927115738391876, "learning_rate": 1.5709054599250954e-05, "loss": 0.2085, "step": 16980 }, { "epoch": 0.30667923356372645, "grad_norm": 0.3200385272502899, "learning_rate": 1.5706725769518365e-05, "loss": 0.2326, "step": 16985 }, { "epoch": 0.30676951299662714, "grad_norm": 0.4072023332118988, "learning_rate": 1.5704396480721183e-05, "loss": 0.2405, "step": 16990 }, { "epoch": 0.3068597924295279, "grad_norm": 0.41983669996261597, "learning_rate": 1.5702066733046782e-05, "loss": 0.4141, "step": 16995 }, { "epoch": 0.30695007186242856, "grad_norm": 0.4949229657649994, "learning_rate": 1.5699736526682566e-05, "loss": 0.3501, "step": 17000 }, { "epoch": 0.3070403512953293, "grad_norm": 0.574365496635437, "learning_rate": 1.569740586181599e-05, "loss": 0.2866, "step": 17005 }, { "epoch": 0.30713063072823, "grad_norm": 0.329974889755249, "learning_rate": 1.5695074738634536e-05, "loss": 0.314, "step": 17010 }, { "epoch": 0.30722091016113073, "grad_norm": 0.45943304896354675, "learning_rate": 1.569274315732573e-05, "loss": 0.2753, "step": 17015 }, { "epoch": 0.3073111895940314, "grad_norm": 0.5166605710983276, "learning_rate": 1.569041111807712e-05, "loss": 0.3967, "step": 17020 }, { "epoch": 0.30740146902693216, "grad_norm": 0.6126747727394104, "learning_rate": 1.5688078621076313e-05, "loss": 0.2758, "step": 17025 }, { "epoch": 0.30749174845983285, "grad_norm": 0.5386550426483154, "learning_rate": 1.5685745666510935e-05, "loss": 0.2913, "step": 17030 }, { "epoch": 0.3075820278927336, "grad_norm": 0.5282537937164307, "learning_rate": 1.568341225456866e-05, "loss": 0.3627, "step": 17035 }, { "epoch": 0.30767230732563433, "grad_norm": 1.0371299982070923, "learning_rate": 1.568107838543719e-05, "loss": 0.1561, "step": 17040 }, { "epoch": 0.307762586758535, "grad_norm": 0.54307621717453, "learning_rate": 1.5678744059304273e-05, "loss": 0.2988, "step": 17045 }, { "epoch": 0.30785286619143576, "grad_norm": 0.5720261335372925, "learning_rate": 1.567640927635768e-05, "loss": 0.2863, "step": 17050 }, { "epoch": 0.30794314562433645, "grad_norm": 0.42609718441963196, "learning_rate": 1.5674074036785233e-05, "loss": 0.216, "step": 17055 }, { "epoch": 0.3080334250572372, "grad_norm": 0.39350172877311707, "learning_rate": 1.5671738340774787e-05, "loss": 0.3556, "step": 17060 }, { "epoch": 0.3081237044901379, "grad_norm": 0.441972553730011, "learning_rate": 1.5669402188514228e-05, "loss": 0.2697, "step": 17065 }, { "epoch": 0.3082139839230386, "grad_norm": 0.5798181295394897, "learning_rate": 1.5667065580191482e-05, "loss": 0.3265, "step": 17070 }, { "epoch": 0.3083042633559393, "grad_norm": 0.4554998576641083, "learning_rate": 1.5664728515994514e-05, "loss": 0.3389, "step": 17075 }, { "epoch": 0.30839454278884004, "grad_norm": 0.3674794137477875, "learning_rate": 1.5662390996111322e-05, "loss": 0.2932, "step": 17080 }, { "epoch": 0.30848482222174073, "grad_norm": 0.6466832756996155, "learning_rate": 1.566005302072995e-05, "loss": 0.1986, "step": 17085 }, { "epoch": 0.30857510165464147, "grad_norm": 1.2485787868499756, "learning_rate": 1.565771459003846e-05, "loss": 0.2913, "step": 17090 }, { "epoch": 0.30866538108754216, "grad_norm": 1.4496780633926392, "learning_rate": 1.565537570422497e-05, "loss": 0.3014, "step": 17095 }, { "epoch": 0.3087556605204429, "grad_norm": 0.415077805519104, "learning_rate": 1.565303636347762e-05, "loss": 0.257, "step": 17100 }, { "epoch": 0.3088459399533436, "grad_norm": 0.35092994570732117, "learning_rate": 1.5650696567984595e-05, "loss": 0.2626, "step": 17105 }, { "epoch": 0.3089362193862443, "grad_norm": 0.5046998858451843, "learning_rate": 1.564835631793412e-05, "loss": 0.2361, "step": 17110 }, { "epoch": 0.309026498819145, "grad_norm": 0.46307122707366943, "learning_rate": 1.5646015613514444e-05, "loss": 0.249, "step": 17115 }, { "epoch": 0.30911677825204575, "grad_norm": 0.6530759930610657, "learning_rate": 1.564367445491386e-05, "loss": 0.2387, "step": 17120 }, { "epoch": 0.30920705768494644, "grad_norm": 0.36529412865638733, "learning_rate": 1.5641332842320706e-05, "loss": 0.272, "step": 17125 }, { "epoch": 0.3092973371178472, "grad_norm": 0.3701333999633789, "learning_rate": 1.5638990775923332e-05, "loss": 0.2612, "step": 17130 }, { "epoch": 0.30938761655074787, "grad_norm": 0.39277517795562744, "learning_rate": 1.5636648255910158e-05, "loss": 0.2861, "step": 17135 }, { "epoch": 0.3094778959836486, "grad_norm": 0.2821442782878876, "learning_rate": 1.5634305282469612e-05, "loss": 0.2338, "step": 17140 }, { "epoch": 0.3095681754165493, "grad_norm": 0.4453014135360718, "learning_rate": 1.563196185579017e-05, "loss": 0.2903, "step": 17145 }, { "epoch": 0.30965845484945004, "grad_norm": 0.4597513675689697, "learning_rate": 1.5629617976060345e-05, "loss": 0.279, "step": 17150 }, { "epoch": 0.3097487342823507, "grad_norm": 0.2701919376850128, "learning_rate": 1.5627273643468685e-05, "loss": 0.1459, "step": 17155 }, { "epoch": 0.30983901371525147, "grad_norm": 0.6496157646179199, "learning_rate": 1.5624928858203777e-05, "loss": 0.2051, "step": 17160 }, { "epoch": 0.30992929314815215, "grad_norm": 0.47751709818840027, "learning_rate": 1.5622583620454238e-05, "loss": 0.1716, "step": 17165 }, { "epoch": 0.3100195725810529, "grad_norm": 0.4866003096103668, "learning_rate": 1.5620237930408726e-05, "loss": 0.2241, "step": 17170 }, { "epoch": 0.3101098520139536, "grad_norm": 0.30454862117767334, "learning_rate": 1.5617891788255936e-05, "loss": 0.3589, "step": 17175 }, { "epoch": 0.3102001314468543, "grad_norm": 0.6282525658607483, "learning_rate": 1.56155451941846e-05, "loss": 0.3163, "step": 17180 }, { "epoch": 0.310290410879755, "grad_norm": 0.4587770700454712, "learning_rate": 1.5613198148383482e-05, "loss": 0.2203, "step": 17185 }, { "epoch": 0.31038069031265575, "grad_norm": 0.5046096444129944, "learning_rate": 1.561085065104138e-05, "loss": 0.374, "step": 17190 }, { "epoch": 0.31047096974555644, "grad_norm": 0.2771824598312378, "learning_rate": 1.560850270234714e-05, "loss": 0.1991, "step": 17195 }, { "epoch": 0.3105612491784572, "grad_norm": 0.4556228518486023, "learning_rate": 1.560615430248964e-05, "loss": 0.2416, "step": 17200 }, { "epoch": 0.31065152861135786, "grad_norm": 0.5162927508354187, "learning_rate": 1.560380545165778e-05, "loss": 0.2607, "step": 17205 }, { "epoch": 0.3107418080442586, "grad_norm": 0.3797825276851654, "learning_rate": 1.5601456150040522e-05, "loss": 0.1871, "step": 17210 }, { "epoch": 0.3108320874771593, "grad_norm": 0.3668143153190613, "learning_rate": 1.5599106397826836e-05, "loss": 0.1506, "step": 17215 }, { "epoch": 0.31092236691006003, "grad_norm": 0.2939102351665497, "learning_rate": 1.5596756195205755e-05, "loss": 0.2777, "step": 17220 }, { "epoch": 0.3110126463429607, "grad_norm": 0.34372422099113464, "learning_rate": 1.5594405542366327e-05, "loss": 0.2181, "step": 17225 }, { "epoch": 0.31110292577586146, "grad_norm": 0.8658391833305359, "learning_rate": 1.559205443949765e-05, "loss": 0.3776, "step": 17230 }, { "epoch": 0.31119320520876215, "grad_norm": 0.34222641587257385, "learning_rate": 1.5589702886788855e-05, "loss": 0.1955, "step": 17235 }, { "epoch": 0.3112834846416629, "grad_norm": 0.4252156913280487, "learning_rate": 1.55873508844291e-05, "loss": 0.2518, "step": 17240 }, { "epoch": 0.3113737640745636, "grad_norm": 1.5674724578857422, "learning_rate": 1.558499843260759e-05, "loss": 0.2681, "step": 17245 }, { "epoch": 0.3114640435074643, "grad_norm": 0.388078898191452, "learning_rate": 1.5582645531513563e-05, "loss": 0.3618, "step": 17250 }, { "epoch": 0.311554322940365, "grad_norm": 0.34436941146850586, "learning_rate": 1.5580292181336295e-05, "loss": 0.3266, "step": 17255 }, { "epoch": 0.31164460237326574, "grad_norm": 0.4869365394115448, "learning_rate": 1.557793838226509e-05, "loss": 0.2529, "step": 17260 }, { "epoch": 0.31173488180616643, "grad_norm": 0.4009968936443329, "learning_rate": 1.55755841344893e-05, "loss": 0.2321, "step": 17265 }, { "epoch": 0.3118251612390672, "grad_norm": 0.4688039720058441, "learning_rate": 1.5573229438198304e-05, "loss": 0.3298, "step": 17270 }, { "epoch": 0.31191544067196786, "grad_norm": 0.44214802980422974, "learning_rate": 1.5570874293581524e-05, "loss": 0.2518, "step": 17275 }, { "epoch": 0.3120057201048686, "grad_norm": 0.6073891520500183, "learning_rate": 1.556851870082841e-05, "loss": 0.3124, "step": 17280 }, { "epoch": 0.3120959995377693, "grad_norm": 0.5940367579460144, "learning_rate": 1.5566162660128452e-05, "loss": 0.2754, "step": 17285 }, { "epoch": 0.31218627897067003, "grad_norm": 0.4338489770889282, "learning_rate": 1.5563806171671177e-05, "loss": 0.2416, "step": 17290 }, { "epoch": 0.3122765584035707, "grad_norm": 0.4148912727832794, "learning_rate": 1.556144923564615e-05, "loss": 0.3408, "step": 17295 }, { "epoch": 0.31236683783647146, "grad_norm": 0.42692506313323975, "learning_rate": 1.5559091852242966e-05, "loss": 0.2837, "step": 17300 }, { "epoch": 0.31245711726937214, "grad_norm": 0.4609665274620056, "learning_rate": 1.5556734021651263e-05, "loss": 0.2777, "step": 17305 }, { "epoch": 0.3125473967022729, "grad_norm": 0.41917040944099426, "learning_rate": 1.555437574406071e-05, "loss": 0.2286, "step": 17310 }, { "epoch": 0.31263767613517357, "grad_norm": 0.8608391284942627, "learning_rate": 1.5552017019661013e-05, "loss": 0.2612, "step": 17315 }, { "epoch": 0.3127279555680743, "grad_norm": 0.49164366722106934, "learning_rate": 1.5549657848641907e-05, "loss": 0.3164, "step": 17320 }, { "epoch": 0.312818235000975, "grad_norm": 0.46910715103149414, "learning_rate": 1.5547298231193185e-05, "loss": 0.2725, "step": 17325 }, { "epoch": 0.31290851443387574, "grad_norm": 0.38566046953201294, "learning_rate": 1.554493816750465e-05, "loss": 0.2339, "step": 17330 }, { "epoch": 0.3129987938667764, "grad_norm": 0.38748466968536377, "learning_rate": 1.5542577657766152e-05, "loss": 0.2653, "step": 17335 }, { "epoch": 0.31308907329967717, "grad_norm": 0.3147033751010895, "learning_rate": 1.5540216702167587e-05, "loss": 0.3066, "step": 17340 }, { "epoch": 0.31317935273257785, "grad_norm": 0.3528516888618469, "learning_rate": 1.5537855300898862e-05, "loss": 0.3652, "step": 17345 }, { "epoch": 0.3132696321654786, "grad_norm": 0.43596094846725464, "learning_rate": 1.5535493454149947e-05, "loss": 0.3015, "step": 17350 }, { "epoch": 0.3133599115983793, "grad_norm": 0.6840492486953735, "learning_rate": 1.5533131162110827e-05, "loss": 0.1868, "step": 17355 }, { "epoch": 0.31345019103128, "grad_norm": 0.38821282982826233, "learning_rate": 1.553076842497154e-05, "loss": 0.196, "step": 17360 }, { "epoch": 0.3135404704641807, "grad_norm": 0.4120749831199646, "learning_rate": 1.5528405242922143e-05, "loss": 0.3502, "step": 17365 }, { "epoch": 0.31363074989708145, "grad_norm": 0.43744704127311707, "learning_rate": 1.552604161615274e-05, "loss": 0.2365, "step": 17370 }, { "epoch": 0.31372102932998214, "grad_norm": 0.31180235743522644, "learning_rate": 1.5523677544853464e-05, "loss": 0.28, "step": 17375 }, { "epoch": 0.3138113087628829, "grad_norm": 0.5090747475624084, "learning_rate": 1.5521313029214494e-05, "loss": 0.3136, "step": 17380 }, { "epoch": 0.31390158819578357, "grad_norm": 0.4170553982257843, "learning_rate": 1.5518948069426035e-05, "loss": 0.3178, "step": 17385 }, { "epoch": 0.3139918676286843, "grad_norm": 0.49591946601867676, "learning_rate": 1.5516582665678332e-05, "loss": 0.2441, "step": 17390 }, { "epoch": 0.314082147061585, "grad_norm": 1.1685659885406494, "learning_rate": 1.5514216818161658e-05, "loss": 0.3014, "step": 17395 }, { "epoch": 0.31417242649448573, "grad_norm": 0.23236806690692902, "learning_rate": 1.5511850527066336e-05, "loss": 0.3061, "step": 17400 }, { "epoch": 0.3142627059273864, "grad_norm": 0.37775471806526184, "learning_rate": 1.5509483792582713e-05, "loss": 0.2792, "step": 17405 }, { "epoch": 0.31435298536028716, "grad_norm": 0.6125426292419434, "learning_rate": 1.5507116614901176e-05, "loss": 0.2237, "step": 17410 }, { "epoch": 0.31444326479318785, "grad_norm": 0.626716673374176, "learning_rate": 1.5504748994212147e-05, "loss": 0.2518, "step": 17415 }, { "epoch": 0.3145335442260886, "grad_norm": 0.5543079972267151, "learning_rate": 1.550238093070609e-05, "loss": 0.2795, "step": 17420 }, { "epoch": 0.3146238236589893, "grad_norm": 0.35237279534339905, "learning_rate": 1.550001242457349e-05, "loss": 0.2229, "step": 17425 }, { "epoch": 0.31471410309189, "grad_norm": 0.5148335695266724, "learning_rate": 1.549764347600488e-05, "loss": 0.3217, "step": 17430 }, { "epoch": 0.3148043825247907, "grad_norm": 0.38885366916656494, "learning_rate": 1.5495274085190823e-05, "loss": 0.1891, "step": 17435 }, { "epoch": 0.31489466195769145, "grad_norm": 0.46232593059539795, "learning_rate": 1.549290425232192e-05, "loss": 0.358, "step": 17440 }, { "epoch": 0.3149849413905922, "grad_norm": 0.40540289878845215, "learning_rate": 1.549053397758881e-05, "loss": 0.2707, "step": 17445 }, { "epoch": 0.3150752208234929, "grad_norm": 0.47470027208328247, "learning_rate": 1.548816326118216e-05, "loss": 0.2484, "step": 17450 }, { "epoch": 0.3151655002563936, "grad_norm": 0.5260505080223083, "learning_rate": 1.5485792103292678e-05, "loss": 0.195, "step": 17455 }, { "epoch": 0.3152557796892943, "grad_norm": 0.4604606330394745, "learning_rate": 1.548342050411111e-05, "loss": 0.2525, "step": 17460 }, { "epoch": 0.31534605912219504, "grad_norm": 0.5449261665344238, "learning_rate": 1.5481048463828226e-05, "loss": 0.2357, "step": 17465 }, { "epoch": 0.31543633855509573, "grad_norm": 0.34216904640197754, "learning_rate": 1.5478675982634844e-05, "loss": 0.2595, "step": 17470 }, { "epoch": 0.31552661798799647, "grad_norm": 0.6009491086006165, "learning_rate": 1.5476303060721818e-05, "loss": 0.3101, "step": 17475 }, { "epoch": 0.31561689742089716, "grad_norm": 0.6484291553497314, "learning_rate": 1.5473929698280023e-05, "loss": 0.2956, "step": 17480 }, { "epoch": 0.3157071768537979, "grad_norm": 0.48652103543281555, "learning_rate": 1.5471555895500382e-05, "loss": 0.2512, "step": 17485 }, { "epoch": 0.3157974562866986, "grad_norm": 0.45876580476760864, "learning_rate": 1.5469181652573856e-05, "loss": 0.2578, "step": 17490 }, { "epoch": 0.3158877357195993, "grad_norm": 0.4625091850757599, "learning_rate": 1.5466806969691428e-05, "loss": 0.2697, "step": 17495 }, { "epoch": 0.3159780151525, "grad_norm": 2.235440492630005, "learning_rate": 1.5464431847044127e-05, "loss": 0.2677, "step": 17500 }, { "epoch": 0.31606829458540076, "grad_norm": 0.9531151652336121, "learning_rate": 1.5462056284823016e-05, "loss": 0.2773, "step": 17505 }, { "epoch": 0.31615857401830144, "grad_norm": 0.6140753030776978, "learning_rate": 1.545968028321919e-05, "loss": 0.2744, "step": 17510 }, { "epoch": 0.3162488534512022, "grad_norm": 0.3872118294239044, "learning_rate": 1.5457303842423777e-05, "loss": 0.278, "step": 17515 }, { "epoch": 0.31633913288410287, "grad_norm": 1.6485217809677124, "learning_rate": 1.5454926962627948e-05, "loss": 0.2587, "step": 17520 }, { "epoch": 0.3164294123170036, "grad_norm": 0.3472902178764343, "learning_rate": 1.545254964402291e-05, "loss": 0.2899, "step": 17525 }, { "epoch": 0.3165196917499043, "grad_norm": 0.3585287630558014, "learning_rate": 1.5450171886799898e-05, "loss": 0.2406, "step": 17530 }, { "epoch": 0.31660997118280504, "grad_norm": 0.5434662699699402, "learning_rate": 1.544779369115018e-05, "loss": 0.3442, "step": 17535 }, { "epoch": 0.3167002506157057, "grad_norm": 0.5494941473007202, "learning_rate": 1.5445415057265072e-05, "loss": 0.3096, "step": 17540 }, { "epoch": 0.31679053004860647, "grad_norm": 0.3780425190925598, "learning_rate": 1.5443035985335914e-05, "loss": 0.2801, "step": 17545 }, { "epoch": 0.31688080948150715, "grad_norm": 0.2784373164176941, "learning_rate": 1.5440656475554084e-05, "loss": 0.2165, "step": 17550 }, { "epoch": 0.3169710889144079, "grad_norm": 0.42795300483703613, "learning_rate": 1.5438276528111e-05, "loss": 0.3302, "step": 17555 }, { "epoch": 0.3170613683473086, "grad_norm": 0.4713800549507141, "learning_rate": 1.5435896143198107e-05, "loss": 0.2392, "step": 17560 }, { "epoch": 0.3171516477802093, "grad_norm": 0.4960048794746399, "learning_rate": 1.5433515321006895e-05, "loss": 0.3038, "step": 17565 }, { "epoch": 0.31724192721311, "grad_norm": 0.275836318731308, "learning_rate": 1.543113406172888e-05, "loss": 0.1655, "step": 17570 }, { "epoch": 0.31733220664601075, "grad_norm": 0.4332798719406128, "learning_rate": 1.5428752365555617e-05, "loss": 0.3069, "step": 17575 }, { "epoch": 0.31742248607891144, "grad_norm": 0.2804764211177826, "learning_rate": 1.5426370232678697e-05, "loss": 0.2446, "step": 17580 }, { "epoch": 0.3175127655118122, "grad_norm": 0.5299254059791565, "learning_rate": 1.5423987663289747e-05, "loss": 0.2971, "step": 17585 }, { "epoch": 0.31760304494471286, "grad_norm": 0.4273294508457184, "learning_rate": 1.542160465758042e-05, "loss": 0.2914, "step": 17590 }, { "epoch": 0.3176933243776136, "grad_norm": 0.35984429717063904, "learning_rate": 1.5419221215742423e-05, "loss": 0.2972, "step": 17595 }, { "epoch": 0.3177836038105143, "grad_norm": 0.5424455404281616, "learning_rate": 1.541683733796748e-05, "loss": 0.276, "step": 17600 }, { "epoch": 0.31787388324341503, "grad_norm": 0.4359292984008789, "learning_rate": 1.5414453024447354e-05, "loss": 0.3098, "step": 17605 }, { "epoch": 0.3179641626763157, "grad_norm": 0.570429265499115, "learning_rate": 1.5412068275373854e-05, "loss": 0.3422, "step": 17610 }, { "epoch": 0.31805444210921646, "grad_norm": 0.43146470189094543, "learning_rate": 1.540968309093881e-05, "loss": 0.296, "step": 17615 }, { "epoch": 0.31814472154211715, "grad_norm": 0.5638782978057861, "learning_rate": 1.540729747133409e-05, "loss": 0.3522, "step": 17620 }, { "epoch": 0.3182350009750179, "grad_norm": 0.31060877442359924, "learning_rate": 1.5404911416751607e-05, "loss": 0.2625, "step": 17625 }, { "epoch": 0.3183252804079186, "grad_norm": 0.5508368015289307, "learning_rate": 1.5402524927383295e-05, "loss": 0.3542, "step": 17630 }, { "epoch": 0.3184155598408193, "grad_norm": 0.6269552707672119, "learning_rate": 1.5400138003421135e-05, "loss": 0.2714, "step": 17635 }, { "epoch": 0.31850583927372, "grad_norm": 0.656037449836731, "learning_rate": 1.5397750645057135e-05, "loss": 0.3294, "step": 17640 }, { "epoch": 0.31859611870662075, "grad_norm": 0.45344042778015137, "learning_rate": 1.539536285248334e-05, "loss": 0.3146, "step": 17645 }, { "epoch": 0.31868639813952143, "grad_norm": 0.4088731110095978, "learning_rate": 1.5392974625891834e-05, "loss": 0.2511, "step": 17650 }, { "epoch": 0.3187766775724222, "grad_norm": 0.44984954595565796, "learning_rate": 1.5390585965474727e-05, "loss": 0.2528, "step": 17655 }, { "epoch": 0.31886695700532286, "grad_norm": 0.35342809557914734, "learning_rate": 1.538819687142417e-05, "loss": 0.2453, "step": 17660 }, { "epoch": 0.3189572364382236, "grad_norm": 0.45350322127342224, "learning_rate": 1.5385807343932358e-05, "loss": 0.2969, "step": 17665 }, { "epoch": 0.3190475158711243, "grad_norm": 0.8516509532928467, "learning_rate": 1.53834173831915e-05, "loss": 0.2054, "step": 17670 }, { "epoch": 0.31913779530402503, "grad_norm": 0.8284174203872681, "learning_rate": 1.5381026989393854e-05, "loss": 0.3415, "step": 17675 }, { "epoch": 0.3192280747369257, "grad_norm": 0.39702773094177246, "learning_rate": 1.537863616273171e-05, "loss": 0.2738, "step": 17680 }, { "epoch": 0.31931835416982646, "grad_norm": 0.32880184054374695, "learning_rate": 1.5376244903397397e-05, "loss": 0.2103, "step": 17685 }, { "epoch": 0.31940863360272714, "grad_norm": 0.3985629975795746, "learning_rate": 1.5373853211583267e-05, "loss": 0.2858, "step": 17690 }, { "epoch": 0.3194989130356279, "grad_norm": 0.397734671831131, "learning_rate": 1.537146108748172e-05, "loss": 0.3038, "step": 17695 }, { "epoch": 0.31958919246852857, "grad_norm": 0.45787420868873596, "learning_rate": 1.536906853128518e-05, "loss": 0.2162, "step": 17700 }, { "epoch": 0.3196794719014293, "grad_norm": 0.32149168848991394, "learning_rate": 1.5366675543186117e-05, "loss": 0.2645, "step": 17705 }, { "epoch": 0.31976975133433, "grad_norm": 0.5299479365348816, "learning_rate": 1.5364282123377022e-05, "loss": 0.3511, "step": 17710 }, { "epoch": 0.31986003076723074, "grad_norm": 0.4254346489906311, "learning_rate": 1.5361888272050435e-05, "loss": 0.2054, "step": 17715 }, { "epoch": 0.3199503102001314, "grad_norm": 0.6408798098564148, "learning_rate": 1.5359493989398923e-05, "loss": 0.3001, "step": 17720 }, { "epoch": 0.32004058963303217, "grad_norm": 0.3622814416885376, "learning_rate": 1.5357099275615085e-05, "loss": 0.2692, "step": 17725 }, { "epoch": 0.32013086906593285, "grad_norm": 0.7824422717094421, "learning_rate": 1.535470413089156e-05, "loss": 0.2724, "step": 17730 }, { "epoch": 0.3202211484988336, "grad_norm": 0.4633120596408844, "learning_rate": 1.5352308555421022e-05, "loss": 0.2852, "step": 17735 }, { "epoch": 0.3203114279317343, "grad_norm": 0.47430092096328735, "learning_rate": 1.534991254939617e-05, "loss": 0.3123, "step": 17740 }, { "epoch": 0.320401707364635, "grad_norm": 0.38343843817710876, "learning_rate": 1.5347516113009756e-05, "loss": 0.3564, "step": 17745 }, { "epoch": 0.3204919867975357, "grad_norm": 0.5076741576194763, "learning_rate": 1.534511924645455e-05, "loss": 0.2177, "step": 17750 }, { "epoch": 0.32058226623043645, "grad_norm": 0.35495322942733765, "learning_rate": 1.5342721949923363e-05, "loss": 0.3111, "step": 17755 }, { "epoch": 0.32067254566333714, "grad_norm": 0.5616178512573242, "learning_rate": 1.534032422360904e-05, "loss": 0.3554, "step": 17760 }, { "epoch": 0.3207628250962379, "grad_norm": 0.5561057329177856, "learning_rate": 1.533792606770446e-05, "loss": 0.4187, "step": 17765 }, { "epoch": 0.32085310452913857, "grad_norm": 0.3615930676460266, "learning_rate": 1.5335527482402543e-05, "loss": 0.225, "step": 17770 }, { "epoch": 0.3209433839620393, "grad_norm": 0.7328411936759949, "learning_rate": 1.5333128467896227e-05, "loss": 0.281, "step": 17775 }, { "epoch": 0.32103366339494, "grad_norm": 0.9044609665870667, "learning_rate": 1.5330729024378504e-05, "loss": 0.2768, "step": 17780 }, { "epoch": 0.32112394282784074, "grad_norm": 0.7998683452606201, "learning_rate": 1.532832915204239e-05, "loss": 0.2857, "step": 17785 }, { "epoch": 0.3212142222607414, "grad_norm": 0.4175611138343811, "learning_rate": 1.5325928851080936e-05, "loss": 0.2223, "step": 17790 }, { "epoch": 0.32130450169364216, "grad_norm": 0.7397657632827759, "learning_rate": 1.532352812168723e-05, "loss": 0.268, "step": 17795 }, { "epoch": 0.32139478112654285, "grad_norm": 0.30127376317977905, "learning_rate": 1.5321126964054394e-05, "loss": 0.3168, "step": 17800 }, { "epoch": 0.3214850605594436, "grad_norm": 0.2943519353866577, "learning_rate": 1.531872537837558e-05, "loss": 0.2549, "step": 17805 }, { "epoch": 0.3215753399923443, "grad_norm": 0.9851113557815552, "learning_rate": 1.5316323364843976e-05, "loss": 0.1874, "step": 17810 }, { "epoch": 0.321665619425245, "grad_norm": 0.6844465136528015, "learning_rate": 1.5313920923652817e-05, "loss": 0.2449, "step": 17815 }, { "epoch": 0.3217558988581457, "grad_norm": 0.48671990633010864, "learning_rate": 1.5311518054995352e-05, "loss": 0.341, "step": 17820 }, { "epoch": 0.32184617829104645, "grad_norm": 0.25424695014953613, "learning_rate": 1.530911475906488e-05, "loss": 0.3965, "step": 17825 }, { "epoch": 0.32193645772394713, "grad_norm": 0.36367541551589966, "learning_rate": 1.5306711036054727e-05, "loss": 0.3291, "step": 17830 }, { "epoch": 0.3220267371568479, "grad_norm": 0.5668572187423706, "learning_rate": 1.5304306886158254e-05, "loss": 0.174, "step": 17835 }, { "epoch": 0.3221170165897486, "grad_norm": 0.3800593912601471, "learning_rate": 1.5301902309568854e-05, "loss": 0.2872, "step": 17840 }, { "epoch": 0.3222072960226493, "grad_norm": 0.4047221541404724, "learning_rate": 1.5299497306479966e-05, "loss": 0.3493, "step": 17845 }, { "epoch": 0.32229757545555004, "grad_norm": 0.32829201221466064, "learning_rate": 1.529709187708505e-05, "loss": 0.2295, "step": 17850 }, { "epoch": 0.32238785488845073, "grad_norm": 0.40165066719055176, "learning_rate": 1.5294686021577608e-05, "loss": 0.247, "step": 17855 }, { "epoch": 0.3224781343213515, "grad_norm": 0.31915032863616943, "learning_rate": 1.5292279740151167e-05, "loss": 0.2794, "step": 17860 }, { "epoch": 0.32256841375425216, "grad_norm": 0.5519475340843201, "learning_rate": 1.5289873032999306e-05, "loss": 0.268, "step": 17865 }, { "epoch": 0.3226586931871529, "grad_norm": 0.49204424023628235, "learning_rate": 1.5287465900315616e-05, "loss": 0.2668, "step": 17870 }, { "epoch": 0.3227489726200536, "grad_norm": 0.3488686680793762, "learning_rate": 1.5285058342293738e-05, "loss": 0.17, "step": 17875 }, { "epoch": 0.32283925205295433, "grad_norm": 0.5201186537742615, "learning_rate": 1.528265035912734e-05, "loss": 0.2825, "step": 17880 }, { "epoch": 0.322929531485855, "grad_norm": 0.3380523920059204, "learning_rate": 1.5280241951010137e-05, "loss": 0.2673, "step": 17885 }, { "epoch": 0.32301981091875576, "grad_norm": 0.4553193151950836, "learning_rate": 1.5277833118135853e-05, "loss": 0.3188, "step": 17890 }, { "epoch": 0.32311009035165644, "grad_norm": 0.49805140495300293, "learning_rate": 1.5275423860698268e-05, "loss": 0.2471, "step": 17895 }, { "epoch": 0.3232003697845572, "grad_norm": 0.4507269859313965, "learning_rate": 1.5273014178891196e-05, "loss": 0.1797, "step": 17900 }, { "epoch": 0.32329064921745787, "grad_norm": 0.5158857107162476, "learning_rate": 1.5270604072908466e-05, "loss": 0.2785, "step": 17905 }, { "epoch": 0.3233809286503586, "grad_norm": 0.4525963366031647, "learning_rate": 1.526819354294396e-05, "loss": 0.2833, "step": 17910 }, { "epoch": 0.3234712080832593, "grad_norm": 0.46041014790534973, "learning_rate": 1.5265782589191588e-05, "loss": 0.216, "step": 17915 }, { "epoch": 0.32356148751616004, "grad_norm": 0.46751755475997925, "learning_rate": 1.5263371211845296e-05, "loss": 0.2792, "step": 17920 }, { "epoch": 0.3236517669490607, "grad_norm": 0.4012288451194763, "learning_rate": 1.5260959411099055e-05, "loss": 0.339, "step": 17925 }, { "epoch": 0.32374204638196147, "grad_norm": 0.391625314950943, "learning_rate": 1.525854718714688e-05, "loss": 0.2954, "step": 17930 }, { "epoch": 0.32383232581486215, "grad_norm": 0.6322211027145386, "learning_rate": 1.5256134540182817e-05, "loss": 0.2814, "step": 17935 }, { "epoch": 0.3239226052477629, "grad_norm": 0.4969169795513153, "learning_rate": 1.5253721470400947e-05, "loss": 0.387, "step": 17940 }, { "epoch": 0.3240128846806636, "grad_norm": 1.0259813070297241, "learning_rate": 1.5251307977995383e-05, "loss": 0.2878, "step": 17945 }, { "epoch": 0.3241031641135643, "grad_norm": 0.7180622816085815, "learning_rate": 1.5248894063160271e-05, "loss": 0.2033, "step": 17950 }, { "epoch": 0.324193443546465, "grad_norm": 0.49255871772766113, "learning_rate": 1.5246479726089796e-05, "loss": 0.3112, "step": 17955 }, { "epoch": 0.32428372297936575, "grad_norm": 0.353839248418808, "learning_rate": 1.5244064966978171e-05, "loss": 0.2683, "step": 17960 }, { "epoch": 0.32437400241226644, "grad_norm": 0.6729487776756287, "learning_rate": 1.5241649786019652e-05, "loss": 0.2065, "step": 17965 }, { "epoch": 0.3244642818451672, "grad_norm": 0.44503656029701233, "learning_rate": 1.5239234183408513e-05, "loss": 0.2631, "step": 17970 }, { "epoch": 0.32455456127806787, "grad_norm": 0.31708043813705444, "learning_rate": 1.523681815933908e-05, "loss": 0.2517, "step": 17975 }, { "epoch": 0.3246448407109686, "grad_norm": 0.4860188364982605, "learning_rate": 1.5234401714005702e-05, "loss": 0.2687, "step": 17980 }, { "epoch": 0.3247351201438693, "grad_norm": 0.4818451702594757, "learning_rate": 1.5231984847602764e-05, "loss": 0.243, "step": 17985 }, { "epoch": 0.32482539957677004, "grad_norm": 0.4228139817714691, "learning_rate": 1.522956756032468e-05, "loss": 0.1765, "step": 17990 }, { "epoch": 0.3249156790096707, "grad_norm": 0.47641831636428833, "learning_rate": 1.5227149852365915e-05, "loss": 0.2793, "step": 17995 }, { "epoch": 0.32500595844257146, "grad_norm": 0.4192675054073334, "learning_rate": 1.5224731723920947e-05, "loss": 0.2514, "step": 18000 }, { "epoch": 0.32509623787547215, "grad_norm": 0.4162924885749817, "learning_rate": 1.52223131751843e-05, "loss": 0.3272, "step": 18005 }, { "epoch": 0.3251865173083729, "grad_norm": 0.43204668164253235, "learning_rate": 1.5219894206350528e-05, "loss": 0.31, "step": 18010 }, { "epoch": 0.3252767967412736, "grad_norm": 0.4375188648700714, "learning_rate": 1.5217474817614217e-05, "loss": 0.3487, "step": 18015 }, { "epoch": 0.3253670761741743, "grad_norm": 0.746900200843811, "learning_rate": 1.521505500916999e-05, "loss": 0.252, "step": 18020 }, { "epoch": 0.325457355607075, "grad_norm": 1.7763670682907104, "learning_rate": 1.521263478121251e-05, "loss": 0.3135, "step": 18025 }, { "epoch": 0.32554763503997575, "grad_norm": 0.5207912921905518, "learning_rate": 1.5210214133936458e-05, "loss": 0.2825, "step": 18030 }, { "epoch": 0.32563791447287643, "grad_norm": 0.4232496917247772, "learning_rate": 1.520779306753656e-05, "loss": 0.3172, "step": 18035 }, { "epoch": 0.3257281939057772, "grad_norm": 0.47740769386291504, "learning_rate": 1.5205371582207577e-05, "loss": 0.3233, "step": 18040 }, { "epoch": 0.32581847333867786, "grad_norm": 0.3207866847515106, "learning_rate": 1.5202949678144297e-05, "loss": 0.2103, "step": 18045 }, { "epoch": 0.3259087527715786, "grad_norm": 0.5160874128341675, "learning_rate": 1.5200527355541543e-05, "loss": 0.3405, "step": 18050 }, { "epoch": 0.3259990322044793, "grad_norm": 0.47352078557014465, "learning_rate": 1.5198104614594174e-05, "loss": 0.2959, "step": 18055 }, { "epoch": 0.32608931163738003, "grad_norm": 0.28415295481681824, "learning_rate": 1.5195681455497084e-05, "loss": 0.3159, "step": 18060 }, { "epoch": 0.3261795910702807, "grad_norm": 0.3160787522792816, "learning_rate": 1.5193257878445198e-05, "loss": 0.2751, "step": 18065 }, { "epoch": 0.32626987050318146, "grad_norm": 0.3366694748401642, "learning_rate": 1.5190833883633472e-05, "loss": 0.2634, "step": 18070 }, { "epoch": 0.32636014993608214, "grad_norm": 0.49434515833854675, "learning_rate": 1.5188409471256902e-05, "loss": 0.2425, "step": 18075 }, { "epoch": 0.3264504293689829, "grad_norm": 0.8601022958755493, "learning_rate": 1.5185984641510514e-05, "loss": 0.309, "step": 18080 }, { "epoch": 0.3265407088018836, "grad_norm": 0.48108533024787903, "learning_rate": 1.5183559394589366e-05, "loss": 0.3005, "step": 18085 }, { "epoch": 0.3266309882347843, "grad_norm": 0.4106110632419586, "learning_rate": 1.5181133730688553e-05, "loss": 0.404, "step": 18090 }, { "epoch": 0.326721267667685, "grad_norm": 0.4805768132209778, "learning_rate": 1.5178707650003204e-05, "loss": 0.3485, "step": 18095 }, { "epoch": 0.32681154710058574, "grad_norm": 0.308956503868103, "learning_rate": 1.5176281152728479e-05, "loss": 0.2114, "step": 18100 }, { "epoch": 0.32690182653348643, "grad_norm": 0.26499325037002563, "learning_rate": 1.5173854239059565e-05, "loss": 0.2471, "step": 18105 }, { "epoch": 0.32699210596638717, "grad_norm": 0.3078134059906006, "learning_rate": 1.5171426909191697e-05, "loss": 0.2572, "step": 18110 }, { "epoch": 0.32708238539928786, "grad_norm": 0.33887648582458496, "learning_rate": 1.5168999163320135e-05, "loss": 0.2228, "step": 18115 }, { "epoch": 0.3271726648321886, "grad_norm": 0.287139356136322, "learning_rate": 1.5166571001640171e-05, "loss": 0.1838, "step": 18120 }, { "epoch": 0.3272629442650893, "grad_norm": 1.0340220928192139, "learning_rate": 1.5164142424347136e-05, "loss": 0.2525, "step": 18125 }, { "epoch": 0.32735322369799, "grad_norm": 0.3287242650985718, "learning_rate": 1.5161713431636389e-05, "loss": 0.2641, "step": 18130 }, { "epoch": 0.3274435031308907, "grad_norm": 0.4560239613056183, "learning_rate": 1.5159284023703326e-05, "loss": 0.2862, "step": 18135 }, { "epoch": 0.32753378256379145, "grad_norm": 0.5136450529098511, "learning_rate": 1.5156854200743376e-05, "loss": 0.3416, "step": 18140 }, { "epoch": 0.32762406199669214, "grad_norm": 0.7004572153091431, "learning_rate": 1.5154423962951998e-05, "loss": 0.2566, "step": 18145 }, { "epoch": 0.3277143414295929, "grad_norm": 0.5109156966209412, "learning_rate": 1.5151993310524692e-05, "loss": 0.3552, "step": 18150 }, { "epoch": 0.32780462086249357, "grad_norm": 0.3315854072570801, "learning_rate": 1.5149562243656977e-05, "loss": 0.3227, "step": 18155 }, { "epoch": 0.3278949002953943, "grad_norm": 0.4538090229034424, "learning_rate": 1.5147130762544424e-05, "loss": 0.3818, "step": 18160 }, { "epoch": 0.327985179728295, "grad_norm": 0.46064648032188416, "learning_rate": 1.5144698867382626e-05, "loss": 0.2542, "step": 18165 }, { "epoch": 0.32807545916119574, "grad_norm": 0.41564473509788513, "learning_rate": 1.5142266558367205e-05, "loss": 0.2961, "step": 18170 }, { "epoch": 0.3281657385940964, "grad_norm": 0.3369181752204895, "learning_rate": 1.513983383569383e-05, "loss": 0.2766, "step": 18175 }, { "epoch": 0.32825601802699717, "grad_norm": 0.5861334204673767, "learning_rate": 1.5137400699558194e-05, "loss": 0.2411, "step": 18180 }, { "epoch": 0.32834629745989785, "grad_norm": 0.6709774136543274, "learning_rate": 1.5134967150156024e-05, "loss": 0.3143, "step": 18185 }, { "epoch": 0.3284365768927986, "grad_norm": 0.3815455734729767, "learning_rate": 1.513253318768308e-05, "loss": 0.2594, "step": 18190 }, { "epoch": 0.3285268563256993, "grad_norm": 0.5351514220237732, "learning_rate": 1.513009881233516e-05, "loss": 0.253, "step": 18195 }, { "epoch": 0.3286171357586, "grad_norm": 0.4658789038658142, "learning_rate": 1.5127664024308086e-05, "loss": 0.1936, "step": 18200 }, { "epoch": 0.3287074151915007, "grad_norm": 0.2493811547756195, "learning_rate": 1.5125228823797726e-05, "loss": 0.1633, "step": 18205 }, { "epoch": 0.32879769462440145, "grad_norm": 0.4948270320892334, "learning_rate": 1.5122793210999972e-05, "loss": 0.2486, "step": 18210 }, { "epoch": 0.32888797405730213, "grad_norm": 0.4731934368610382, "learning_rate": 1.5120357186110747e-05, "loss": 0.2792, "step": 18215 }, { "epoch": 0.3289782534902029, "grad_norm": 0.4198252558708191, "learning_rate": 1.5117920749326019e-05, "loss": 0.281, "step": 18220 }, { "epoch": 0.32906853292310356, "grad_norm": 0.4379923641681671, "learning_rate": 1.5115483900841774e-05, "loss": 0.2259, "step": 18225 }, { "epoch": 0.3291588123560043, "grad_norm": 0.4433564841747284, "learning_rate": 1.5113046640854045e-05, "loss": 0.2752, "step": 18230 }, { "epoch": 0.32924909178890505, "grad_norm": 0.33808523416519165, "learning_rate": 1.5110608969558888e-05, "loss": 0.1777, "step": 18235 }, { "epoch": 0.32933937122180573, "grad_norm": 0.2609556317329407, "learning_rate": 1.51081708871524e-05, "loss": 0.2747, "step": 18240 }, { "epoch": 0.3294296506547065, "grad_norm": 1.2248449325561523, "learning_rate": 1.51057323938307e-05, "loss": 0.3077, "step": 18245 }, { "epoch": 0.32951993008760716, "grad_norm": 0.5488759279251099, "learning_rate": 1.5103293489789953e-05, "loss": 0.3706, "step": 18250 }, { "epoch": 0.3296102095205079, "grad_norm": 0.6112303137779236, "learning_rate": 1.510085417522635e-05, "loss": 0.2726, "step": 18255 }, { "epoch": 0.3297004889534086, "grad_norm": 0.45051974058151245, "learning_rate": 1.5098414450336114e-05, "loss": 0.1565, "step": 18260 }, { "epoch": 0.32979076838630933, "grad_norm": 0.6409130096435547, "learning_rate": 1.5095974315315505e-05, "loss": 0.2526, "step": 18265 }, { "epoch": 0.32988104781921, "grad_norm": 0.4389084279537201, "learning_rate": 1.5093533770360813e-05, "loss": 0.3013, "step": 18270 }, { "epoch": 0.32997132725211076, "grad_norm": 0.4817896783351898, "learning_rate": 1.5091092815668362e-05, "loss": 0.2014, "step": 18275 }, { "epoch": 0.33006160668501144, "grad_norm": 0.4563262462615967, "learning_rate": 1.5088651451434515e-05, "loss": 0.2679, "step": 18280 }, { "epoch": 0.3301518861179122, "grad_norm": 0.5780072212219238, "learning_rate": 1.5086209677855652e-05, "loss": 0.2654, "step": 18285 }, { "epoch": 0.33024216555081287, "grad_norm": 0.6199705600738525, "learning_rate": 1.50837674951282e-05, "loss": 0.2326, "step": 18290 }, { "epoch": 0.3303324449837136, "grad_norm": 0.9827958941459656, "learning_rate": 1.508132490344862e-05, "loss": 0.3321, "step": 18295 }, { "epoch": 0.3304227244166143, "grad_norm": 0.4238829016685486, "learning_rate": 1.5078881903013392e-05, "loss": 0.2209, "step": 18300 }, { "epoch": 0.33051300384951504, "grad_norm": 0.459552139043808, "learning_rate": 1.5076438494019043e-05, "loss": 0.3734, "step": 18305 }, { "epoch": 0.3306032832824157, "grad_norm": 0.40297162532806396, "learning_rate": 1.507399467666213e-05, "loss": 0.2856, "step": 18310 }, { "epoch": 0.33069356271531647, "grad_norm": 0.4808918237686157, "learning_rate": 1.5071550451139234e-05, "loss": 0.2648, "step": 18315 }, { "epoch": 0.33078384214821716, "grad_norm": 0.5772596597671509, "learning_rate": 1.5069105817646977e-05, "loss": 0.2349, "step": 18320 }, { "epoch": 0.3308741215811179, "grad_norm": 0.407766193151474, "learning_rate": 1.5066660776382014e-05, "loss": 0.3459, "step": 18325 }, { "epoch": 0.3309644010140186, "grad_norm": 0.46875712275505066, "learning_rate": 1.506421532754103e-05, "loss": 0.2846, "step": 18330 }, { "epoch": 0.3310546804469193, "grad_norm": 0.531537652015686, "learning_rate": 1.5061769471320742e-05, "loss": 0.1981, "step": 18335 }, { "epoch": 0.33114495987982, "grad_norm": 1.2391175031661987, "learning_rate": 1.5059323207917902e-05, "loss": 0.2979, "step": 18340 }, { "epoch": 0.33123523931272075, "grad_norm": 0.3438813388347626, "learning_rate": 1.5056876537529296e-05, "loss": 0.3106, "step": 18345 }, { "epoch": 0.33132551874562144, "grad_norm": 0.8342446088790894, "learning_rate": 1.5054429460351743e-05, "loss": 0.3556, "step": 18350 }, { "epoch": 0.3314157981785222, "grad_norm": 0.5235251784324646, "learning_rate": 1.5051981976582082e-05, "loss": 0.2515, "step": 18355 }, { "epoch": 0.33150607761142287, "grad_norm": 0.4574722647666931, "learning_rate": 1.5049534086417208e-05, "loss": 0.3817, "step": 18360 }, { "epoch": 0.3315963570443236, "grad_norm": 0.48444637656211853, "learning_rate": 1.5047085790054028e-05, "loss": 0.3425, "step": 18365 }, { "epoch": 0.3316866364772243, "grad_norm": 0.4341820478439331, "learning_rate": 1.5044637087689494e-05, "loss": 0.3034, "step": 18370 }, { "epoch": 0.33177691591012504, "grad_norm": 0.5475265383720398, "learning_rate": 1.5042187979520583e-05, "loss": 0.3048, "step": 18375 }, { "epoch": 0.3318671953430257, "grad_norm": 1.0465278625488281, "learning_rate": 1.5039738465744312e-05, "loss": 0.2356, "step": 18380 }, { "epoch": 0.33195747477592646, "grad_norm": 0.5055349469184875, "learning_rate": 1.5037288546557718e-05, "loss": 0.2453, "step": 18385 }, { "epoch": 0.33204775420882715, "grad_norm": 0.4561009109020233, "learning_rate": 1.503483822215789e-05, "loss": 0.25, "step": 18390 }, { "epoch": 0.3321380336417279, "grad_norm": 0.3330610394477844, "learning_rate": 1.5032387492741932e-05, "loss": 0.329, "step": 18395 }, { "epoch": 0.3322283130746286, "grad_norm": 0.4629840850830078, "learning_rate": 1.502993635850699e-05, "loss": 0.2658, "step": 18400 }, { "epoch": 0.3323185925075293, "grad_norm": 0.5670529007911682, "learning_rate": 1.5027484819650238e-05, "loss": 0.2284, "step": 18405 }, { "epoch": 0.33240887194043, "grad_norm": 0.4390983283519745, "learning_rate": 1.5025032876368885e-05, "loss": 0.3765, "step": 18410 }, { "epoch": 0.33249915137333075, "grad_norm": 0.3989102840423584, "learning_rate": 1.5022580528860175e-05, "loss": 0.3113, "step": 18415 }, { "epoch": 0.33258943080623143, "grad_norm": 0.36408400535583496, "learning_rate": 1.5020127777321376e-05, "loss": 0.3607, "step": 18420 }, { "epoch": 0.3326797102391322, "grad_norm": 0.43424683809280396, "learning_rate": 1.50176746219498e-05, "loss": 0.234, "step": 18425 }, { "epoch": 0.33276998967203286, "grad_norm": 0.8242179751396179, "learning_rate": 1.5015221062942783e-05, "loss": 0.1775, "step": 18430 }, { "epoch": 0.3328602691049336, "grad_norm": 0.42394378781318665, "learning_rate": 1.5012767100497692e-05, "loss": 0.2686, "step": 18435 }, { "epoch": 0.3329505485378343, "grad_norm": 0.475367933511734, "learning_rate": 1.5010312734811938e-05, "loss": 0.2521, "step": 18440 }, { "epoch": 0.33304082797073503, "grad_norm": 0.5788323283195496, "learning_rate": 1.5007857966082953e-05, "loss": 0.2721, "step": 18445 }, { "epoch": 0.3331311074036357, "grad_norm": 0.41820040345191956, "learning_rate": 1.5005402794508203e-05, "loss": 0.3111, "step": 18450 }, { "epoch": 0.33322138683653646, "grad_norm": 0.5704630017280579, "learning_rate": 1.500294722028519e-05, "loss": 0.2769, "step": 18455 }, { "epoch": 0.33331166626943715, "grad_norm": 0.3807762563228607, "learning_rate": 1.5000491243611453e-05, "loss": 0.2532, "step": 18460 }, { "epoch": 0.3334019457023379, "grad_norm": 0.6831206679344177, "learning_rate": 1.4998034864684547e-05, "loss": 0.2033, "step": 18465 }, { "epoch": 0.3334922251352386, "grad_norm": 0.427998811006546, "learning_rate": 1.499557808370208e-05, "loss": 0.2237, "step": 18470 }, { "epoch": 0.3335825045681393, "grad_norm": 0.44406625628471375, "learning_rate": 1.4993120900861674e-05, "loss": 0.2092, "step": 18475 }, { "epoch": 0.33367278400104, "grad_norm": 1.016243815422058, "learning_rate": 1.4990663316360998e-05, "loss": 0.2789, "step": 18480 }, { "epoch": 0.33376306343394074, "grad_norm": 0.7131323218345642, "learning_rate": 1.498820533039774e-05, "loss": 0.2929, "step": 18485 }, { "epoch": 0.33385334286684143, "grad_norm": 0.36451780796051025, "learning_rate": 1.4985746943169634e-05, "loss": 0.3113, "step": 18490 }, { "epoch": 0.33394362229974217, "grad_norm": 0.5136528611183167, "learning_rate": 1.4983288154874436e-05, "loss": 0.1657, "step": 18495 }, { "epoch": 0.33403390173264286, "grad_norm": 0.46431905031204224, "learning_rate": 1.4980828965709938e-05, "loss": 0.2786, "step": 18500 }, { "epoch": 0.3341241811655436, "grad_norm": 0.3372277021408081, "learning_rate": 1.4978369375873963e-05, "loss": 0.2844, "step": 18505 }, { "epoch": 0.3342144605984443, "grad_norm": 0.4561137557029724, "learning_rate": 1.4975909385564366e-05, "loss": 0.2242, "step": 18510 }, { "epoch": 0.334304740031345, "grad_norm": 0.6487745046615601, "learning_rate": 1.4973448994979041e-05, "loss": 0.2536, "step": 18515 }, { "epoch": 0.3343950194642457, "grad_norm": 0.7460372447967529, "learning_rate": 1.4970988204315902e-05, "loss": 0.2419, "step": 18520 }, { "epoch": 0.33448529889714645, "grad_norm": 0.3725984990596771, "learning_rate": 1.4968527013772906e-05, "loss": 0.2879, "step": 18525 }, { "epoch": 0.33457557833004714, "grad_norm": 0.4160342216491699, "learning_rate": 1.4966065423548037e-05, "loss": 0.3512, "step": 18530 }, { "epoch": 0.3346658577629479, "grad_norm": 0.5152134895324707, "learning_rate": 1.4963603433839308e-05, "loss": 0.3255, "step": 18535 }, { "epoch": 0.33475613719584857, "grad_norm": 0.4288351535797119, "learning_rate": 1.4961141044844776e-05, "loss": 0.253, "step": 18540 }, { "epoch": 0.3348464166287493, "grad_norm": 0.4910518229007721, "learning_rate": 1.4958678256762515e-05, "loss": 0.1859, "step": 18545 }, { "epoch": 0.33493669606165, "grad_norm": 0.5223522782325745, "learning_rate": 1.4956215069790645e-05, "loss": 0.3078, "step": 18550 }, { "epoch": 0.33502697549455074, "grad_norm": 0.4163327217102051, "learning_rate": 1.4953751484127304e-05, "loss": 0.294, "step": 18555 }, { "epoch": 0.3351172549274514, "grad_norm": 0.45471954345703125, "learning_rate": 1.4951287499970677e-05, "loss": 0.1941, "step": 18560 }, { "epoch": 0.33520753436035217, "grad_norm": 0.564900815486908, "learning_rate": 1.4948823117518967e-05, "loss": 0.2978, "step": 18565 }, { "epoch": 0.33529781379325285, "grad_norm": 0.6309841275215149, "learning_rate": 1.494635833697042e-05, "loss": 0.2194, "step": 18570 }, { "epoch": 0.3353880932261536, "grad_norm": 0.39717692136764526, "learning_rate": 1.4943893158523306e-05, "loss": 0.2625, "step": 18575 }, { "epoch": 0.3354783726590543, "grad_norm": 0.5182188749313354, "learning_rate": 1.4941427582375935e-05, "loss": 0.4234, "step": 18580 }, { "epoch": 0.335568652091955, "grad_norm": 0.43272313475608826, "learning_rate": 1.4938961608726642e-05, "loss": 0.2652, "step": 18585 }, { "epoch": 0.3356589315248557, "grad_norm": 1.8120278120040894, "learning_rate": 1.4936495237773798e-05, "loss": 0.2911, "step": 18590 }, { "epoch": 0.33574921095775645, "grad_norm": 0.6251808404922485, "learning_rate": 1.4934028469715805e-05, "loss": 0.2554, "step": 18595 }, { "epoch": 0.33583949039065714, "grad_norm": 0.3555147647857666, "learning_rate": 1.493156130475109e-05, "loss": 0.2023, "step": 18600 }, { "epoch": 0.3359297698235579, "grad_norm": 0.4704226851463318, "learning_rate": 1.4929093743078128e-05, "loss": 0.2813, "step": 18605 }, { "epoch": 0.33602004925645856, "grad_norm": 0.6650073528289795, "learning_rate": 1.4926625784895414e-05, "loss": 0.3789, "step": 18610 }, { "epoch": 0.3361103286893593, "grad_norm": 0.47177284955978394, "learning_rate": 1.4924157430401473e-05, "loss": 0.3072, "step": 18615 }, { "epoch": 0.33620060812226, "grad_norm": 0.5198081731796265, "learning_rate": 1.4921688679794871e-05, "loss": 0.2936, "step": 18620 }, { "epoch": 0.33629088755516073, "grad_norm": 0.36302539706230164, "learning_rate": 1.4919219533274196e-05, "loss": 0.1642, "step": 18625 }, { "epoch": 0.3363811669880615, "grad_norm": 0.3527725338935852, "learning_rate": 1.4916749991038077e-05, "loss": 0.1546, "step": 18630 }, { "epoch": 0.33647144642096216, "grad_norm": 1.1330320835113525, "learning_rate": 1.4914280053285168e-05, "loss": 0.2461, "step": 18635 }, { "epoch": 0.3365617258538629, "grad_norm": 0.4595145881175995, "learning_rate": 1.4911809720214163e-05, "loss": 0.207, "step": 18640 }, { "epoch": 0.3366520052867636, "grad_norm": 0.31736651062965393, "learning_rate": 1.4909338992023777e-05, "loss": 0.2515, "step": 18645 }, { "epoch": 0.33674228471966433, "grad_norm": 0.4406778812408447, "learning_rate": 1.4906867868912763e-05, "loss": 0.2084, "step": 18650 }, { "epoch": 0.336832564152565, "grad_norm": 0.3526134192943573, "learning_rate": 1.4904396351079903e-05, "loss": 0.271, "step": 18655 }, { "epoch": 0.33692284358546576, "grad_norm": 0.4426441490650177, "learning_rate": 1.4901924438724018e-05, "loss": 0.292, "step": 18660 }, { "epoch": 0.33701312301836645, "grad_norm": 0.5876598954200745, "learning_rate": 1.489945213204395e-05, "loss": 0.3156, "step": 18665 }, { "epoch": 0.3371034024512672, "grad_norm": 0.5972442030906677, "learning_rate": 1.4896979431238582e-05, "loss": 0.3115, "step": 18670 }, { "epoch": 0.3371936818841679, "grad_norm": 0.4676417112350464, "learning_rate": 1.4894506336506823e-05, "loss": 0.2342, "step": 18675 }, { "epoch": 0.3372839613170686, "grad_norm": 0.332589715719223, "learning_rate": 1.4892032848047615e-05, "loss": 0.3179, "step": 18680 }, { "epoch": 0.3373742407499693, "grad_norm": 0.46008965373039246, "learning_rate": 1.4889558966059934e-05, "loss": 0.2315, "step": 18685 }, { "epoch": 0.33746452018287004, "grad_norm": 0.29239562153816223, "learning_rate": 1.4887084690742783e-05, "loss": 0.267, "step": 18690 }, { "epoch": 0.33755479961577073, "grad_norm": 0.41400787234306335, "learning_rate": 1.4884610022295204e-05, "loss": 0.3701, "step": 18695 }, { "epoch": 0.33764507904867147, "grad_norm": 0.31794947385787964, "learning_rate": 1.4882134960916258e-05, "loss": 0.1902, "step": 18700 }, { "epoch": 0.33773535848157216, "grad_norm": 0.37944456934928894, "learning_rate": 1.4879659506805056e-05, "loss": 0.3004, "step": 18705 }, { "epoch": 0.3378256379144729, "grad_norm": 0.5071249008178711, "learning_rate": 1.4877183660160724e-05, "loss": 0.2749, "step": 18710 }, { "epoch": 0.3379159173473736, "grad_norm": 0.43339505791664124, "learning_rate": 1.4874707421182426e-05, "loss": 0.214, "step": 18715 }, { "epoch": 0.3380061967802743, "grad_norm": 0.31769007444381714, "learning_rate": 1.487223079006936e-05, "loss": 0.295, "step": 18720 }, { "epoch": 0.338096476213175, "grad_norm": 0.7044994831085205, "learning_rate": 1.4869753767020749e-05, "loss": 0.3715, "step": 18725 }, { "epoch": 0.33818675564607575, "grad_norm": 0.3921722173690796, "learning_rate": 1.4867276352235858e-05, "loss": 0.3381, "step": 18730 }, { "epoch": 0.33827703507897644, "grad_norm": 0.5172808766365051, "learning_rate": 1.4864798545913968e-05, "loss": 0.2997, "step": 18735 }, { "epoch": 0.3383673145118772, "grad_norm": 0.49731895327568054, "learning_rate": 1.486232034825441e-05, "loss": 0.2703, "step": 18740 }, { "epoch": 0.33845759394477787, "grad_norm": 0.3825044631958008, "learning_rate": 1.4859841759456532e-05, "loss": 0.3019, "step": 18745 }, { "epoch": 0.3385478733776786, "grad_norm": 0.4261285662651062, "learning_rate": 1.4857362779719715e-05, "loss": 0.27, "step": 18750 }, { "epoch": 0.3386381528105793, "grad_norm": 0.34586456418037415, "learning_rate": 1.4854883409243383e-05, "loss": 0.1818, "step": 18755 }, { "epoch": 0.33872843224348004, "grad_norm": 0.396688848733902, "learning_rate": 1.4852403648226976e-05, "loss": 0.2738, "step": 18760 }, { "epoch": 0.3388187116763807, "grad_norm": 0.6118276715278625, "learning_rate": 1.4849923496869979e-05, "loss": 0.2463, "step": 18765 }, { "epoch": 0.33890899110928147, "grad_norm": 0.3033411502838135, "learning_rate": 1.4847442955371896e-05, "loss": 0.26, "step": 18770 }, { "epoch": 0.33899927054218215, "grad_norm": 0.5740275979042053, "learning_rate": 1.4844962023932274e-05, "loss": 0.2029, "step": 18775 }, { "epoch": 0.3390895499750829, "grad_norm": 0.4284028112888336, "learning_rate": 1.4842480702750684e-05, "loss": 0.3111, "step": 18780 }, { "epoch": 0.3391798294079836, "grad_norm": 0.6427981853485107, "learning_rate": 1.4839998992026726e-05, "loss": 0.1807, "step": 18785 }, { "epoch": 0.3392701088408843, "grad_norm": 0.36368077993392944, "learning_rate": 1.4837516891960043e-05, "loss": 0.2607, "step": 18790 }, { "epoch": 0.339360388273785, "grad_norm": 0.4679321050643921, "learning_rate": 1.4835034402750294e-05, "loss": 0.2857, "step": 18795 }, { "epoch": 0.33945066770668575, "grad_norm": 0.5633551478385925, "learning_rate": 1.4832551524597183e-05, "loss": 0.3887, "step": 18800 }, { "epoch": 0.33954094713958644, "grad_norm": 0.6296786665916443, "learning_rate": 1.4830068257700441e-05, "loss": 0.3002, "step": 18805 }, { "epoch": 0.3396312265724872, "grad_norm": 0.5864987969398499, "learning_rate": 1.4827584602259824e-05, "loss": 0.2391, "step": 18810 }, { "epoch": 0.33972150600538786, "grad_norm": 0.36309221386909485, "learning_rate": 1.4825100558475122e-05, "loss": 0.3185, "step": 18815 }, { "epoch": 0.3398117854382886, "grad_norm": 0.4260045886039734, "learning_rate": 1.4822616126546168e-05, "loss": 0.2521, "step": 18820 }, { "epoch": 0.3399020648711893, "grad_norm": 0.5944346189498901, "learning_rate": 1.4820131306672807e-05, "loss": 0.3321, "step": 18825 }, { "epoch": 0.33999234430409003, "grad_norm": 0.3762878179550171, "learning_rate": 1.4817646099054929e-05, "loss": 0.2522, "step": 18830 }, { "epoch": 0.3400826237369907, "grad_norm": 0.2695557177066803, "learning_rate": 1.4815160503892449e-05, "loss": 0.2246, "step": 18835 }, { "epoch": 0.34017290316989146, "grad_norm": 0.4036068022251129, "learning_rate": 1.4812674521385317e-05, "loss": 0.2663, "step": 18840 }, { "epoch": 0.34026318260279215, "grad_norm": 0.36277270317077637, "learning_rate": 1.4810188151733513e-05, "loss": 0.2648, "step": 18845 }, { "epoch": 0.3403534620356929, "grad_norm": 0.25366660952568054, "learning_rate": 1.4807701395137041e-05, "loss": 0.2239, "step": 18850 }, { "epoch": 0.3404437414685936, "grad_norm": 0.41909897327423096, "learning_rate": 1.4805214251795951e-05, "loss": 0.0684, "step": 18855 }, { "epoch": 0.3405340209014943, "grad_norm": 0.3215011954307556, "learning_rate": 1.4802726721910312e-05, "loss": 0.3305, "step": 18860 }, { "epoch": 0.340624300334395, "grad_norm": 1.6500555276870728, "learning_rate": 1.4800238805680223e-05, "loss": 0.3719, "step": 18865 }, { "epoch": 0.34071457976729574, "grad_norm": 0.6505630016326904, "learning_rate": 1.4797750503305828e-05, "loss": 0.2377, "step": 18870 }, { "epoch": 0.34080485920019643, "grad_norm": 0.45783016085624695, "learning_rate": 1.4795261814987286e-05, "loss": 0.2104, "step": 18875 }, { "epoch": 0.34089513863309717, "grad_norm": 0.6193820834159851, "learning_rate": 1.4792772740924795e-05, "loss": 0.3273, "step": 18880 }, { "epoch": 0.34098541806599786, "grad_norm": 0.42955857515335083, "learning_rate": 1.4790283281318585e-05, "loss": 0.2735, "step": 18885 }, { "epoch": 0.3410756974988986, "grad_norm": 0.4381408989429474, "learning_rate": 1.4787793436368913e-05, "loss": 0.2838, "step": 18890 }, { "epoch": 0.3411659769317993, "grad_norm": 0.5888350605964661, "learning_rate": 1.4785303206276072e-05, "loss": 0.3188, "step": 18895 }, { "epoch": 0.34125625636470003, "grad_norm": 0.3903317451477051, "learning_rate": 1.478281259124038e-05, "loss": 0.2102, "step": 18900 }, { "epoch": 0.3413465357976007, "grad_norm": 0.4762745797634125, "learning_rate": 1.4780321591462186e-05, "loss": 0.2934, "step": 18905 }, { "epoch": 0.34143681523050146, "grad_norm": 0.4727645218372345, "learning_rate": 1.4777830207141878e-05, "loss": 0.3152, "step": 18910 }, { "epoch": 0.34152709466340214, "grad_norm": 0.30822187662124634, "learning_rate": 1.477533843847987e-05, "loss": 0.3066, "step": 18915 }, { "epoch": 0.3416173740963029, "grad_norm": 0.43814173340797424, "learning_rate": 1.4772846285676603e-05, "loss": 0.3401, "step": 18920 }, { "epoch": 0.34170765352920357, "grad_norm": 0.4088091552257538, "learning_rate": 1.4770353748932556e-05, "loss": 0.2896, "step": 18925 }, { "epoch": 0.3417979329621043, "grad_norm": 0.44749176502227783, "learning_rate": 1.476786082844823e-05, "loss": 0.2602, "step": 18930 }, { "epoch": 0.341888212395005, "grad_norm": 0.6866820454597473, "learning_rate": 1.476536752442417e-05, "loss": 0.2833, "step": 18935 }, { "epoch": 0.34197849182790574, "grad_norm": 0.6032989621162415, "learning_rate": 1.4762873837060939e-05, "loss": 0.3363, "step": 18940 }, { "epoch": 0.3420687712608064, "grad_norm": 0.31822606921195984, "learning_rate": 1.4760379766559136e-05, "loss": 0.2322, "step": 18945 }, { "epoch": 0.34215905069370717, "grad_norm": 0.46180498600006104, "learning_rate": 1.4757885313119394e-05, "loss": 0.329, "step": 18950 }, { "epoch": 0.34224933012660785, "grad_norm": 0.4387761056423187, "learning_rate": 1.475539047694237e-05, "loss": 0.194, "step": 18955 }, { "epoch": 0.3423396095595086, "grad_norm": 0.3619935214519501, "learning_rate": 1.4752895258228757e-05, "loss": 0.2013, "step": 18960 }, { "epoch": 0.3424298889924093, "grad_norm": 0.33837059140205383, "learning_rate": 1.4750399657179278e-05, "loss": 0.214, "step": 18965 }, { "epoch": 0.34252016842531, "grad_norm": 0.4133617877960205, "learning_rate": 1.4747903673994685e-05, "loss": 0.2423, "step": 18970 }, { "epoch": 0.3426104478582107, "grad_norm": 0.5128462910652161, "learning_rate": 1.4745407308875764e-05, "loss": 0.2934, "step": 18975 }, { "epoch": 0.34270072729111145, "grad_norm": 0.29368436336517334, "learning_rate": 1.4742910562023325e-05, "loss": 0.2599, "step": 18980 }, { "epoch": 0.34279100672401214, "grad_norm": 0.6440581679344177, "learning_rate": 1.4740413433638217e-05, "loss": 0.2811, "step": 18985 }, { "epoch": 0.3428812861569129, "grad_norm": 0.32762810587882996, "learning_rate": 1.4737915923921314e-05, "loss": 0.289, "step": 18990 }, { "epoch": 0.34297156558981357, "grad_norm": 0.6130682229995728, "learning_rate": 1.4735418033073523e-05, "loss": 0.2321, "step": 18995 }, { "epoch": 0.3430618450227143, "grad_norm": 0.461866557598114, "learning_rate": 1.4732919761295779e-05, "loss": 0.254, "step": 19000 }, { "epoch": 0.343152124455615, "grad_norm": 0.44246944785118103, "learning_rate": 1.4730421108789054e-05, "loss": 0.363, "step": 19005 }, { "epoch": 0.34324240388851573, "grad_norm": 0.4307284355163574, "learning_rate": 1.4727922075754344e-05, "loss": 0.288, "step": 19010 }, { "epoch": 0.3433326833214164, "grad_norm": 0.5668054223060608, "learning_rate": 1.4725422662392678e-05, "loss": 0.2427, "step": 19015 }, { "epoch": 0.34342296275431716, "grad_norm": 0.5066699981689453, "learning_rate": 1.4722922868905118e-05, "loss": 0.2555, "step": 19020 }, { "epoch": 0.3435132421872179, "grad_norm": 0.6849452257156372, "learning_rate": 1.4720422695492752e-05, "loss": 0.3359, "step": 19025 }, { "epoch": 0.3436035216201186, "grad_norm": 0.18425752222537994, "learning_rate": 1.4717922142356698e-05, "loss": 0.2555, "step": 19030 }, { "epoch": 0.34369380105301933, "grad_norm": 0.3990256190299988, "learning_rate": 1.4715421209698111e-05, "loss": 0.2023, "step": 19035 }, { "epoch": 0.34378408048592, "grad_norm": 0.5287699103355408, "learning_rate": 1.4712919897718174e-05, "loss": 0.1971, "step": 19040 }, { "epoch": 0.34387435991882076, "grad_norm": 0.4891014099121094, "learning_rate": 1.4710418206618093e-05, "loss": 0.2733, "step": 19045 }, { "epoch": 0.34396463935172145, "grad_norm": 0.7189919352531433, "learning_rate": 1.470791613659912e-05, "loss": 0.2504, "step": 19050 }, { "epoch": 0.3440549187846222, "grad_norm": 0.4761006832122803, "learning_rate": 1.4705413687862523e-05, "loss": 0.2638, "step": 19055 }, { "epoch": 0.3441451982175229, "grad_norm": 0.4221712350845337, "learning_rate": 1.4702910860609607e-05, "loss": 0.3228, "step": 19060 }, { "epoch": 0.3442354776504236, "grad_norm": 0.4744848608970642, "learning_rate": 1.4700407655041701e-05, "loss": 0.3331, "step": 19065 }, { "epoch": 0.3443257570833243, "grad_norm": 0.5731523633003235, "learning_rate": 1.469790407136018e-05, "loss": 0.3992, "step": 19070 }, { "epoch": 0.34441603651622504, "grad_norm": 0.5367938876152039, "learning_rate": 1.469540010976643e-05, "loss": 0.1992, "step": 19075 }, { "epoch": 0.34450631594912573, "grad_norm": 0.5153685212135315, "learning_rate": 1.4692895770461881e-05, "loss": 0.2705, "step": 19080 }, { "epoch": 0.34459659538202647, "grad_norm": 0.4405202269554138, "learning_rate": 1.469039105364799e-05, "loss": 0.335, "step": 19085 }, { "epoch": 0.34468687481492716, "grad_norm": 0.5037932395935059, "learning_rate": 1.4687885959526239e-05, "loss": 0.2357, "step": 19090 }, { "epoch": 0.3447771542478279, "grad_norm": 0.47482338547706604, "learning_rate": 1.4685380488298148e-05, "loss": 0.2591, "step": 19095 }, { "epoch": 0.3448674336807286, "grad_norm": 0.41249433159828186, "learning_rate": 1.468287464016526e-05, "loss": 0.2142, "step": 19100 }, { "epoch": 0.3449577131136293, "grad_norm": 0.47318199276924133, "learning_rate": 1.4680368415329158e-05, "loss": 0.2712, "step": 19105 }, { "epoch": 0.34504799254653, "grad_norm": 0.41339215636253357, "learning_rate": 1.4677861813991446e-05, "loss": 0.3777, "step": 19110 }, { "epoch": 0.34513827197943076, "grad_norm": 0.41870197653770447, "learning_rate": 1.4675354836353762e-05, "loss": 0.2222, "step": 19115 }, { "epoch": 0.34522855141233144, "grad_norm": 0.45384618639945984, "learning_rate": 1.4672847482617775e-05, "loss": 0.1345, "step": 19120 }, { "epoch": 0.3453188308452322, "grad_norm": 0.6229274868965149, "learning_rate": 1.4670339752985182e-05, "loss": 0.2492, "step": 19125 }, { "epoch": 0.34540911027813287, "grad_norm": 0.6140114665031433, "learning_rate": 1.4667831647657714e-05, "loss": 0.2459, "step": 19130 }, { "epoch": 0.3454993897110336, "grad_norm": 0.6977282762527466, "learning_rate": 1.4665323166837126e-05, "loss": 0.3348, "step": 19135 }, { "epoch": 0.3455896691439343, "grad_norm": 0.4640437066555023, "learning_rate": 1.4662814310725214e-05, "loss": 0.2192, "step": 19140 }, { "epoch": 0.34567994857683504, "grad_norm": 0.5979374051094055, "learning_rate": 1.4660305079523784e-05, "loss": 0.2042, "step": 19145 }, { "epoch": 0.3457702280097357, "grad_norm": 0.8193564414978027, "learning_rate": 1.4657795473434702e-05, "loss": 0.4243, "step": 19150 }, { "epoch": 0.34586050744263647, "grad_norm": 0.6361282467842102, "learning_rate": 1.4655285492659837e-05, "loss": 0.2487, "step": 19155 }, { "epoch": 0.34595078687553715, "grad_norm": 0.5085448622703552, "learning_rate": 1.46527751374011e-05, "loss": 0.2778, "step": 19160 }, { "epoch": 0.3460410663084379, "grad_norm": 0.3158360421657562, "learning_rate": 1.4650264407860432e-05, "loss": 0.1997, "step": 19165 }, { "epoch": 0.3461313457413386, "grad_norm": 0.3488834798336029, "learning_rate": 1.4647753304239807e-05, "loss": 0.2658, "step": 19170 }, { "epoch": 0.3462216251742393, "grad_norm": 0.3840755820274353, "learning_rate": 1.4645241826741218e-05, "loss": 0.2221, "step": 19175 }, { "epoch": 0.34631190460714, "grad_norm": 0.5721611380577087, "learning_rate": 1.46427299755667e-05, "loss": 0.3702, "step": 19180 }, { "epoch": 0.34640218404004075, "grad_norm": 0.38359248638153076, "learning_rate": 1.4640217750918308e-05, "loss": 0.2568, "step": 19185 }, { "epoch": 0.34649246347294144, "grad_norm": 0.43777433037757874, "learning_rate": 1.4637705152998139e-05, "loss": 0.3129, "step": 19190 }, { "epoch": 0.3465827429058422, "grad_norm": 0.3485426902770996, "learning_rate": 1.4635192182008308e-05, "loss": 0.2971, "step": 19195 }, { "epoch": 0.34667302233874286, "grad_norm": 0.4009533226490021, "learning_rate": 1.4632678838150966e-05, "loss": 0.2504, "step": 19200 }, { "epoch": 0.3467633017716436, "grad_norm": 0.2905276417732239, "learning_rate": 1.4630165121628299e-05, "loss": 0.2163, "step": 19205 }, { "epoch": 0.3468535812045443, "grad_norm": 0.5106023550033569, "learning_rate": 1.4627651032642508e-05, "loss": 0.314, "step": 19210 }, { "epoch": 0.34694386063744503, "grad_norm": 0.4790681004524231, "learning_rate": 1.462513657139584e-05, "loss": 0.2759, "step": 19215 }, { "epoch": 0.3470341400703457, "grad_norm": 0.39027634263038635, "learning_rate": 1.4622621738090563e-05, "loss": 0.3259, "step": 19220 }, { "epoch": 0.34712441950324646, "grad_norm": 0.7681130170822144, "learning_rate": 1.4620106532928975e-05, "loss": 0.2633, "step": 19225 }, { "epoch": 0.34721469893614715, "grad_norm": 0.5734387636184692, "learning_rate": 1.4617590956113408e-05, "loss": 0.3346, "step": 19230 }, { "epoch": 0.3473049783690479, "grad_norm": 0.3158043920993805, "learning_rate": 1.4615075007846222e-05, "loss": 0.1993, "step": 19235 }, { "epoch": 0.3473952578019486, "grad_norm": 0.3726866543292999, "learning_rate": 1.4612558688329808e-05, "loss": 0.2789, "step": 19240 }, { "epoch": 0.3474855372348493, "grad_norm": 0.8030419945716858, "learning_rate": 1.4610041997766581e-05, "loss": 0.41, "step": 19245 }, { "epoch": 0.34757581666775, "grad_norm": 0.45678412914276123, "learning_rate": 1.4607524936358996e-05, "loss": 0.2492, "step": 19250 }, { "epoch": 0.34766609610065075, "grad_norm": 0.3806954324245453, "learning_rate": 1.4605007504309531e-05, "loss": 0.2823, "step": 19255 }, { "epoch": 0.34775637553355143, "grad_norm": 0.646897554397583, "learning_rate": 1.4602489701820693e-05, "loss": 0.2263, "step": 19260 }, { "epoch": 0.3478466549664522, "grad_norm": 0.5565019845962524, "learning_rate": 1.4599971529095019e-05, "loss": 0.1936, "step": 19265 }, { "epoch": 0.34793693439935286, "grad_norm": 0.3828217685222626, "learning_rate": 1.4597452986335084e-05, "loss": 0.2337, "step": 19270 }, { "epoch": 0.3480272138322536, "grad_norm": 0.49128270149230957, "learning_rate": 1.459493407374348e-05, "loss": 0.2843, "step": 19275 }, { "epoch": 0.3481174932651543, "grad_norm": 0.40793755650520325, "learning_rate": 1.459241479152284e-05, "loss": 0.3523, "step": 19280 }, { "epoch": 0.34820777269805503, "grad_norm": 0.46196678280830383, "learning_rate": 1.4589895139875818e-05, "loss": 0.2962, "step": 19285 }, { "epoch": 0.3482980521309557, "grad_norm": 0.9009236693382263, "learning_rate": 1.4587375119005107e-05, "loss": 0.3176, "step": 19290 }, { "epoch": 0.34838833156385646, "grad_norm": 0.40262532234191895, "learning_rate": 1.458485472911342e-05, "loss": 0.3026, "step": 19295 }, { "epoch": 0.34847861099675714, "grad_norm": 0.41505348682403564, "learning_rate": 1.4582333970403506e-05, "loss": 0.2214, "step": 19300 }, { "epoch": 0.3485688904296579, "grad_norm": 0.38175857067108154, "learning_rate": 1.457981284307814e-05, "loss": 0.1901, "step": 19305 }, { "epoch": 0.34865916986255857, "grad_norm": 0.41143855452537537, "learning_rate": 1.4577291347340128e-05, "loss": 0.3221, "step": 19310 }, { "epoch": 0.3487494492954593, "grad_norm": 0.5829758048057556, "learning_rate": 1.4574769483392308e-05, "loss": 0.1806, "step": 19315 }, { "epoch": 0.34883972872836, "grad_norm": 0.4416637420654297, "learning_rate": 1.4572247251437545e-05, "loss": 0.2414, "step": 19320 }, { "epoch": 0.34893000816126074, "grad_norm": 0.3901643455028534, "learning_rate": 1.4569724651678736e-05, "loss": 0.2626, "step": 19325 }, { "epoch": 0.3490202875941614, "grad_norm": 0.6232507824897766, "learning_rate": 1.4567201684318798e-05, "loss": 0.3614, "step": 19330 }, { "epoch": 0.34911056702706217, "grad_norm": 0.48857784271240234, "learning_rate": 1.4564678349560697e-05, "loss": 0.2078, "step": 19335 }, { "epoch": 0.34920084645996285, "grad_norm": 0.4701916575431824, "learning_rate": 1.4562154647607409e-05, "loss": 0.259, "step": 19340 }, { "epoch": 0.3492911258928636, "grad_norm": 0.4478681981563568, "learning_rate": 1.455963057866195e-05, "loss": 0.229, "step": 19345 }, { "epoch": 0.3493814053257643, "grad_norm": 0.6830845475196838, "learning_rate": 1.4557106142927362e-05, "loss": 0.3134, "step": 19350 }, { "epoch": 0.349471684758665, "grad_norm": 0.6161406636238098, "learning_rate": 1.4554581340606721e-05, "loss": 0.2472, "step": 19355 }, { "epoch": 0.3495619641915657, "grad_norm": 0.711693525314331, "learning_rate": 1.4552056171903125e-05, "loss": 0.225, "step": 19360 }, { "epoch": 0.34965224362446645, "grad_norm": 0.44821977615356445, "learning_rate": 1.4549530637019702e-05, "loss": 0.4006, "step": 19365 }, { "epoch": 0.34974252305736714, "grad_norm": 0.25322824716567993, "learning_rate": 1.4547004736159622e-05, "loss": 0.2437, "step": 19370 }, { "epoch": 0.3498328024902679, "grad_norm": 0.6989191174507141, "learning_rate": 1.4544478469526071e-05, "loss": 0.3077, "step": 19375 }, { "epoch": 0.34992308192316857, "grad_norm": 0.7583245038986206, "learning_rate": 1.4541951837322268e-05, "loss": 0.31, "step": 19380 }, { "epoch": 0.3500133613560693, "grad_norm": 0.4083063006401062, "learning_rate": 1.4539424839751463e-05, "loss": 0.2243, "step": 19385 }, { "epoch": 0.35010364078897, "grad_norm": 0.5561839938163757, "learning_rate": 1.4536897477016936e-05, "loss": 0.2594, "step": 19390 }, { "epoch": 0.35019392022187074, "grad_norm": 0.29318517446517944, "learning_rate": 1.453436974932199e-05, "loss": 0.2789, "step": 19395 }, { "epoch": 0.3502841996547714, "grad_norm": 0.23433071374893188, "learning_rate": 1.4531841656869967e-05, "loss": 0.2632, "step": 19400 }, { "epoch": 0.35037447908767216, "grad_norm": 0.5063546895980835, "learning_rate": 1.4529313199864233e-05, "loss": 0.3407, "step": 19405 }, { "epoch": 0.35046475852057285, "grad_norm": 0.49939775466918945, "learning_rate": 1.4526784378508181e-05, "loss": 0.2844, "step": 19410 }, { "epoch": 0.3505550379534736, "grad_norm": 0.3330202102661133, "learning_rate": 1.4524255193005242e-05, "loss": 0.1756, "step": 19415 }, { "epoch": 0.35064531738637433, "grad_norm": 0.7029513120651245, "learning_rate": 1.4521725643558866e-05, "loss": 0.2963, "step": 19420 }, { "epoch": 0.350735596819275, "grad_norm": 0.7581319808959961, "learning_rate": 1.4519195730372539e-05, "loss": 0.1796, "step": 19425 }, { "epoch": 0.35082587625217576, "grad_norm": 1.4334921836853027, "learning_rate": 1.451666545364977e-05, "loss": 0.2531, "step": 19430 }, { "epoch": 0.35091615568507645, "grad_norm": 0.6741414070129395, "learning_rate": 1.4514134813594108e-05, "loss": 0.2778, "step": 19435 }, { "epoch": 0.3510064351179772, "grad_norm": 0.45837345719337463, "learning_rate": 1.4511603810409121e-05, "loss": 0.3039, "step": 19440 }, { "epoch": 0.3510967145508779, "grad_norm": 0.6172248721122742, "learning_rate": 1.4509072444298406e-05, "loss": 0.3513, "step": 19445 }, { "epoch": 0.3511869939837786, "grad_norm": 0.5250604152679443, "learning_rate": 1.45065407154656e-05, "loss": 0.2898, "step": 19450 }, { "epoch": 0.3512772734166793, "grad_norm": 0.4318670630455017, "learning_rate": 1.450400862411436e-05, "loss": 0.2264, "step": 19455 }, { "epoch": 0.35136755284958004, "grad_norm": 0.4514148533344269, "learning_rate": 1.4501476170448375e-05, "loss": 0.3025, "step": 19460 }, { "epoch": 0.35145783228248073, "grad_norm": 0.5048012137413025, "learning_rate": 1.4498943354671355e-05, "loss": 0.3115, "step": 19465 }, { "epoch": 0.3515481117153815, "grad_norm": 0.3340650498867035, "learning_rate": 1.4496410176987056e-05, "loss": 0.1856, "step": 19470 }, { "epoch": 0.35163839114828216, "grad_norm": 0.6395434141159058, "learning_rate": 1.4493876637599251e-05, "loss": 0.316, "step": 19475 }, { "epoch": 0.3517286705811829, "grad_norm": 0.32689008116722107, "learning_rate": 1.4491342736711747e-05, "loss": 0.2629, "step": 19480 }, { "epoch": 0.3518189500140836, "grad_norm": 0.3152740001678467, "learning_rate": 1.448880847452837e-05, "loss": 0.268, "step": 19485 }, { "epoch": 0.35190922944698433, "grad_norm": 0.4254841208457947, "learning_rate": 1.4486273851252989e-05, "loss": 0.2718, "step": 19490 }, { "epoch": 0.351999508879885, "grad_norm": 0.2890605330467224, "learning_rate": 1.4483738867089497e-05, "loss": 0.2361, "step": 19495 }, { "epoch": 0.35208978831278576, "grad_norm": 0.49806782603263855, "learning_rate": 1.4481203522241812e-05, "loss": 0.2602, "step": 19500 }, { "epoch": 0.35218006774568644, "grad_norm": 0.4263138473033905, "learning_rate": 1.4478667816913884e-05, "loss": 0.2234, "step": 19505 }, { "epoch": 0.3522703471785872, "grad_norm": 0.490191787481308, "learning_rate": 1.4476131751309695e-05, "loss": 0.2344, "step": 19510 }, { "epoch": 0.35236062661148787, "grad_norm": 0.5755016803741455, "learning_rate": 1.4473595325633249e-05, "loss": 0.3289, "step": 19515 }, { "epoch": 0.3524509060443886, "grad_norm": 0.7282212972640991, "learning_rate": 1.4471058540088588e-05, "loss": 0.3026, "step": 19520 }, { "epoch": 0.3525411854772893, "grad_norm": 0.45803165435791016, "learning_rate": 1.4468521394879773e-05, "loss": 0.3556, "step": 19525 }, { "epoch": 0.35263146491019004, "grad_norm": 0.47527822852134705, "learning_rate": 1.44659838902109e-05, "loss": 0.3038, "step": 19530 }, { "epoch": 0.3527217443430907, "grad_norm": 0.3465036153793335, "learning_rate": 1.4463446026286096e-05, "loss": 0.2434, "step": 19535 }, { "epoch": 0.35281202377599147, "grad_norm": 0.5368249416351318, "learning_rate": 1.4460907803309511e-05, "loss": 0.2178, "step": 19540 }, { "epoch": 0.35290230320889215, "grad_norm": 0.46956199407577515, "learning_rate": 1.4458369221485327e-05, "loss": 0.2635, "step": 19545 }, { "epoch": 0.3529925826417929, "grad_norm": 0.9469265937805176, "learning_rate": 1.4455830281017756e-05, "loss": 0.1715, "step": 19550 }, { "epoch": 0.3530828620746936, "grad_norm": 0.3272339403629303, "learning_rate": 1.4453290982111035e-05, "loss": 0.2596, "step": 19555 }, { "epoch": 0.3531731415075943, "grad_norm": 0.5277330875396729, "learning_rate": 1.4450751324969432e-05, "loss": 0.3983, "step": 19560 }, { "epoch": 0.353263420940495, "grad_norm": 0.4450097680091858, "learning_rate": 1.4448211309797247e-05, "loss": 0.2898, "step": 19565 }, { "epoch": 0.35335370037339575, "grad_norm": 0.5424237847328186, "learning_rate": 1.4445670936798806e-05, "loss": 0.3631, "step": 19570 }, { "epoch": 0.35344397980629644, "grad_norm": 0.48294100165367126, "learning_rate": 1.4443130206178456e-05, "loss": 0.2373, "step": 19575 }, { "epoch": 0.3535342592391972, "grad_norm": 0.6076140403747559, "learning_rate": 1.4440589118140586e-05, "loss": 0.3505, "step": 19580 }, { "epoch": 0.35362453867209787, "grad_norm": 0.35823559761047363, "learning_rate": 1.443804767288961e-05, "loss": 0.236, "step": 19585 }, { "epoch": 0.3537148181049986, "grad_norm": 0.4359067380428314, "learning_rate": 1.4435505870629967e-05, "loss": 0.3581, "step": 19590 }, { "epoch": 0.3538050975378993, "grad_norm": 0.5708020329475403, "learning_rate": 1.4432963711566124e-05, "loss": 0.2288, "step": 19595 }, { "epoch": 0.35389537697080004, "grad_norm": 1.0934189558029175, "learning_rate": 1.4430421195902582e-05, "loss": 0.2791, "step": 19600 }, { "epoch": 0.3539856564037007, "grad_norm": 0.6318325996398926, "learning_rate": 1.4427878323843867e-05, "loss": 0.2272, "step": 19605 }, { "epoch": 0.35407593583660146, "grad_norm": 0.3226698338985443, "learning_rate": 1.4425335095594532e-05, "loss": 0.2296, "step": 19610 }, { "epoch": 0.35416621526950215, "grad_norm": 0.722440242767334, "learning_rate": 1.442279151135917e-05, "loss": 0.24, "step": 19615 }, { "epoch": 0.3542564947024029, "grad_norm": 0.3280979096889496, "learning_rate": 1.4420247571342383e-05, "loss": 0.228, "step": 19620 }, { "epoch": 0.3543467741353036, "grad_norm": 0.32591092586517334, "learning_rate": 1.4417703275748817e-05, "loss": 0.1964, "step": 19625 }, { "epoch": 0.3544370535682043, "grad_norm": 0.3040795624256134, "learning_rate": 1.4415158624783142e-05, "loss": 0.1845, "step": 19630 }, { "epoch": 0.354527333001105, "grad_norm": 0.4605494737625122, "learning_rate": 1.441261361865006e-05, "loss": 0.2225, "step": 19635 }, { "epoch": 0.35461761243400575, "grad_norm": 0.4035797715187073, "learning_rate": 1.4410068257554295e-05, "loss": 0.1865, "step": 19640 }, { "epoch": 0.35470789186690643, "grad_norm": 0.5863381028175354, "learning_rate": 1.44075225417006e-05, "loss": 0.2495, "step": 19645 }, { "epoch": 0.3547981712998072, "grad_norm": 0.45320987701416016, "learning_rate": 1.4404976471293766e-05, "loss": 0.308, "step": 19650 }, { "epoch": 0.35488845073270786, "grad_norm": 0.4062706530094147, "learning_rate": 1.4402430046538597e-05, "loss": 0.2782, "step": 19655 }, { "epoch": 0.3549787301656086, "grad_norm": 0.5910449028015137, "learning_rate": 1.4399883267639942e-05, "loss": 0.2696, "step": 19660 }, { "epoch": 0.3550690095985093, "grad_norm": 0.440742552280426, "learning_rate": 1.4397336134802673e-05, "loss": 0.2436, "step": 19665 }, { "epoch": 0.35515928903141003, "grad_norm": 1.6211673021316528, "learning_rate": 1.4394788648231679e-05, "loss": 0.2452, "step": 19670 }, { "epoch": 0.3552495684643107, "grad_norm": 0.5122200846672058, "learning_rate": 1.4392240808131892e-05, "loss": 0.2505, "step": 19675 }, { "epoch": 0.35533984789721146, "grad_norm": 0.683473527431488, "learning_rate": 1.4389692614708266e-05, "loss": 0.2907, "step": 19680 }, { "epoch": 0.35543012733011214, "grad_norm": 0.4165571928024292, "learning_rate": 1.4387144068165788e-05, "loss": 0.2784, "step": 19685 }, { "epoch": 0.3555204067630129, "grad_norm": 0.35494714975357056, "learning_rate": 1.4384595168709465e-05, "loss": 0.2705, "step": 19690 }, { "epoch": 0.3556106861959136, "grad_norm": 0.4419481158256531, "learning_rate": 1.4382045916544339e-05, "loss": 0.2106, "step": 19695 }, { "epoch": 0.3557009656288143, "grad_norm": 0.5680895447731018, "learning_rate": 1.437949631187548e-05, "loss": 0.4024, "step": 19700 }, { "epoch": 0.355791245061715, "grad_norm": 0.82598876953125, "learning_rate": 1.4376946354907988e-05, "loss": 0.327, "step": 19705 }, { "epoch": 0.35588152449461574, "grad_norm": 0.4863128364086151, "learning_rate": 1.437439604584698e-05, "loss": 0.2895, "step": 19710 }, { "epoch": 0.35597180392751643, "grad_norm": 0.46011534333229065, "learning_rate": 1.4371845384897617e-05, "loss": 0.3094, "step": 19715 }, { "epoch": 0.35606208336041717, "grad_norm": 0.791948139667511, "learning_rate": 1.4369294372265082e-05, "loss": 0.1886, "step": 19720 }, { "epoch": 0.35615236279331786, "grad_norm": 0.45013314485549927, "learning_rate": 1.436674300815458e-05, "loss": 0.25, "step": 19725 }, { "epoch": 0.3562426422262186, "grad_norm": 0.2880622446537018, "learning_rate": 1.4364191292771352e-05, "loss": 0.1994, "step": 19730 }, { "epoch": 0.3563329216591193, "grad_norm": 0.338219553232193, "learning_rate": 1.4361639226320668e-05, "loss": 0.2814, "step": 19735 }, { "epoch": 0.35642320109202, "grad_norm": 0.5113375782966614, "learning_rate": 1.4359086809007818e-05, "loss": 0.2548, "step": 19740 }, { "epoch": 0.3565134805249207, "grad_norm": 0.8494113087654114, "learning_rate": 1.4356534041038126e-05, "loss": 0.2837, "step": 19745 }, { "epoch": 0.35660375995782145, "grad_norm": 0.4344501197338104, "learning_rate": 1.4353980922616948e-05, "loss": 0.3321, "step": 19750 }, { "epoch": 0.35669403939072214, "grad_norm": 0.7422098517417908, "learning_rate": 1.4351427453949664e-05, "loss": 0.302, "step": 19755 }, { "epoch": 0.3567843188236229, "grad_norm": 0.43888768553733826, "learning_rate": 1.4348873635241676e-05, "loss": 0.1837, "step": 19760 }, { "epoch": 0.35687459825652357, "grad_norm": 0.5647388100624084, "learning_rate": 1.4346319466698424e-05, "loss": 0.2307, "step": 19765 }, { "epoch": 0.3569648776894243, "grad_norm": 0.4936065375804901, "learning_rate": 1.4343764948525371e-05, "loss": 0.2358, "step": 19770 }, { "epoch": 0.357055157122325, "grad_norm": 0.43983563780784607, "learning_rate": 1.434121008092801e-05, "loss": 0.2115, "step": 19775 }, { "epoch": 0.35714543655522574, "grad_norm": 0.47324320673942566, "learning_rate": 1.433865486411186e-05, "loss": 0.2016, "step": 19780 }, { "epoch": 0.3572357159881264, "grad_norm": 0.42178916931152344, "learning_rate": 1.4336099298282475e-05, "loss": 0.258, "step": 19785 }, { "epoch": 0.35732599542102716, "grad_norm": 0.42953428626060486, "learning_rate": 1.4333543383645425e-05, "loss": 0.3026, "step": 19790 }, { "epoch": 0.35741627485392785, "grad_norm": 0.5370898842811584, "learning_rate": 1.4330987120406321e-05, "loss": 0.3165, "step": 19795 }, { "epoch": 0.3575065542868286, "grad_norm": 1.250838041305542, "learning_rate": 1.4328430508770789e-05, "loss": 0.2761, "step": 19800 }, { "epoch": 0.3575968337197293, "grad_norm": 0.37467846274375916, "learning_rate": 1.4325873548944498e-05, "loss": 0.2376, "step": 19805 }, { "epoch": 0.35768711315263, "grad_norm": 0.5512472987174988, "learning_rate": 1.4323316241133127e-05, "loss": 0.2276, "step": 19810 }, { "epoch": 0.35777739258553076, "grad_norm": 0.5403490662574768, "learning_rate": 1.4320758585542401e-05, "loss": 0.299, "step": 19815 }, { "epoch": 0.35786767201843145, "grad_norm": 0.6419376730918884, "learning_rate": 1.431820058237806e-05, "loss": 0.25, "step": 19820 }, { "epoch": 0.3579579514513322, "grad_norm": 0.4647071659564972, "learning_rate": 1.4315642231845876e-05, "loss": 0.2565, "step": 19825 }, { "epoch": 0.3580482308842329, "grad_norm": 0.49543461203575134, "learning_rate": 1.4313083534151657e-05, "loss": 0.1982, "step": 19830 }, { "epoch": 0.3581385103171336, "grad_norm": 0.5073922276496887, "learning_rate": 1.4310524489501225e-05, "loss": 0.2764, "step": 19835 }, { "epoch": 0.3582287897500343, "grad_norm": 0.6127316355705261, "learning_rate": 1.430796509810044e-05, "loss": 0.3251, "step": 19840 }, { "epoch": 0.35831906918293505, "grad_norm": 0.5740789175033569, "learning_rate": 1.4305405360155183e-05, "loss": 0.3247, "step": 19845 }, { "epoch": 0.35840934861583573, "grad_norm": 0.36514946818351746, "learning_rate": 1.4302845275871371e-05, "loss": 0.2557, "step": 19850 }, { "epoch": 0.3584996280487365, "grad_norm": 0.5043168067932129, "learning_rate": 1.4300284845454937e-05, "loss": 0.2391, "step": 19855 }, { "epoch": 0.35858990748163716, "grad_norm": 0.4452023506164551, "learning_rate": 1.4297724069111854e-05, "loss": 0.2864, "step": 19860 }, { "epoch": 0.3586801869145379, "grad_norm": 0.2947918474674225, "learning_rate": 1.4295162947048119e-05, "loss": 0.2045, "step": 19865 }, { "epoch": 0.3587704663474386, "grad_norm": 0.3564087450504303, "learning_rate": 1.4292601479469754e-05, "loss": 0.2265, "step": 19870 }, { "epoch": 0.35886074578033933, "grad_norm": 0.46823352575302124, "learning_rate": 1.4290039666582807e-05, "loss": 0.3774, "step": 19875 }, { "epoch": 0.35895102521324, "grad_norm": 0.4109562635421753, "learning_rate": 1.4287477508593361e-05, "loss": 0.2929, "step": 19880 }, { "epoch": 0.35904130464614076, "grad_norm": 0.48783037066459656, "learning_rate": 1.4284915005707527e-05, "loss": 0.2404, "step": 19885 }, { "epoch": 0.35913158407904144, "grad_norm": 0.39710333943367004, "learning_rate": 1.428235215813143e-05, "loss": 0.2284, "step": 19890 }, { "epoch": 0.3592218635119422, "grad_norm": 0.47894757986068726, "learning_rate": 1.427978896607124e-05, "loss": 0.3403, "step": 19895 }, { "epoch": 0.35931214294484287, "grad_norm": 0.4499393105506897, "learning_rate": 1.4277225429733146e-05, "loss": 0.2867, "step": 19900 }, { "epoch": 0.3594024223777436, "grad_norm": 0.36178120970726013, "learning_rate": 1.4274661549323363e-05, "loss": 0.3247, "step": 19905 }, { "epoch": 0.3594927018106443, "grad_norm": 0.5082676410675049, "learning_rate": 1.4272097325048135e-05, "loss": 0.3115, "step": 19910 }, { "epoch": 0.35958298124354504, "grad_norm": 0.4348970055580139, "learning_rate": 1.4269532757113743e-05, "loss": 0.2576, "step": 19915 }, { "epoch": 0.3596732606764457, "grad_norm": 0.34213346242904663, "learning_rate": 1.4266967845726482e-05, "loss": 0.2346, "step": 19920 }, { "epoch": 0.35976354010934647, "grad_norm": 0.5642054080963135, "learning_rate": 1.4264402591092678e-05, "loss": 0.3248, "step": 19925 }, { "epoch": 0.35985381954224716, "grad_norm": 0.3973153233528137, "learning_rate": 1.4261836993418695e-05, "loss": 0.2369, "step": 19930 }, { "epoch": 0.3599440989751479, "grad_norm": 0.5653963685035706, "learning_rate": 1.4259271052910912e-05, "loss": 0.2373, "step": 19935 }, { "epoch": 0.3600343784080486, "grad_norm": 0.4134688079357147, "learning_rate": 1.4256704769775742e-05, "loss": 0.2033, "step": 19940 }, { "epoch": 0.3601246578409493, "grad_norm": 0.5514252185821533, "learning_rate": 1.4254138144219623e-05, "loss": 0.2597, "step": 19945 }, { "epoch": 0.36021493727385, "grad_norm": 0.34145841002464294, "learning_rate": 1.4251571176449018e-05, "loss": 0.2873, "step": 19950 }, { "epoch": 0.36030521670675075, "grad_norm": 0.7172877192497253, "learning_rate": 1.424900386667042e-05, "loss": 0.3498, "step": 19955 }, { "epoch": 0.36039549613965144, "grad_norm": 0.1886661797761917, "learning_rate": 1.4246436215090361e-05, "loss": 0.3121, "step": 19960 }, { "epoch": 0.3604857755725522, "grad_norm": 0.595154881477356, "learning_rate": 1.4243868221915382e-05, "loss": 0.2444, "step": 19965 }, { "epoch": 0.36057605500545287, "grad_norm": 0.5034096240997314, "learning_rate": 1.4241299887352059e-05, "loss": 0.276, "step": 19970 }, { "epoch": 0.3606663344383536, "grad_norm": 0.4864138960838318, "learning_rate": 1.4238731211606995e-05, "loss": 0.2873, "step": 19975 }, { "epoch": 0.3607566138712543, "grad_norm": 0.5148269534111023, "learning_rate": 1.4236162194886827e-05, "loss": 0.2586, "step": 19980 }, { "epoch": 0.36084689330415504, "grad_norm": 0.455869197845459, "learning_rate": 1.4233592837398209e-05, "loss": 0.2671, "step": 19985 }, { "epoch": 0.3609371727370557, "grad_norm": 0.3940732479095459, "learning_rate": 1.4231023139347829e-05, "loss": 0.3339, "step": 19990 }, { "epoch": 0.36102745216995646, "grad_norm": 0.6617382764816284, "learning_rate": 1.42284531009424e-05, "loss": 0.3365, "step": 19995 }, { "epoch": 0.36111773160285715, "grad_norm": 0.23632988333702087, "learning_rate": 1.4225882722388662e-05, "loss": 0.2272, "step": 20000 }, { "epoch": 0.3612080110357579, "grad_norm": 0.3209882378578186, "learning_rate": 1.4223312003893383e-05, "loss": 0.2649, "step": 20005 }, { "epoch": 0.3612982904686586, "grad_norm": 0.5351271629333496, "learning_rate": 1.4220740945663361e-05, "loss": 0.2702, "step": 20010 }, { "epoch": 0.3613885699015593, "grad_norm": 1.513425350189209, "learning_rate": 1.4218169547905417e-05, "loss": 0.2621, "step": 20015 }, { "epoch": 0.36147884933446, "grad_norm": 0.35597509145736694, "learning_rate": 1.4215597810826405e-05, "loss": 0.299, "step": 20020 }, { "epoch": 0.36156912876736075, "grad_norm": 0.3152241110801697, "learning_rate": 1.4213025734633197e-05, "loss": 0.2417, "step": 20025 }, { "epoch": 0.36165940820026143, "grad_norm": 0.5848614573478699, "learning_rate": 1.4210453319532702e-05, "loss": 0.2618, "step": 20030 }, { "epoch": 0.3617496876331622, "grad_norm": 0.33715879917144775, "learning_rate": 1.420788056573185e-05, "loss": 0.2325, "step": 20035 }, { "epoch": 0.36183996706606286, "grad_norm": 0.6367013454437256, "learning_rate": 1.4205307473437597e-05, "loss": 0.2358, "step": 20040 }, { "epoch": 0.3619302464989636, "grad_norm": 0.38860565423965454, "learning_rate": 1.4202734042856938e-05, "loss": 0.3227, "step": 20045 }, { "epoch": 0.3620205259318643, "grad_norm": 0.5168018937110901, "learning_rate": 1.4200160274196881e-05, "loss": 0.3169, "step": 20050 }, { "epoch": 0.36211080536476503, "grad_norm": 0.39503562450408936, "learning_rate": 1.4197586167664466e-05, "loss": 0.2773, "step": 20055 }, { "epoch": 0.3622010847976657, "grad_norm": 0.48961323499679565, "learning_rate": 1.4195011723466765e-05, "loss": 0.238, "step": 20060 }, { "epoch": 0.36229136423056646, "grad_norm": 0.43903133273124695, "learning_rate": 1.4192436941810872e-05, "loss": 0.2567, "step": 20065 }, { "epoch": 0.36238164366346715, "grad_norm": 0.5009246468544006, "learning_rate": 1.4189861822903907e-05, "loss": 0.2474, "step": 20070 }, { "epoch": 0.3624719230963679, "grad_norm": 0.3318796455860138, "learning_rate": 1.4187286366953022e-05, "loss": 0.3398, "step": 20075 }, { "epoch": 0.3625622025292686, "grad_norm": 0.7951874136924744, "learning_rate": 1.4184710574165394e-05, "loss": 0.2387, "step": 20080 }, { "epoch": 0.3626524819621693, "grad_norm": 0.3770418167114258, "learning_rate": 1.4182134444748226e-05, "loss": 0.2669, "step": 20085 }, { "epoch": 0.36274276139507, "grad_norm": 0.32762718200683594, "learning_rate": 1.4179557978908748e-05, "loss": 0.2638, "step": 20090 }, { "epoch": 0.36283304082797074, "grad_norm": 0.637208878993988, "learning_rate": 1.4176981176854221e-05, "loss": 0.2871, "step": 20095 }, { "epoch": 0.36292332026087143, "grad_norm": 0.44785478711128235, "learning_rate": 1.4174404038791927e-05, "loss": 0.3969, "step": 20100 }, { "epoch": 0.36301359969377217, "grad_norm": 0.7451809644699097, "learning_rate": 1.4171826564929178e-05, "loss": 0.2978, "step": 20105 }, { "epoch": 0.36310387912667286, "grad_norm": 0.4416932761669159, "learning_rate": 1.4169248755473315e-05, "loss": 0.262, "step": 20110 }, { "epoch": 0.3631941585595736, "grad_norm": 0.5423164963722229, "learning_rate": 1.41666706106317e-05, "loss": 0.3489, "step": 20115 }, { "epoch": 0.3632844379924743, "grad_norm": 0.461900919675827, "learning_rate": 1.4164092130611735e-05, "loss": 0.3419, "step": 20120 }, { "epoch": 0.363374717425375, "grad_norm": 0.7312812805175781, "learning_rate": 1.416151331562083e-05, "loss": 0.325, "step": 20125 }, { "epoch": 0.3634649968582757, "grad_norm": 0.3819325566291809, "learning_rate": 1.4158934165866436e-05, "loss": 0.2519, "step": 20130 }, { "epoch": 0.36355527629117645, "grad_norm": 0.2581886053085327, "learning_rate": 1.4156354681556028e-05, "loss": 0.224, "step": 20135 }, { "epoch": 0.36364555572407714, "grad_norm": 0.44829660654067993, "learning_rate": 1.4153774862897102e-05, "loss": 0.2847, "step": 20140 }, { "epoch": 0.3637358351569779, "grad_norm": 0.4747091233730316, "learning_rate": 1.4151194710097194e-05, "loss": 0.2449, "step": 20145 }, { "epoch": 0.36382611458987857, "grad_norm": 0.9994378089904785, "learning_rate": 1.4148614223363852e-05, "loss": 0.2027, "step": 20150 }, { "epoch": 0.3639163940227793, "grad_norm": 0.3756590187549591, "learning_rate": 1.4146033402904657e-05, "loss": 0.2202, "step": 20155 }, { "epoch": 0.36400667345568, "grad_norm": 0.6926841139793396, "learning_rate": 1.4143452248927223e-05, "loss": 0.2173, "step": 20160 }, { "epoch": 0.36409695288858074, "grad_norm": 0.4116734266281128, "learning_rate": 1.4140870761639178e-05, "loss": 0.2754, "step": 20165 }, { "epoch": 0.3641872323214814, "grad_norm": 0.49817314743995667, "learning_rate": 1.4138288941248189e-05, "loss": 0.2571, "step": 20170 }, { "epoch": 0.36427751175438217, "grad_norm": 0.2756151854991913, "learning_rate": 1.413570678796194e-05, "loss": 0.2055, "step": 20175 }, { "epoch": 0.36436779118728285, "grad_norm": 1.4359185695648193, "learning_rate": 1.4133124301988155e-05, "loss": 0.379, "step": 20180 }, { "epoch": 0.3644580706201836, "grad_norm": 0.6018916368484497, "learning_rate": 1.4130541483534567e-05, "loss": 0.2789, "step": 20185 }, { "epoch": 0.3645483500530843, "grad_norm": 0.3269156217575073, "learning_rate": 1.4127958332808946e-05, "loss": 0.2321, "step": 20190 }, { "epoch": 0.364638629485985, "grad_norm": 0.4135490357875824, "learning_rate": 1.4125374850019095e-05, "loss": 0.2917, "step": 20195 }, { "epoch": 0.3647289089188857, "grad_norm": 0.23466555774211884, "learning_rate": 1.4122791035372831e-05, "loss": 0.2085, "step": 20200 }, { "epoch": 0.36481918835178645, "grad_norm": 0.800495982170105, "learning_rate": 1.4120206889078003e-05, "loss": 0.2336, "step": 20205 }, { "epoch": 0.36490946778468714, "grad_norm": 0.5827568173408508, "learning_rate": 1.4117622411342486e-05, "loss": 0.3036, "step": 20210 }, { "epoch": 0.3649997472175879, "grad_norm": 1.102327585220337, "learning_rate": 1.4115037602374188e-05, "loss": 0.257, "step": 20215 }, { "epoch": 0.3650900266504886, "grad_norm": 0.7294953465461731, "learning_rate": 1.411245246238103e-05, "loss": 0.2424, "step": 20220 }, { "epoch": 0.3651803060833893, "grad_norm": 0.4187539219856262, "learning_rate": 1.4109866991570975e-05, "loss": 0.2634, "step": 20225 }, { "epoch": 0.36527058551629005, "grad_norm": 0.40374043583869934, "learning_rate": 1.4107281190152e-05, "loss": 0.2914, "step": 20230 }, { "epoch": 0.36536086494919073, "grad_norm": 0.9304539561271667, "learning_rate": 1.4104695058332119e-05, "loss": 0.3806, "step": 20235 }, { "epoch": 0.3654511443820915, "grad_norm": 0.7214563488960266, "learning_rate": 1.4102108596319359e-05, "loss": 0.3112, "step": 20240 }, { "epoch": 0.36554142381499216, "grad_norm": 0.446776807308197, "learning_rate": 1.4099521804321792e-05, "loss": 0.2244, "step": 20245 }, { "epoch": 0.3656317032478929, "grad_norm": 0.568058431148529, "learning_rate": 1.40969346825475e-05, "loss": 0.2412, "step": 20250 }, { "epoch": 0.3657219826807936, "grad_norm": 0.7030618190765381, "learning_rate": 1.4094347231204602e-05, "loss": 0.1724, "step": 20255 }, { "epoch": 0.36581226211369433, "grad_norm": 0.3723071217536926, "learning_rate": 1.4091759450501236e-05, "loss": 0.3529, "step": 20260 }, { "epoch": 0.365902541546595, "grad_norm": 0.7579772472381592, "learning_rate": 1.4089171340645575e-05, "loss": 0.2549, "step": 20265 }, { "epoch": 0.36599282097949576, "grad_norm": 0.4400385916233063, "learning_rate": 1.408658290184581e-05, "loss": 0.2669, "step": 20270 }, { "epoch": 0.36608310041239644, "grad_norm": 0.39168331027030945, "learning_rate": 1.4083994134310162e-05, "loss": 0.2692, "step": 20275 }, { "epoch": 0.3661733798452972, "grad_norm": 0.44686418771743774, "learning_rate": 1.408140503824688e-05, "loss": 0.303, "step": 20280 }, { "epoch": 0.3662636592781979, "grad_norm": 0.48944777250289917, "learning_rate": 1.4078815613864239e-05, "loss": 0.2626, "step": 20285 }, { "epoch": 0.3663539387110986, "grad_norm": 0.4091298282146454, "learning_rate": 1.4076225861370531e-05, "loss": 0.216, "step": 20290 }, { "epoch": 0.3664442181439993, "grad_norm": 0.4132739305496216, "learning_rate": 1.4073635780974097e-05, "loss": 0.2436, "step": 20295 }, { "epoch": 0.36653449757690004, "grad_norm": 0.31656551361083984, "learning_rate": 1.4071045372883281e-05, "loss": 0.2046, "step": 20300 }, { "epoch": 0.36662477700980073, "grad_norm": 0.38056060671806335, "learning_rate": 1.4068454637306461e-05, "loss": 0.265, "step": 20305 }, { "epoch": 0.36671505644270147, "grad_norm": 0.44137445092201233, "learning_rate": 1.406586357445205e-05, "loss": 0.3173, "step": 20310 }, { "epoch": 0.36680533587560216, "grad_norm": 0.7222954034805298, "learning_rate": 1.4063272184528474e-05, "loss": 0.276, "step": 20315 }, { "epoch": 0.3668956153085029, "grad_norm": 0.521718442440033, "learning_rate": 1.4060680467744192e-05, "loss": 0.2463, "step": 20320 }, { "epoch": 0.3669858947414036, "grad_norm": 0.5072159171104431, "learning_rate": 1.4058088424307693e-05, "loss": 0.2716, "step": 20325 }, { "epoch": 0.3670761741743043, "grad_norm": 1.0182414054870605, "learning_rate": 1.4055496054427485e-05, "loss": 0.2574, "step": 20330 }, { "epoch": 0.367166453607205, "grad_norm": 0.3587333858013153, "learning_rate": 1.4052903358312105e-05, "loss": 0.2077, "step": 20335 }, { "epoch": 0.36725673304010575, "grad_norm": 0.9112486243247986, "learning_rate": 1.4050310336170117e-05, "loss": 0.2609, "step": 20340 }, { "epoch": 0.36734701247300644, "grad_norm": 0.8448390364646912, "learning_rate": 1.4047716988210112e-05, "loss": 0.3009, "step": 20345 }, { "epoch": 0.3674372919059072, "grad_norm": 0.45320257544517517, "learning_rate": 1.4045123314640705e-05, "loss": 0.2496, "step": 20350 }, { "epoch": 0.36752757133880787, "grad_norm": 0.49984174966812134, "learning_rate": 1.4042529315670537e-05, "loss": 0.2178, "step": 20355 }, { "epoch": 0.3676178507717086, "grad_norm": 0.4586024880409241, "learning_rate": 1.4039934991508278e-05, "loss": 0.2059, "step": 20360 }, { "epoch": 0.3677081302046093, "grad_norm": 0.5028537511825562, "learning_rate": 1.4037340342362624e-05, "loss": 0.3675, "step": 20365 }, { "epoch": 0.36779840963751004, "grad_norm": 0.5076274275779724, "learning_rate": 1.4034745368442293e-05, "loss": 0.2774, "step": 20370 }, { "epoch": 0.3678886890704107, "grad_norm": 0.5486329793930054, "learning_rate": 1.403215006995603e-05, "loss": 0.22, "step": 20375 }, { "epoch": 0.36797896850331147, "grad_norm": 0.6385655999183655, "learning_rate": 1.4029554447112612e-05, "loss": 0.3082, "step": 20380 }, { "epoch": 0.36806924793621215, "grad_norm": 0.46644681692123413, "learning_rate": 1.4026958500120837e-05, "loss": 0.2894, "step": 20385 }, { "epoch": 0.3681595273691129, "grad_norm": 0.4819391369819641, "learning_rate": 1.4024362229189528e-05, "loss": 0.3046, "step": 20390 }, { "epoch": 0.3682498068020136, "grad_norm": 0.5041378140449524, "learning_rate": 1.4021765634527537e-05, "loss": 0.349, "step": 20395 }, { "epoch": 0.3683400862349143, "grad_norm": 0.6669679880142212, "learning_rate": 1.4019168716343748e-05, "loss": 0.3217, "step": 20400 }, { "epoch": 0.368430365667815, "grad_norm": 0.5393589735031128, "learning_rate": 1.4016571474847051e-05, "loss": 0.1945, "step": 20405 }, { "epoch": 0.36852064510071575, "grad_norm": 0.5151278972625732, "learning_rate": 1.4013973910246385e-05, "loss": 0.3173, "step": 20410 }, { "epoch": 0.36861092453361644, "grad_norm": 0.4417298138141632, "learning_rate": 1.4011376022750702e-05, "loss": 0.304, "step": 20415 }, { "epoch": 0.3687012039665172, "grad_norm": 0.48443835973739624, "learning_rate": 1.4008777812568983e-05, "loss": 0.3338, "step": 20420 }, { "epoch": 0.36879148339941786, "grad_norm": 0.4232576787471771, "learning_rate": 1.4006179279910237e-05, "loss": 0.2347, "step": 20425 }, { "epoch": 0.3688817628323186, "grad_norm": 0.4605163335800171, "learning_rate": 1.4003580424983497e-05, "loss": 0.324, "step": 20430 }, { "epoch": 0.3689720422652193, "grad_norm": 0.3433309495449066, "learning_rate": 1.4000981247997816e-05, "loss": 0.192, "step": 20435 }, { "epoch": 0.36906232169812003, "grad_norm": 0.34693244099617004, "learning_rate": 1.3998381749162286e-05, "loss": 0.2916, "step": 20440 }, { "epoch": 0.3691526011310207, "grad_norm": 0.42569494247436523, "learning_rate": 1.3995781928686016e-05, "loss": 0.3048, "step": 20445 }, { "epoch": 0.36924288056392146, "grad_norm": 0.7065058350563049, "learning_rate": 1.399318178677814e-05, "loss": 0.3531, "step": 20450 }, { "epoch": 0.36933315999682215, "grad_norm": 0.39248576760292053, "learning_rate": 1.3990581323647822e-05, "loss": 0.3205, "step": 20455 }, { "epoch": 0.3694234394297229, "grad_norm": 0.3955816924571991, "learning_rate": 1.3987980539504254e-05, "loss": 0.3107, "step": 20460 }, { "epoch": 0.3695137188626236, "grad_norm": 0.6594250202178955, "learning_rate": 1.3985379434556644e-05, "loss": 0.2847, "step": 20465 }, { "epoch": 0.3696039982955243, "grad_norm": 0.35042914748191833, "learning_rate": 1.3982778009014239e-05, "loss": 0.2377, "step": 20470 }, { "epoch": 0.369694277728425, "grad_norm": 0.6024788618087769, "learning_rate": 1.3980176263086299e-05, "loss": 0.3413, "step": 20475 }, { "epoch": 0.36978455716132574, "grad_norm": 0.3722987771034241, "learning_rate": 1.3977574196982115e-05, "loss": 0.2523, "step": 20480 }, { "epoch": 0.36987483659422643, "grad_norm": 0.4409797489643097, "learning_rate": 1.3974971810911009e-05, "loss": 0.3234, "step": 20485 }, { "epoch": 0.36996511602712717, "grad_norm": 0.5501575469970703, "learning_rate": 1.397236910508232e-05, "loss": 0.2888, "step": 20490 }, { "epoch": 0.37005539546002786, "grad_norm": 0.5797925591468811, "learning_rate": 1.396976607970542e-05, "loss": 0.3273, "step": 20495 }, { "epoch": 0.3701456748929286, "grad_norm": 0.47177791595458984, "learning_rate": 1.3967162734989703e-05, "loss": 0.224, "step": 20500 }, { "epoch": 0.3702359543258293, "grad_norm": 0.3789895176887512, "learning_rate": 1.3964559071144584e-05, "loss": 0.2279, "step": 20505 }, { "epoch": 0.37032623375873003, "grad_norm": 0.35565948486328125, "learning_rate": 1.3961955088379516e-05, "loss": 0.1786, "step": 20510 }, { "epoch": 0.3704165131916307, "grad_norm": 0.40913644433021545, "learning_rate": 1.3959350786903968e-05, "loss": 0.1979, "step": 20515 }, { "epoch": 0.37050679262453146, "grad_norm": 0.4691525995731354, "learning_rate": 1.3956746166927433e-05, "loss": 0.2929, "step": 20520 }, { "epoch": 0.37059707205743214, "grad_norm": 0.328995019197464, "learning_rate": 1.3954141228659439e-05, "loss": 0.2487, "step": 20525 }, { "epoch": 0.3706873514903329, "grad_norm": 0.3796280324459076, "learning_rate": 1.3951535972309533e-05, "loss": 0.3008, "step": 20530 }, { "epoch": 0.37077763092323357, "grad_norm": 0.4332354962825775, "learning_rate": 1.3948930398087288e-05, "loss": 0.2755, "step": 20535 }, { "epoch": 0.3708679103561343, "grad_norm": 0.4433010220527649, "learning_rate": 1.3946324506202301e-05, "loss": 0.2256, "step": 20540 }, { "epoch": 0.370958189789035, "grad_norm": 0.38618332147598267, "learning_rate": 1.3943718296864205e-05, "loss": 0.3062, "step": 20545 }, { "epoch": 0.37104846922193574, "grad_norm": 0.48134633898735046, "learning_rate": 1.3941111770282644e-05, "loss": 0.2413, "step": 20550 }, { "epoch": 0.3711387486548364, "grad_norm": 1.5840861797332764, "learning_rate": 1.3938504926667295e-05, "loss": 0.3481, "step": 20555 }, { "epoch": 0.37122902808773717, "grad_norm": 0.6157742738723755, "learning_rate": 1.3935897766227862e-05, "loss": 0.3665, "step": 20560 }, { "epoch": 0.37131930752063785, "grad_norm": 0.40346086025238037, "learning_rate": 1.3933290289174069e-05, "loss": 0.3293, "step": 20565 }, { "epoch": 0.3714095869535386, "grad_norm": 0.41430744528770447, "learning_rate": 1.393068249571567e-05, "loss": 0.2168, "step": 20570 }, { "epoch": 0.3714998663864393, "grad_norm": 0.435720294713974, "learning_rate": 1.3928074386062442e-05, "loss": 0.2679, "step": 20575 }, { "epoch": 0.37159014581934, "grad_norm": 0.3936663568019867, "learning_rate": 1.392546596042419e-05, "loss": 0.352, "step": 20580 }, { "epoch": 0.3716804252522407, "grad_norm": 0.6342301368713379, "learning_rate": 1.3922857219010746e-05, "loss": 0.1906, "step": 20585 }, { "epoch": 0.37177070468514145, "grad_norm": 0.4912501573562622, "learning_rate": 1.3920248162031957e-05, "loss": 0.284, "step": 20590 }, { "epoch": 0.37186098411804214, "grad_norm": 0.47178614139556885, "learning_rate": 1.391763878969771e-05, "loss": 0.2427, "step": 20595 }, { "epoch": 0.3719512635509429, "grad_norm": 0.7745219469070435, "learning_rate": 1.3915029102217902e-05, "loss": 0.2535, "step": 20600 }, { "epoch": 0.37204154298384357, "grad_norm": 0.36101463437080383, "learning_rate": 1.3912419099802468e-05, "loss": 0.2445, "step": 20605 }, { "epoch": 0.3721318224167443, "grad_norm": 0.470927357673645, "learning_rate": 1.3909808782661368e-05, "loss": 0.2573, "step": 20610 }, { "epoch": 0.37222210184964505, "grad_norm": 0.39924320578575134, "learning_rate": 1.3907198151004572e-05, "loss": 0.3407, "step": 20615 }, { "epoch": 0.37231238128254573, "grad_norm": 0.6046448349952698, "learning_rate": 1.3904587205042096e-05, "loss": 0.1862, "step": 20620 }, { "epoch": 0.3724026607154465, "grad_norm": 0.4899580776691437, "learning_rate": 1.390197594498397e-05, "loss": 0.2631, "step": 20625 }, { "epoch": 0.37249294014834716, "grad_norm": 0.6295987963676453, "learning_rate": 1.3899364371040248e-05, "loss": 0.2755, "step": 20630 }, { "epoch": 0.3725832195812479, "grad_norm": 0.5494354367256165, "learning_rate": 1.3896752483421013e-05, "loss": 0.3084, "step": 20635 }, { "epoch": 0.3726734990141486, "grad_norm": 0.4012296199798584, "learning_rate": 1.3894140282336372e-05, "loss": 0.2626, "step": 20640 }, { "epoch": 0.37276377844704933, "grad_norm": 0.42580288648605347, "learning_rate": 1.389152776799646e-05, "loss": 0.3907, "step": 20645 }, { "epoch": 0.37285405787995, "grad_norm": 0.42306599020957947, "learning_rate": 1.3888914940611433e-05, "loss": 0.2946, "step": 20650 }, { "epoch": 0.37294433731285076, "grad_norm": 0.3592524230480194, "learning_rate": 1.3886301800391473e-05, "loss": 0.3285, "step": 20655 }, { "epoch": 0.37303461674575145, "grad_norm": 0.37943747639656067, "learning_rate": 1.3883688347546791e-05, "loss": 0.2716, "step": 20660 }, { "epoch": 0.3731248961786522, "grad_norm": 0.4590633809566498, "learning_rate": 1.3881074582287616e-05, "loss": 0.1602, "step": 20665 }, { "epoch": 0.3732151756115529, "grad_norm": 0.4163933992385864, "learning_rate": 1.3878460504824211e-05, "loss": 0.2677, "step": 20670 }, { "epoch": 0.3733054550444536, "grad_norm": 0.5285905003547668, "learning_rate": 1.387584611536686e-05, "loss": 0.1799, "step": 20675 }, { "epoch": 0.3733957344773543, "grad_norm": 0.4583968222141266, "learning_rate": 1.3873231414125868e-05, "loss": 0.2622, "step": 20680 }, { "epoch": 0.37348601391025504, "grad_norm": 0.4935058653354645, "learning_rate": 1.3870616401311565e-05, "loss": 0.2088, "step": 20685 }, { "epoch": 0.37357629334315573, "grad_norm": 0.5065711736679077, "learning_rate": 1.386800107713432e-05, "loss": 0.2332, "step": 20690 }, { "epoch": 0.37366657277605647, "grad_norm": 0.43563568592071533, "learning_rate": 1.3865385441804508e-05, "loss": 0.249, "step": 20695 }, { "epoch": 0.37375685220895716, "grad_norm": 0.46323660016059875, "learning_rate": 1.3862769495532542e-05, "loss": 0.1794, "step": 20700 }, { "epoch": 0.3738471316418579, "grad_norm": 0.6760948300361633, "learning_rate": 1.3860153238528855e-05, "loss": 0.3137, "step": 20705 }, { "epoch": 0.3739374110747586, "grad_norm": 0.4988843500614166, "learning_rate": 1.3857536671003908e-05, "loss": 0.3945, "step": 20710 }, { "epoch": 0.3740276905076593, "grad_norm": 0.5700904726982117, "learning_rate": 1.3854919793168183e-05, "loss": 0.2762, "step": 20715 }, { "epoch": 0.37411796994056, "grad_norm": 0.8820415139198303, "learning_rate": 1.3852302605232186e-05, "loss": 0.1863, "step": 20720 }, { "epoch": 0.37420824937346076, "grad_norm": 0.35778066515922546, "learning_rate": 1.3849685107406454e-05, "loss": 0.306, "step": 20725 }, { "epoch": 0.37429852880636144, "grad_norm": 0.3076165020465851, "learning_rate": 1.3847067299901547e-05, "loss": 0.1948, "step": 20730 }, { "epoch": 0.3743888082392622, "grad_norm": 0.5232268571853638, "learning_rate": 1.3844449182928044e-05, "loss": 0.1947, "step": 20735 }, { "epoch": 0.37447908767216287, "grad_norm": 0.5433857440948486, "learning_rate": 1.3841830756696557e-05, "loss": 0.2965, "step": 20740 }, { "epoch": 0.3745693671050636, "grad_norm": 0.35156840085983276, "learning_rate": 1.3839212021417718e-05, "loss": 0.1864, "step": 20745 }, { "epoch": 0.3746596465379643, "grad_norm": 0.36232057213783264, "learning_rate": 1.3836592977302188e-05, "loss": 0.1613, "step": 20750 }, { "epoch": 0.37474992597086504, "grad_norm": 0.33974605798721313, "learning_rate": 1.3833973624560644e-05, "loss": 0.2699, "step": 20755 }, { "epoch": 0.3748402054037657, "grad_norm": 0.2400882989168167, "learning_rate": 1.38313539634038e-05, "loss": 0.3049, "step": 20760 }, { "epoch": 0.37493048483666647, "grad_norm": 0.3881498873233795, "learning_rate": 1.3828733994042387e-05, "loss": 0.1705, "step": 20765 }, { "epoch": 0.37502076426956715, "grad_norm": 0.6005303263664246, "learning_rate": 1.3826113716687162e-05, "loss": 0.2377, "step": 20770 }, { "epoch": 0.3751110437024679, "grad_norm": 0.8644828200340271, "learning_rate": 1.3823493131548906e-05, "loss": 0.3372, "step": 20775 }, { "epoch": 0.3752013231353686, "grad_norm": 0.37639057636260986, "learning_rate": 1.3820872238838426e-05, "loss": 0.3125, "step": 20780 }, { "epoch": 0.3752916025682693, "grad_norm": 0.33708035945892334, "learning_rate": 1.3818251038766555e-05, "loss": 0.3017, "step": 20785 }, { "epoch": 0.37538188200117, "grad_norm": 0.28124114871025085, "learning_rate": 1.381562953154415e-05, "loss": 0.2504, "step": 20790 }, { "epoch": 0.37547216143407075, "grad_norm": 0.5806324481964111, "learning_rate": 1.3813007717382092e-05, "loss": 0.307, "step": 20795 }, { "epoch": 0.37556244086697144, "grad_norm": 0.43579456210136414, "learning_rate": 1.3810385596491282e-05, "loss": 0.2074, "step": 20800 }, { "epoch": 0.3756527202998722, "grad_norm": 0.34230563044548035, "learning_rate": 1.380776316908266e-05, "loss": 0.2473, "step": 20805 }, { "epoch": 0.37574299973277286, "grad_norm": 0.5098487734794617, "learning_rate": 1.3805140435367177e-05, "loss": 0.2555, "step": 20810 }, { "epoch": 0.3758332791656736, "grad_norm": 0.6360251307487488, "learning_rate": 1.380251739555581e-05, "loss": 0.3145, "step": 20815 }, { "epoch": 0.3759235585985743, "grad_norm": 0.3359304666519165, "learning_rate": 1.3799894049859563e-05, "loss": 0.3457, "step": 20820 }, { "epoch": 0.37601383803147503, "grad_norm": 0.7483426928520203, "learning_rate": 1.3797270398489471e-05, "loss": 0.3247, "step": 20825 }, { "epoch": 0.3761041174643757, "grad_norm": 0.31878402829170227, "learning_rate": 1.3794646441656583e-05, "loss": 0.1573, "step": 20830 }, { "epoch": 0.37619439689727646, "grad_norm": 0.3818129897117615, "learning_rate": 1.3792022179571977e-05, "loss": 0.2322, "step": 20835 }, { "epoch": 0.37628467633017715, "grad_norm": 0.4125143587589264, "learning_rate": 1.378939761244676e-05, "loss": 0.2426, "step": 20840 }, { "epoch": 0.3763749557630779, "grad_norm": 0.37870392203330994, "learning_rate": 1.3786772740492055e-05, "loss": 0.2716, "step": 20845 }, { "epoch": 0.3764652351959786, "grad_norm": 0.7333554625511169, "learning_rate": 1.378414756391902e-05, "loss": 0.2721, "step": 20850 }, { "epoch": 0.3765555146288793, "grad_norm": 0.4476829171180725, "learning_rate": 1.378152208293882e-05, "loss": 0.1462, "step": 20855 }, { "epoch": 0.37664579406178, "grad_norm": 0.33928850293159485, "learning_rate": 1.377889629776267e-05, "loss": 0.2591, "step": 20860 }, { "epoch": 0.37673607349468075, "grad_norm": 0.4483809769153595, "learning_rate": 1.377627020860178e-05, "loss": 0.2472, "step": 20865 }, { "epoch": 0.37682635292758143, "grad_norm": 0.3207505941390991, "learning_rate": 1.3773643815667413e-05, "loss": 0.257, "step": 20870 }, { "epoch": 0.3769166323604822, "grad_norm": 0.47099733352661133, "learning_rate": 1.3771017119170838e-05, "loss": 0.1816, "step": 20875 }, { "epoch": 0.37700691179338286, "grad_norm": 0.4887351989746094, "learning_rate": 1.3768390119323352e-05, "loss": 0.2619, "step": 20880 }, { "epoch": 0.3770971912262836, "grad_norm": 0.42729565501213074, "learning_rate": 1.3765762816336279e-05, "loss": 0.2753, "step": 20885 }, { "epoch": 0.3771874706591843, "grad_norm": 1.1441235542297363, "learning_rate": 1.3763135210420971e-05, "loss": 0.3516, "step": 20890 }, { "epoch": 0.37727775009208503, "grad_norm": 0.18297132849693298, "learning_rate": 1.3760507301788792e-05, "loss": 0.3361, "step": 20895 }, { "epoch": 0.3773680295249857, "grad_norm": 0.43833106756210327, "learning_rate": 1.3757879090651143e-05, "loss": 0.3026, "step": 20900 }, { "epoch": 0.37745830895788646, "grad_norm": 0.43935343623161316, "learning_rate": 1.3755250577219443e-05, "loss": 0.2527, "step": 20905 }, { "epoch": 0.37754858839078714, "grad_norm": 0.7618777751922607, "learning_rate": 1.375262176170514e-05, "loss": 0.2651, "step": 20910 }, { "epoch": 0.3776388678236879, "grad_norm": 0.434602290391922, "learning_rate": 1.3749992644319701e-05, "loss": 0.2037, "step": 20915 }, { "epoch": 0.37772914725658857, "grad_norm": 0.8464885354042053, "learning_rate": 1.3747363225274615e-05, "loss": 0.266, "step": 20920 }, { "epoch": 0.3778194266894893, "grad_norm": 0.42587873339653015, "learning_rate": 1.3744733504781405e-05, "loss": 0.2134, "step": 20925 }, { "epoch": 0.37790970612239, "grad_norm": 0.3875392973423004, "learning_rate": 1.3742103483051612e-05, "loss": 0.2555, "step": 20930 }, { "epoch": 0.37799998555529074, "grad_norm": 0.4170796573162079, "learning_rate": 1.3739473160296798e-05, "loss": 0.2634, "step": 20935 }, { "epoch": 0.3780902649881914, "grad_norm": 0.411149799823761, "learning_rate": 1.3736842536728563e-05, "loss": 0.239, "step": 20940 }, { "epoch": 0.37818054442109217, "grad_norm": 0.4498842656612396, "learning_rate": 1.3734211612558513e-05, "loss": 0.2336, "step": 20945 }, { "epoch": 0.37827082385399285, "grad_norm": 0.382417231798172, "learning_rate": 1.373158038799829e-05, "loss": 0.3019, "step": 20950 }, { "epoch": 0.3783611032868936, "grad_norm": 0.3414664566516876, "learning_rate": 1.3728948863259555e-05, "loss": 0.3018, "step": 20955 }, { "epoch": 0.3784513827197943, "grad_norm": 0.5627580285072327, "learning_rate": 1.3726317038553996e-05, "loss": 0.2927, "step": 20960 }, { "epoch": 0.378541662152695, "grad_norm": 0.36109137535095215, "learning_rate": 1.372368491409332e-05, "loss": 0.2494, "step": 20965 }, { "epoch": 0.3786319415855957, "grad_norm": 0.2758963406085968, "learning_rate": 1.3721052490089272e-05, "loss": 0.2305, "step": 20970 }, { "epoch": 0.37872222101849645, "grad_norm": 0.37376853823661804, "learning_rate": 1.3718419766753603e-05, "loss": 0.2622, "step": 20975 }, { "epoch": 0.37881250045139714, "grad_norm": 0.47796663641929626, "learning_rate": 1.3715786744298103e-05, "loss": 0.335, "step": 20980 }, { "epoch": 0.3789027798842979, "grad_norm": 0.4772331118583679, "learning_rate": 1.3713153422934571e-05, "loss": 0.3328, "step": 20985 }, { "epoch": 0.37899305931719857, "grad_norm": 0.29484912753105164, "learning_rate": 1.3710519802874847e-05, "loss": 0.2899, "step": 20990 }, { "epoch": 0.3790833387500993, "grad_norm": 0.3672797977924347, "learning_rate": 1.3707885884330781e-05, "loss": 0.3458, "step": 20995 }, { "epoch": 0.379173618183, "grad_norm": 0.33409571647644043, "learning_rate": 1.3705251667514255e-05, "loss": 0.2574, "step": 21000 }, { "epoch": 0.37926389761590074, "grad_norm": 0.35732537508010864, "learning_rate": 1.370261715263717e-05, "loss": 0.2169, "step": 21005 }, { "epoch": 0.3793541770488015, "grad_norm": 0.4787987768650055, "learning_rate": 1.3699982339911459e-05, "loss": 0.2929, "step": 21010 }, { "epoch": 0.37944445648170216, "grad_norm": 0.566339910030365, "learning_rate": 1.369734722954907e-05, "loss": 0.3447, "step": 21015 }, { "epoch": 0.3795347359146029, "grad_norm": 0.5799692869186401, "learning_rate": 1.3694711821761976e-05, "loss": 0.2302, "step": 21020 }, { "epoch": 0.3796250153475036, "grad_norm": 0.5670090913772583, "learning_rate": 1.369207611676218e-05, "loss": 0.2486, "step": 21025 }, { "epoch": 0.37971529478040433, "grad_norm": 0.4892391264438629, "learning_rate": 1.3689440114761708e-05, "loss": 0.3051, "step": 21030 }, { "epoch": 0.379805574213305, "grad_norm": 0.6479849815368652, "learning_rate": 1.3686803815972599e-05, "loss": 0.2842, "step": 21035 }, { "epoch": 0.37989585364620576, "grad_norm": 0.37288862466812134, "learning_rate": 1.3684167220606929e-05, "loss": 0.2737, "step": 21040 }, { "epoch": 0.37998613307910645, "grad_norm": 0.4827922284603119, "learning_rate": 1.3681530328876798e-05, "loss": 0.2171, "step": 21045 }, { "epoch": 0.3800764125120072, "grad_norm": 0.49152135848999023, "learning_rate": 1.3678893140994314e-05, "loss": 0.3088, "step": 21050 }, { "epoch": 0.3801666919449079, "grad_norm": 0.40357691049575806, "learning_rate": 1.3676255657171627e-05, "loss": 0.3664, "step": 21055 }, { "epoch": 0.3802569713778086, "grad_norm": 0.5374104976654053, "learning_rate": 1.3673617877620902e-05, "loss": 0.3357, "step": 21060 }, { "epoch": 0.3803472508107093, "grad_norm": 0.5262294411659241, "learning_rate": 1.3670979802554325e-05, "loss": 0.1575, "step": 21065 }, { "epoch": 0.38043753024361004, "grad_norm": 0.48867297172546387, "learning_rate": 1.3668341432184116e-05, "loss": 0.2814, "step": 21070 }, { "epoch": 0.38052780967651073, "grad_norm": 0.2917868196964264, "learning_rate": 1.366570276672251e-05, "loss": 0.1978, "step": 21075 }, { "epoch": 0.3806180891094115, "grad_norm": 0.4150635004043579, "learning_rate": 1.366306380638177e-05, "loss": 0.3177, "step": 21080 }, { "epoch": 0.38070836854231216, "grad_norm": 0.45227259397506714, "learning_rate": 1.3660424551374175e-05, "loss": 0.2428, "step": 21085 }, { "epoch": 0.3807986479752129, "grad_norm": 0.43083256483078003, "learning_rate": 1.3657785001912045e-05, "loss": 0.349, "step": 21090 }, { "epoch": 0.3808889274081136, "grad_norm": 0.36342352628707886, "learning_rate": 1.3655145158207703e-05, "loss": 0.2375, "step": 21095 }, { "epoch": 0.38097920684101433, "grad_norm": 0.9517899751663208, "learning_rate": 1.3652505020473509e-05, "loss": 0.2719, "step": 21100 }, { "epoch": 0.381069486273915, "grad_norm": 0.6163972616195679, "learning_rate": 1.3649864588921841e-05, "loss": 0.3468, "step": 21105 }, { "epoch": 0.38115976570681576, "grad_norm": 0.6210305094718933, "learning_rate": 1.3647223863765107e-05, "loss": 0.2118, "step": 21110 }, { "epoch": 0.38125004513971644, "grad_norm": 0.5331239700317383, "learning_rate": 1.364458284521573e-05, "loss": 0.3312, "step": 21115 }, { "epoch": 0.3813403245726172, "grad_norm": 0.7067062854766846, "learning_rate": 1.3641941533486158e-05, "loss": 0.3288, "step": 21120 }, { "epoch": 0.38143060400551787, "grad_norm": 0.4058865010738373, "learning_rate": 1.3639299928788875e-05, "loss": 0.2042, "step": 21125 }, { "epoch": 0.3815208834384186, "grad_norm": 0.3381866216659546, "learning_rate": 1.363665803133637e-05, "loss": 0.2507, "step": 21130 }, { "epoch": 0.3816111628713193, "grad_norm": 0.5158727765083313, "learning_rate": 1.3634015841341168e-05, "loss": 0.2603, "step": 21135 }, { "epoch": 0.38170144230422004, "grad_norm": 0.6629533171653748, "learning_rate": 1.3631373359015813e-05, "loss": 0.2761, "step": 21140 }, { "epoch": 0.3817917217371207, "grad_norm": 0.6049644947052002, "learning_rate": 1.3628730584572874e-05, "loss": 0.2861, "step": 21145 }, { "epoch": 0.38188200117002147, "grad_norm": 0.4999704658985138, "learning_rate": 1.362608751822494e-05, "loss": 0.3413, "step": 21150 }, { "epoch": 0.38197228060292215, "grad_norm": 0.3376184403896332, "learning_rate": 1.3623444160184633e-05, "loss": 0.2189, "step": 21155 }, { "epoch": 0.3820625600358229, "grad_norm": 0.4643104672431946, "learning_rate": 1.3620800510664588e-05, "loss": 0.2927, "step": 21160 }, { "epoch": 0.3821528394687236, "grad_norm": 0.37722495198249817, "learning_rate": 1.3618156569877466e-05, "loss": 0.2584, "step": 21165 }, { "epoch": 0.3822431189016243, "grad_norm": 0.4606938660144806, "learning_rate": 1.3615512338035957e-05, "loss": 0.304, "step": 21170 }, { "epoch": 0.382333398334525, "grad_norm": 0.31286484003067017, "learning_rate": 1.3612867815352766e-05, "loss": 0.1905, "step": 21175 }, { "epoch": 0.38242367776742575, "grad_norm": 0.37826356291770935, "learning_rate": 1.3610223002040628e-05, "loss": 0.1855, "step": 21180 }, { "epoch": 0.38251395720032644, "grad_norm": 0.43756335973739624, "learning_rate": 1.3607577898312297e-05, "loss": 0.229, "step": 21185 }, { "epoch": 0.3826042366332272, "grad_norm": 0.4016486406326294, "learning_rate": 1.3604932504380554e-05, "loss": 0.3214, "step": 21190 }, { "epoch": 0.38269451606612787, "grad_norm": 0.523820698261261, "learning_rate": 1.3602286820458204e-05, "loss": 0.3178, "step": 21195 }, { "epoch": 0.3827847954990286, "grad_norm": 0.43063297867774963, "learning_rate": 1.3599640846758067e-05, "loss": 0.2159, "step": 21200 }, { "epoch": 0.3828750749319293, "grad_norm": 0.5800107717514038, "learning_rate": 1.3596994583492997e-05, "loss": 0.2275, "step": 21205 }, { "epoch": 0.38296535436483004, "grad_norm": 0.4514652490615845, "learning_rate": 1.3594348030875866e-05, "loss": 0.2793, "step": 21210 }, { "epoch": 0.3830556337977307, "grad_norm": 0.4241120517253876, "learning_rate": 1.359170118911957e-05, "loss": 0.3655, "step": 21215 }, { "epoch": 0.38314591323063146, "grad_norm": 0.5384905934333801, "learning_rate": 1.3589054058437026e-05, "loss": 0.2727, "step": 21220 }, { "epoch": 0.38323619266353215, "grad_norm": 0.3744039535522461, "learning_rate": 1.3586406639041179e-05, "loss": 0.2432, "step": 21225 }, { "epoch": 0.3833264720964329, "grad_norm": 0.4343605935573578, "learning_rate": 1.3583758931144995e-05, "loss": 0.2756, "step": 21230 }, { "epoch": 0.3834167515293336, "grad_norm": 0.6409178972244263, "learning_rate": 1.3581110934961459e-05, "loss": 0.3335, "step": 21235 }, { "epoch": 0.3835070309622343, "grad_norm": 0.3212273418903351, "learning_rate": 1.3578462650703585e-05, "loss": 0.215, "step": 21240 }, { "epoch": 0.383597310395135, "grad_norm": 0.3993001878261566, "learning_rate": 1.3575814078584411e-05, "loss": 0.2568, "step": 21245 }, { "epoch": 0.38368758982803575, "grad_norm": 0.8827431797981262, "learning_rate": 1.3573165218816991e-05, "loss": 0.2671, "step": 21250 }, { "epoch": 0.38377786926093643, "grad_norm": 0.3880627751350403, "learning_rate": 1.357051607161441e-05, "loss": 0.2351, "step": 21255 }, { "epoch": 0.3838681486938372, "grad_norm": 0.45617276430130005, "learning_rate": 1.356786663718977e-05, "loss": 0.2768, "step": 21260 }, { "epoch": 0.38395842812673786, "grad_norm": 0.4872366487979889, "learning_rate": 1.3565216915756199e-05, "loss": 0.2729, "step": 21265 }, { "epoch": 0.3840487075596386, "grad_norm": 0.638798177242279, "learning_rate": 1.3562566907526851e-05, "loss": 0.3035, "step": 21270 }, { "epoch": 0.3841389869925393, "grad_norm": 0.7527413964271545, "learning_rate": 1.3559916612714898e-05, "loss": 0.3511, "step": 21275 }, { "epoch": 0.38422926642544003, "grad_norm": 0.5562458634376526, "learning_rate": 1.3557266031533536e-05, "loss": 0.2414, "step": 21280 }, { "epoch": 0.3843195458583407, "grad_norm": 0.4700249135494232, "learning_rate": 1.3554615164195983e-05, "loss": 0.2109, "step": 21285 }, { "epoch": 0.38440982529124146, "grad_norm": 0.31953826546669006, "learning_rate": 1.3551964010915488e-05, "loss": 0.2183, "step": 21290 }, { "epoch": 0.38450010472414214, "grad_norm": 0.4933184087276459, "learning_rate": 1.3549312571905313e-05, "loss": 0.2185, "step": 21295 }, { "epoch": 0.3845903841570429, "grad_norm": 0.5202569961547852, "learning_rate": 1.3546660847378746e-05, "loss": 0.2111, "step": 21300 }, { "epoch": 0.38468066358994357, "grad_norm": 0.5522760152816772, "learning_rate": 1.35440088375491e-05, "loss": 0.2552, "step": 21305 }, { "epoch": 0.3847709430228443, "grad_norm": 0.4002067744731903, "learning_rate": 1.3541356542629713e-05, "loss": 0.2786, "step": 21310 }, { "epoch": 0.384861222455745, "grad_norm": 0.44965609908103943, "learning_rate": 1.353870396283394e-05, "loss": 0.2857, "step": 21315 }, { "epoch": 0.38495150188864574, "grad_norm": 0.4091430604457855, "learning_rate": 1.3536051098375162e-05, "loss": 0.3082, "step": 21320 }, { "epoch": 0.38504178132154643, "grad_norm": 0.4339289963245392, "learning_rate": 1.3533397949466785e-05, "loss": 0.2586, "step": 21325 }, { "epoch": 0.38513206075444717, "grad_norm": 0.5966063737869263, "learning_rate": 1.3530744516322227e-05, "loss": 0.3589, "step": 21330 }, { "epoch": 0.38522234018734786, "grad_norm": 0.5281093716621399, "learning_rate": 1.3528090799154946e-05, "loss": 0.3192, "step": 21335 }, { "epoch": 0.3853126196202486, "grad_norm": 1.1246488094329834, "learning_rate": 1.3525436798178416e-05, "loss": 0.2516, "step": 21340 }, { "epoch": 0.3854028990531493, "grad_norm": 0.5170313715934753, "learning_rate": 1.3522782513606123e-05, "loss": 0.258, "step": 21345 }, { "epoch": 0.38549317848605, "grad_norm": 0.3201058804988861, "learning_rate": 1.352012794565159e-05, "loss": 0.3076, "step": 21350 }, { "epoch": 0.3855834579189507, "grad_norm": 0.5544672608375549, "learning_rate": 1.3517473094528359e-05, "loss": 0.1814, "step": 21355 }, { "epoch": 0.38567373735185145, "grad_norm": 0.32615146040916443, "learning_rate": 1.3514817960449994e-05, "loss": 0.2321, "step": 21360 }, { "epoch": 0.38576401678475214, "grad_norm": 0.4158816337585449, "learning_rate": 1.3512162543630076e-05, "loss": 0.3257, "step": 21365 }, { "epoch": 0.3858542962176529, "grad_norm": 0.4445611536502838, "learning_rate": 1.3509506844282221e-05, "loss": 0.2883, "step": 21370 }, { "epoch": 0.38594457565055357, "grad_norm": 0.5737717151641846, "learning_rate": 1.3506850862620055e-05, "loss": 0.2249, "step": 21375 }, { "epoch": 0.3860348550834543, "grad_norm": 0.5599545240402222, "learning_rate": 1.3504194598857234e-05, "loss": 0.2635, "step": 21380 }, { "epoch": 0.386125134516355, "grad_norm": 0.47273439168930054, "learning_rate": 1.3501538053207435e-05, "loss": 0.2185, "step": 21385 }, { "epoch": 0.38621541394925574, "grad_norm": 0.3157688081264496, "learning_rate": 1.349888122588436e-05, "loss": 0.2757, "step": 21390 }, { "epoch": 0.3863056933821564, "grad_norm": 0.38830289244651794, "learning_rate": 1.3496224117101728e-05, "loss": 0.2529, "step": 21395 }, { "epoch": 0.38639597281505716, "grad_norm": 0.26463016867637634, "learning_rate": 1.3493566727073284e-05, "loss": 0.2809, "step": 21400 }, { "epoch": 0.3864862522479579, "grad_norm": 0.2879667580127716, "learning_rate": 1.3490909056012803e-05, "loss": 0.2566, "step": 21405 }, { "epoch": 0.3865765316808586, "grad_norm": 0.4215259850025177, "learning_rate": 1.3488251104134065e-05, "loss": 0.287, "step": 21410 }, { "epoch": 0.38666681111375933, "grad_norm": 0.40119993686676025, "learning_rate": 1.3485592871650889e-05, "loss": 0.2952, "step": 21415 }, { "epoch": 0.38675709054666, "grad_norm": 0.3995286226272583, "learning_rate": 1.348293435877711e-05, "loss": 0.2095, "step": 21420 }, { "epoch": 0.38684736997956076, "grad_norm": 0.46701765060424805, "learning_rate": 1.3480275565726585e-05, "loss": 0.286, "step": 21425 }, { "epoch": 0.38693764941246145, "grad_norm": 1.2765496969223022, "learning_rate": 1.3477616492713195e-05, "loss": 0.2907, "step": 21430 }, { "epoch": 0.3870279288453622, "grad_norm": 0.43694746494293213, "learning_rate": 1.3474957139950844e-05, "loss": 0.2878, "step": 21435 }, { "epoch": 0.3871182082782629, "grad_norm": 0.41188398003578186, "learning_rate": 1.3472297507653454e-05, "loss": 0.2602, "step": 21440 }, { "epoch": 0.3872084877111636, "grad_norm": 0.6653173565864563, "learning_rate": 1.3469637596034978e-05, "loss": 0.2713, "step": 21445 }, { "epoch": 0.3872987671440643, "grad_norm": 0.49647408723831177, "learning_rate": 1.346697740530938e-05, "loss": 0.2557, "step": 21450 }, { "epoch": 0.38738904657696505, "grad_norm": 0.3037748336791992, "learning_rate": 1.346431693569066e-05, "loss": 0.1898, "step": 21455 }, { "epoch": 0.38747932600986573, "grad_norm": 0.4379635155200958, "learning_rate": 1.346165618739283e-05, "loss": 0.2372, "step": 21460 }, { "epoch": 0.3875696054427665, "grad_norm": 0.5771574974060059, "learning_rate": 1.3458995160629928e-05, "loss": 0.2403, "step": 21465 }, { "epoch": 0.38765988487566716, "grad_norm": 0.7019838690757751, "learning_rate": 1.3456333855616017e-05, "loss": 0.2481, "step": 21470 }, { "epoch": 0.3877501643085679, "grad_norm": 0.4674120247364044, "learning_rate": 1.3453672272565175e-05, "loss": 0.2677, "step": 21475 }, { "epoch": 0.3878404437414686, "grad_norm": 0.8151453733444214, "learning_rate": 1.345101041169151e-05, "loss": 0.189, "step": 21480 }, { "epoch": 0.38793072317436933, "grad_norm": 0.35866454243659973, "learning_rate": 1.3448348273209147e-05, "loss": 0.1593, "step": 21485 }, { "epoch": 0.38802100260727, "grad_norm": 0.5386594533920288, "learning_rate": 1.344568585733224e-05, "loss": 0.2609, "step": 21490 }, { "epoch": 0.38811128204017076, "grad_norm": 0.41673269867897034, "learning_rate": 1.3443023164274958e-05, "loss": 0.2291, "step": 21495 }, { "epoch": 0.38820156147307144, "grad_norm": 0.3212771415710449, "learning_rate": 1.3440360194251494e-05, "loss": 0.2891, "step": 21500 }, { "epoch": 0.3882918409059722, "grad_norm": 0.5306001901626587, "learning_rate": 1.3437696947476068e-05, "loss": 0.2982, "step": 21505 }, { "epoch": 0.38838212033887287, "grad_norm": 0.3461056351661682, "learning_rate": 1.3435033424162917e-05, "loss": 0.2977, "step": 21510 }, { "epoch": 0.3884723997717736, "grad_norm": 0.4729425609111786, "learning_rate": 1.34323696245263e-05, "loss": 0.361, "step": 21515 }, { "epoch": 0.3885626792046743, "grad_norm": 0.3501926064491272, "learning_rate": 1.3429705548780505e-05, "loss": 0.2411, "step": 21520 }, { "epoch": 0.38865295863757504, "grad_norm": 0.3689830005168915, "learning_rate": 1.3427041197139835e-05, "loss": 0.2079, "step": 21525 }, { "epoch": 0.3887432380704757, "grad_norm": 0.3276408016681671, "learning_rate": 1.3424376569818614e-05, "loss": 0.3374, "step": 21530 }, { "epoch": 0.38883351750337647, "grad_norm": 0.4310067594051361, "learning_rate": 1.34217116670312e-05, "loss": 0.273, "step": 21535 }, { "epoch": 0.38892379693627716, "grad_norm": 0.3747933506965637, "learning_rate": 1.3419046488991955e-05, "loss": 0.1641, "step": 21540 }, { "epoch": 0.3890140763691779, "grad_norm": 0.3694609999656677, "learning_rate": 1.3416381035915282e-05, "loss": 0.2563, "step": 21545 }, { "epoch": 0.3891043558020786, "grad_norm": 0.3853660821914673, "learning_rate": 1.3413715308015596e-05, "loss": 0.3523, "step": 21550 }, { "epoch": 0.3891946352349793, "grad_norm": 0.4884442090988159, "learning_rate": 1.3411049305507332e-05, "loss": 0.2402, "step": 21555 }, { "epoch": 0.38928491466788, "grad_norm": 0.5782903432846069, "learning_rate": 1.3408383028604952e-05, "loss": 0.2228, "step": 21560 }, { "epoch": 0.38937519410078075, "grad_norm": 0.5393293499946594, "learning_rate": 1.3405716477522938e-05, "loss": 0.2738, "step": 21565 }, { "epoch": 0.38946547353368144, "grad_norm": 0.505789577960968, "learning_rate": 1.3403049652475797e-05, "loss": 0.2972, "step": 21570 }, { "epoch": 0.3895557529665822, "grad_norm": 0.6107163429260254, "learning_rate": 1.3400382553678053e-05, "loss": 0.3708, "step": 21575 }, { "epoch": 0.38964603239948287, "grad_norm": 0.41639187932014465, "learning_rate": 1.3397715181344255e-05, "loss": 0.336, "step": 21580 }, { "epoch": 0.3897363118323836, "grad_norm": 0.5116377472877502, "learning_rate": 1.3395047535688977e-05, "loss": 0.1877, "step": 21585 }, { "epoch": 0.3898265912652843, "grad_norm": 0.5431540012359619, "learning_rate": 1.3392379616926808e-05, "loss": 0.2468, "step": 21590 }, { "epoch": 0.38991687069818504, "grad_norm": 0.562221884727478, "learning_rate": 1.3389711425272363e-05, "loss": 0.2512, "step": 21595 }, { "epoch": 0.3900071501310857, "grad_norm": 0.4357643127441406, "learning_rate": 1.338704296094028e-05, "loss": 0.1748, "step": 21600 }, { "epoch": 0.39009742956398646, "grad_norm": 0.5541381239891052, "learning_rate": 1.3384374224145219e-05, "loss": 0.2372, "step": 21605 }, { "epoch": 0.39018770899688715, "grad_norm": 0.5287481546401978, "learning_rate": 1.3381705215101857e-05, "loss": 0.3041, "step": 21610 }, { "epoch": 0.3902779884297879, "grad_norm": 0.5050212144851685, "learning_rate": 1.3379035934024894e-05, "loss": 0.239, "step": 21615 }, { "epoch": 0.3903682678626886, "grad_norm": 0.6532121896743774, "learning_rate": 1.337636638112906e-05, "loss": 0.2447, "step": 21620 }, { "epoch": 0.3904585472955893, "grad_norm": 0.4641731381416321, "learning_rate": 1.3373696556629102e-05, "loss": 0.2577, "step": 21625 }, { "epoch": 0.39054882672849, "grad_norm": 0.36768922209739685, "learning_rate": 1.3371026460739783e-05, "loss": 0.2899, "step": 21630 }, { "epoch": 0.39063910616139075, "grad_norm": 0.4381278455257416, "learning_rate": 1.3368356093675896e-05, "loss": 0.2455, "step": 21635 }, { "epoch": 0.39072938559429143, "grad_norm": 0.4196430444717407, "learning_rate": 1.336568545565225e-05, "loss": 0.2358, "step": 21640 }, { "epoch": 0.3908196650271922, "grad_norm": 0.6201593279838562, "learning_rate": 1.3363014546883679e-05, "loss": 0.2729, "step": 21645 }, { "epoch": 0.39090994446009286, "grad_norm": 0.6143485903739929, "learning_rate": 1.3360343367585042e-05, "loss": 0.2572, "step": 21650 }, { "epoch": 0.3910002238929936, "grad_norm": 0.2882515788078308, "learning_rate": 1.3357671917971213e-05, "loss": 0.184, "step": 21655 }, { "epoch": 0.3910905033258943, "grad_norm": 0.43455737829208374, "learning_rate": 1.335500019825709e-05, "loss": 0.3488, "step": 21660 }, { "epoch": 0.39118078275879503, "grad_norm": 0.35219860076904297, "learning_rate": 1.3352328208657594e-05, "loss": 0.2462, "step": 21665 }, { "epoch": 0.3912710621916957, "grad_norm": 0.4262147545814514, "learning_rate": 1.3349655949387667e-05, "loss": 0.3275, "step": 21670 }, { "epoch": 0.39136134162459646, "grad_norm": 0.345892995595932, "learning_rate": 1.3346983420662275e-05, "loss": 0.219, "step": 21675 }, { "epoch": 0.39145162105749715, "grad_norm": 0.5044654607772827, "learning_rate": 1.33443106226964e-05, "loss": 0.2579, "step": 21680 }, { "epoch": 0.3915419004903979, "grad_norm": 0.5730448365211487, "learning_rate": 1.3341637555705051e-05, "loss": 0.3406, "step": 21685 }, { "epoch": 0.3916321799232986, "grad_norm": 0.5646150708198547, "learning_rate": 1.333896421990326e-05, "loss": 0.3259, "step": 21690 }, { "epoch": 0.3917224593561993, "grad_norm": 0.5319784879684448, "learning_rate": 1.3336290615506072e-05, "loss": 0.2592, "step": 21695 }, { "epoch": 0.3918127387891, "grad_norm": 0.5225646495819092, "learning_rate": 1.3333616742728559e-05, "loss": 0.2272, "step": 21700 }, { "epoch": 0.39190301822200074, "grad_norm": 0.6103857159614563, "learning_rate": 1.3330942601785819e-05, "loss": 0.309, "step": 21705 }, { "epoch": 0.39199329765490143, "grad_norm": 0.5389057397842407, "learning_rate": 1.3328268192892963e-05, "loss": 0.2542, "step": 21710 }, { "epoch": 0.39208357708780217, "grad_norm": 0.21751132607460022, "learning_rate": 1.3325593516265132e-05, "loss": 0.1974, "step": 21715 }, { "epoch": 0.39217385652070286, "grad_norm": 0.3460856080055237, "learning_rate": 1.3322918572117483e-05, "loss": 0.2763, "step": 21720 }, { "epoch": 0.3922641359536036, "grad_norm": 0.45296546816825867, "learning_rate": 1.3320243360665193e-05, "loss": 0.2324, "step": 21725 }, { "epoch": 0.3923544153865043, "grad_norm": 0.6587622761726379, "learning_rate": 1.3317567882123463e-05, "loss": 0.2041, "step": 21730 }, { "epoch": 0.392444694819405, "grad_norm": 0.3234507441520691, "learning_rate": 1.331489213670752e-05, "loss": 0.2638, "step": 21735 }, { "epoch": 0.3925349742523057, "grad_norm": 0.3528299331665039, "learning_rate": 1.3312216124632609e-05, "loss": 0.2844, "step": 21740 }, { "epoch": 0.39262525368520645, "grad_norm": 0.5674415826797485, "learning_rate": 1.3309539846113988e-05, "loss": 0.277, "step": 21745 }, { "epoch": 0.39271553311810714, "grad_norm": 0.2994835674762726, "learning_rate": 1.330686330136695e-05, "loss": 0.2854, "step": 21750 }, { "epoch": 0.3928058125510079, "grad_norm": 0.7018741369247437, "learning_rate": 1.3304186490606805e-05, "loss": 0.2942, "step": 21755 }, { "epoch": 0.39289609198390857, "grad_norm": 0.4478186368942261, "learning_rate": 1.330150941404888e-05, "loss": 0.2202, "step": 21760 }, { "epoch": 0.3929863714168093, "grad_norm": 0.455462783575058, "learning_rate": 1.3298832071908525e-05, "loss": 0.2077, "step": 21765 }, { "epoch": 0.39307665084971, "grad_norm": 0.5022167563438416, "learning_rate": 1.3296154464401117e-05, "loss": 0.2687, "step": 21770 }, { "epoch": 0.39316693028261074, "grad_norm": 0.5095340609550476, "learning_rate": 1.3293476591742047e-05, "loss": 0.3387, "step": 21775 }, { "epoch": 0.3932572097155114, "grad_norm": 0.4713091254234314, "learning_rate": 1.3290798454146731e-05, "loss": 0.2839, "step": 21780 }, { "epoch": 0.39334748914841217, "grad_norm": 0.5228136777877808, "learning_rate": 1.3288120051830607e-05, "loss": 0.2061, "step": 21785 }, { "epoch": 0.39343776858131285, "grad_norm": 0.5140119194984436, "learning_rate": 1.328544138500913e-05, "loss": 0.2326, "step": 21790 }, { "epoch": 0.3935280480142136, "grad_norm": 0.4276304543018341, "learning_rate": 1.3282762453897779e-05, "loss": 0.1985, "step": 21795 }, { "epoch": 0.39361832744711434, "grad_norm": 0.2913047969341278, "learning_rate": 1.328008325871206e-05, "loss": 0.2378, "step": 21800 }, { "epoch": 0.393708606880015, "grad_norm": 0.5427494049072266, "learning_rate": 1.3277403799667493e-05, "loss": 0.2491, "step": 21805 }, { "epoch": 0.39379888631291576, "grad_norm": 0.47361496090888977, "learning_rate": 1.3274724076979612e-05, "loss": 0.2625, "step": 21810 }, { "epoch": 0.39388916574581645, "grad_norm": 0.42510926723480225, "learning_rate": 1.3272044090863995e-05, "loss": 0.2812, "step": 21815 }, { "epoch": 0.3939794451787172, "grad_norm": 0.5460023880004883, "learning_rate": 1.3269363841536218e-05, "loss": 0.3689, "step": 21820 }, { "epoch": 0.3940697246116179, "grad_norm": 0.5921125411987305, "learning_rate": 1.3266683329211894e-05, "loss": 0.2266, "step": 21825 }, { "epoch": 0.3941600040445186, "grad_norm": 0.6031335592269897, "learning_rate": 1.3264002554106642e-05, "loss": 0.3288, "step": 21830 }, { "epoch": 0.3942502834774193, "grad_norm": 0.4571972191333771, "learning_rate": 1.326132151643612e-05, "loss": 0.244, "step": 21835 }, { "epoch": 0.39434056291032005, "grad_norm": 0.3885684609413147, "learning_rate": 1.3258640216415995e-05, "loss": 0.272, "step": 21840 }, { "epoch": 0.39443084234322073, "grad_norm": 0.788741409778595, "learning_rate": 1.3255958654261952e-05, "loss": 0.3086, "step": 21845 }, { "epoch": 0.3945211217761215, "grad_norm": 0.5432313680648804, "learning_rate": 1.3253276830189712e-05, "loss": 0.2451, "step": 21850 }, { "epoch": 0.39461140120902216, "grad_norm": 0.27433672547340393, "learning_rate": 1.3250594744415006e-05, "loss": 0.284, "step": 21855 }, { "epoch": 0.3947016806419229, "grad_norm": 0.4588322639465332, "learning_rate": 1.3247912397153586e-05, "loss": 0.279, "step": 21860 }, { "epoch": 0.3947919600748236, "grad_norm": 0.35396280884742737, "learning_rate": 1.3245229788621226e-05, "loss": 0.3236, "step": 21865 }, { "epoch": 0.39488223950772433, "grad_norm": 0.4001164138317108, "learning_rate": 1.3242546919033726e-05, "loss": 0.1426, "step": 21870 }, { "epoch": 0.394972518940625, "grad_norm": 2.4019925594329834, "learning_rate": 1.3239863788606902e-05, "loss": 0.1902, "step": 21875 }, { "epoch": 0.39506279837352576, "grad_norm": 0.35852307081222534, "learning_rate": 1.3237180397556592e-05, "loss": 0.2625, "step": 21880 }, { "epoch": 0.39515307780642644, "grad_norm": 0.6176601052284241, "learning_rate": 1.3234496746098657e-05, "loss": 0.2788, "step": 21885 }, { "epoch": 0.3952433572393272, "grad_norm": 0.3619178533554077, "learning_rate": 1.3231812834448976e-05, "loss": 0.2386, "step": 21890 }, { "epoch": 0.3953336366722279, "grad_norm": 0.30432456731796265, "learning_rate": 1.3229128662823447e-05, "loss": 0.2922, "step": 21895 }, { "epoch": 0.3954239161051286, "grad_norm": 0.5979883670806885, "learning_rate": 1.3226444231437998e-05, "loss": 0.2852, "step": 21900 }, { "epoch": 0.3955141955380293, "grad_norm": 0.3860207498073578, "learning_rate": 1.3223759540508568e-05, "loss": 0.2429, "step": 21905 }, { "epoch": 0.39560447497093004, "grad_norm": 0.5804364681243896, "learning_rate": 1.3221074590251121e-05, "loss": 0.2835, "step": 21910 }, { "epoch": 0.39569475440383073, "grad_norm": 0.7440268993377686, "learning_rate": 1.3218389380881645e-05, "loss": 0.3498, "step": 21915 }, { "epoch": 0.39578503383673147, "grad_norm": 0.5781992673873901, "learning_rate": 1.3215703912616143e-05, "loss": 0.3228, "step": 21920 }, { "epoch": 0.39587531326963216, "grad_norm": 0.6474190950393677, "learning_rate": 1.3213018185670638e-05, "loss": 0.3353, "step": 21925 }, { "epoch": 0.3959655927025329, "grad_norm": 0.28923219442367554, "learning_rate": 1.3210332200261183e-05, "loss": 0.3499, "step": 21930 }, { "epoch": 0.3960558721354336, "grad_norm": 0.452716588973999, "learning_rate": 1.3207645956603845e-05, "loss": 0.2217, "step": 21935 }, { "epoch": 0.3961461515683343, "grad_norm": 0.4790239632129669, "learning_rate": 1.3204959454914712e-05, "loss": 0.3062, "step": 21940 }, { "epoch": 0.396236431001235, "grad_norm": 0.5556036233901978, "learning_rate": 1.320227269540989e-05, "loss": 0.286, "step": 21945 }, { "epoch": 0.39632671043413575, "grad_norm": 0.6638554930686951, "learning_rate": 1.3199585678305516e-05, "loss": 0.2642, "step": 21950 }, { "epoch": 0.39641698986703644, "grad_norm": 0.6628899574279785, "learning_rate": 1.3196898403817735e-05, "loss": 0.2448, "step": 21955 }, { "epoch": 0.3965072692999372, "grad_norm": 0.35188084840774536, "learning_rate": 1.3194210872162723e-05, "loss": 0.334, "step": 21960 }, { "epoch": 0.39659754873283787, "grad_norm": 0.33103373646736145, "learning_rate": 1.319152308355667e-05, "loss": 0.2284, "step": 21965 }, { "epoch": 0.3966878281657386, "grad_norm": 0.4334578514099121, "learning_rate": 1.3188835038215788e-05, "loss": 0.2292, "step": 21970 }, { "epoch": 0.3967781075986393, "grad_norm": 0.6531385779380798, "learning_rate": 1.3186146736356314e-05, "loss": 0.2756, "step": 21975 }, { "epoch": 0.39686838703154004, "grad_norm": 0.3080330491065979, "learning_rate": 1.3183458178194499e-05, "loss": 0.2535, "step": 21980 }, { "epoch": 0.3969586664644407, "grad_norm": 2.054600238800049, "learning_rate": 1.3180769363946621e-05, "loss": 0.2555, "step": 21985 }, { "epoch": 0.39704894589734147, "grad_norm": 0.43283167481422424, "learning_rate": 1.3178080293828974e-05, "loss": 0.291, "step": 21990 }, { "epoch": 0.39713922533024215, "grad_norm": 0.371512234210968, "learning_rate": 1.3175390968057872e-05, "loss": 0.3182, "step": 21995 }, { "epoch": 0.3972295047631429, "grad_norm": 0.5806812644004822, "learning_rate": 1.3172701386849659e-05, "loss": 0.2578, "step": 22000 }, { "epoch": 0.3973197841960436, "grad_norm": 0.40547439455986023, "learning_rate": 1.3170011550420682e-05, "loss": 0.2778, "step": 22005 }, { "epoch": 0.3974100636289443, "grad_norm": 0.5131949186325073, "learning_rate": 1.3167321458987326e-05, "loss": 0.1756, "step": 22010 }, { "epoch": 0.397500343061845, "grad_norm": 0.5438373684883118, "learning_rate": 1.3164631112765987e-05, "loss": 0.2903, "step": 22015 }, { "epoch": 0.39759062249474575, "grad_norm": 0.4344537556171417, "learning_rate": 1.3161940511973084e-05, "loss": 0.3439, "step": 22020 }, { "epoch": 0.39768090192764644, "grad_norm": 0.4997982680797577, "learning_rate": 1.3159249656825057e-05, "loss": 0.345, "step": 22025 }, { "epoch": 0.3977711813605472, "grad_norm": 0.36932867765426636, "learning_rate": 1.3156558547538362e-05, "loss": 0.212, "step": 22030 }, { "epoch": 0.39786146079344786, "grad_norm": 0.6555669903755188, "learning_rate": 1.3153867184329488e-05, "loss": 0.2374, "step": 22035 }, { "epoch": 0.3979517402263486, "grad_norm": 3.40368390083313, "learning_rate": 1.3151175567414927e-05, "loss": 0.2516, "step": 22040 }, { "epoch": 0.3980420196592493, "grad_norm": 0.6993756890296936, "learning_rate": 1.3148483697011201e-05, "loss": 0.3079, "step": 22045 }, { "epoch": 0.39813229909215003, "grad_norm": 0.5923683047294617, "learning_rate": 1.3145791573334854e-05, "loss": 0.2734, "step": 22050 }, { "epoch": 0.3982225785250507, "grad_norm": 0.4434371888637543, "learning_rate": 1.314309919660245e-05, "loss": 0.2839, "step": 22055 }, { "epoch": 0.39831285795795146, "grad_norm": 0.5689956545829773, "learning_rate": 1.3140406567030565e-05, "loss": 0.2707, "step": 22060 }, { "epoch": 0.39840313739085215, "grad_norm": 0.354709267616272, "learning_rate": 1.3137713684835806e-05, "loss": 0.2433, "step": 22065 }, { "epoch": 0.3984934168237529, "grad_norm": 0.44803231954574585, "learning_rate": 1.3135020550234797e-05, "loss": 0.2332, "step": 22070 }, { "epoch": 0.3985836962566536, "grad_norm": 0.2759532630443573, "learning_rate": 1.3132327163444174e-05, "loss": 0.181, "step": 22075 }, { "epoch": 0.3986739756895543, "grad_norm": 0.36494264006614685, "learning_rate": 1.3129633524680604e-05, "loss": 0.2335, "step": 22080 }, { "epoch": 0.398764255122455, "grad_norm": 0.37547898292541504, "learning_rate": 1.3126939634160775e-05, "loss": 0.2587, "step": 22085 }, { "epoch": 0.39885453455535574, "grad_norm": 0.4002203643321991, "learning_rate": 1.3124245492101389e-05, "loss": 0.2481, "step": 22090 }, { "epoch": 0.39894481398825643, "grad_norm": 0.4837264120578766, "learning_rate": 1.3121551098719165e-05, "loss": 0.3074, "step": 22095 }, { "epoch": 0.39903509342115717, "grad_norm": 0.38516107201576233, "learning_rate": 1.3118856454230853e-05, "loss": 0.1741, "step": 22100 }, { "epoch": 0.39912537285405786, "grad_norm": 0.43117663264274597, "learning_rate": 1.3116161558853216e-05, "loss": 0.2468, "step": 22105 }, { "epoch": 0.3992156522869586, "grad_norm": 0.8077203035354614, "learning_rate": 1.3113466412803036e-05, "loss": 0.2171, "step": 22110 }, { "epoch": 0.3993059317198593, "grad_norm": 0.5902336835861206, "learning_rate": 1.311077101629712e-05, "loss": 0.2886, "step": 22115 }, { "epoch": 0.39939621115276003, "grad_norm": 0.927211582660675, "learning_rate": 1.3108075369552295e-05, "loss": 0.317, "step": 22120 }, { "epoch": 0.3994864905856607, "grad_norm": 0.27386292815208435, "learning_rate": 1.3105379472785405e-05, "loss": 0.2825, "step": 22125 }, { "epoch": 0.39957677001856146, "grad_norm": 0.4687485992908478, "learning_rate": 1.3102683326213312e-05, "loss": 0.3169, "step": 22130 }, { "epoch": 0.39966704945146214, "grad_norm": 0.5701487064361572, "learning_rate": 1.3099986930052906e-05, "loss": 0.1922, "step": 22135 }, { "epoch": 0.3997573288843629, "grad_norm": 0.5942817330360413, "learning_rate": 1.3097290284521091e-05, "loss": 0.2372, "step": 22140 }, { "epoch": 0.39984760831726357, "grad_norm": 0.6339251399040222, "learning_rate": 1.3094593389834789e-05, "loss": 0.2858, "step": 22145 }, { "epoch": 0.3999378877501643, "grad_norm": 0.30864328145980835, "learning_rate": 1.3091896246210953e-05, "loss": 0.3315, "step": 22150 }, { "epoch": 0.400028167183065, "grad_norm": 0.7753767967224121, "learning_rate": 1.3089198853866543e-05, "loss": 0.2209, "step": 22155 }, { "epoch": 0.40011844661596574, "grad_norm": 0.4362318217754364, "learning_rate": 1.3086501213018546e-05, "loss": 0.2329, "step": 22160 }, { "epoch": 0.4002087260488664, "grad_norm": 0.4449676275253296, "learning_rate": 1.3083803323883966e-05, "loss": 0.2642, "step": 22165 }, { "epoch": 0.40029900548176717, "grad_norm": 0.5464814901351929, "learning_rate": 1.308110518667983e-05, "loss": 0.3336, "step": 22170 }, { "epoch": 0.40038928491466785, "grad_norm": 0.43090885877609253, "learning_rate": 1.3078406801623182e-05, "loss": 0.1789, "step": 22175 }, { "epoch": 0.4004795643475686, "grad_norm": 0.5409093499183655, "learning_rate": 1.3075708168931093e-05, "loss": 0.2445, "step": 22180 }, { "epoch": 0.4005698437804693, "grad_norm": 0.4051618278026581, "learning_rate": 1.3073009288820643e-05, "loss": 0.2874, "step": 22185 }, { "epoch": 0.40066012321337, "grad_norm": 0.3605162799358368, "learning_rate": 1.3070310161508939e-05, "loss": 0.2565, "step": 22190 }, { "epoch": 0.40075040264627076, "grad_norm": 0.31601181626319885, "learning_rate": 1.3067610787213103e-05, "loss": 0.2436, "step": 22195 }, { "epoch": 0.40084068207917145, "grad_norm": 0.4679415225982666, "learning_rate": 1.3064911166150284e-05, "loss": 0.3179, "step": 22200 }, { "epoch": 0.4009309615120722, "grad_norm": 0.23421575129032135, "learning_rate": 1.3062211298537648e-05, "loss": 0.2115, "step": 22205 }, { "epoch": 0.4010212409449729, "grad_norm": 0.3195156455039978, "learning_rate": 1.3059511184592375e-05, "loss": 0.202, "step": 22210 }, { "epoch": 0.4011115203778736, "grad_norm": 0.31775689125061035, "learning_rate": 1.3056810824531674e-05, "loss": 0.1927, "step": 22215 }, { "epoch": 0.4012017998107743, "grad_norm": 0.42000657320022583, "learning_rate": 1.3054110218572766e-05, "loss": 0.2534, "step": 22220 }, { "epoch": 0.40129207924367505, "grad_norm": 0.3960062861442566, "learning_rate": 1.3051409366932894e-05, "loss": 0.3371, "step": 22225 }, { "epoch": 0.40138235867657573, "grad_norm": 0.39160043001174927, "learning_rate": 1.304870826982933e-05, "loss": 0.2168, "step": 22230 }, { "epoch": 0.4014726381094765, "grad_norm": 0.3743045926094055, "learning_rate": 1.3046006927479349e-05, "loss": 0.2778, "step": 22235 }, { "epoch": 0.40156291754237716, "grad_norm": 0.31039419770240784, "learning_rate": 1.3043305340100259e-05, "loss": 0.2947, "step": 22240 }, { "epoch": 0.4016531969752779, "grad_norm": 0.4929141402244568, "learning_rate": 1.304060350790938e-05, "loss": 0.3441, "step": 22245 }, { "epoch": 0.4017434764081786, "grad_norm": 0.4496571719646454, "learning_rate": 1.3037901431124053e-05, "loss": 0.2276, "step": 22250 }, { "epoch": 0.40183375584107933, "grad_norm": 0.44919899106025696, "learning_rate": 1.3035199109961649e-05, "loss": 0.3278, "step": 22255 }, { "epoch": 0.40192403527398, "grad_norm": 0.3038470149040222, "learning_rate": 1.3032496544639539e-05, "loss": 0.3122, "step": 22260 }, { "epoch": 0.40201431470688076, "grad_norm": 0.368513822555542, "learning_rate": 1.3029793735375136e-05, "loss": 0.2556, "step": 22265 }, { "epoch": 0.40210459413978145, "grad_norm": 0.5456978678703308, "learning_rate": 1.3027090682385852e-05, "loss": 0.3737, "step": 22270 }, { "epoch": 0.4021948735726822, "grad_norm": 0.38461658358573914, "learning_rate": 1.3024387385889132e-05, "loss": 0.2524, "step": 22275 }, { "epoch": 0.4022851530055829, "grad_norm": 0.5025389790534973, "learning_rate": 1.3021683846102437e-05, "loss": 0.3344, "step": 22280 }, { "epoch": 0.4023754324384836, "grad_norm": 0.8047762513160706, "learning_rate": 1.3018980063243248e-05, "loss": 0.2436, "step": 22285 }, { "epoch": 0.4024657118713843, "grad_norm": 0.515868067741394, "learning_rate": 1.301627603752906e-05, "loss": 0.23, "step": 22290 }, { "epoch": 0.40255599130428504, "grad_norm": 0.3612858057022095, "learning_rate": 1.3013571769177395e-05, "loss": 0.2775, "step": 22295 }, { "epoch": 0.40264627073718573, "grad_norm": 0.5300875902175903, "learning_rate": 1.3010867258405796e-05, "loss": 0.3596, "step": 22300 }, { "epoch": 0.40273655017008647, "grad_norm": 1.0323123931884766, "learning_rate": 1.3008162505431813e-05, "loss": 0.3625, "step": 22305 }, { "epoch": 0.40282682960298716, "grad_norm": 0.526929497718811, "learning_rate": 1.300545751047303e-05, "loss": 0.2416, "step": 22310 }, { "epoch": 0.4029171090358879, "grad_norm": 0.5251116752624512, "learning_rate": 1.3002752273747041e-05, "loss": 0.313, "step": 22315 }, { "epoch": 0.4030073884687886, "grad_norm": 0.3955122232437134, "learning_rate": 1.3000046795471467e-05, "loss": 0.2874, "step": 22320 }, { "epoch": 0.4030976679016893, "grad_norm": 0.4317103326320648, "learning_rate": 1.2997341075863934e-05, "loss": 0.2103, "step": 22325 }, { "epoch": 0.40318794733459, "grad_norm": 0.6640254259109497, "learning_rate": 1.299463511514211e-05, "loss": 0.326, "step": 22330 }, { "epoch": 0.40327822676749076, "grad_norm": 0.4563729763031006, "learning_rate": 1.2991928913523663e-05, "loss": 0.221, "step": 22335 }, { "epoch": 0.40336850620039144, "grad_norm": 0.35100239515304565, "learning_rate": 1.2989222471226288e-05, "loss": 0.2703, "step": 22340 }, { "epoch": 0.4034587856332922, "grad_norm": 0.3896128237247467, "learning_rate": 1.29865157884677e-05, "loss": 0.3096, "step": 22345 }, { "epoch": 0.40354906506619287, "grad_norm": 1.338800072669983, "learning_rate": 1.2983808865465629e-05, "loss": 0.2735, "step": 22350 }, { "epoch": 0.4036393444990936, "grad_norm": 0.6762623190879822, "learning_rate": 1.2981101702437831e-05, "loss": 0.2059, "step": 22355 }, { "epoch": 0.4037296239319943, "grad_norm": 0.44800934195518494, "learning_rate": 1.2978394299602073e-05, "loss": 0.225, "step": 22360 }, { "epoch": 0.40381990336489504, "grad_norm": 0.2908240556716919, "learning_rate": 1.297568665717615e-05, "loss": 0.2992, "step": 22365 }, { "epoch": 0.4039101827977957, "grad_norm": 0.4273534119129181, "learning_rate": 1.297297877537787e-05, "loss": 0.3031, "step": 22370 }, { "epoch": 0.40400046223069647, "grad_norm": 0.5294716954231262, "learning_rate": 1.2970270654425063e-05, "loss": 0.2404, "step": 22375 }, { "epoch": 0.40409074166359715, "grad_norm": 0.3720170557498932, "learning_rate": 1.296756229453558e-05, "loss": 0.2095, "step": 22380 }, { "epoch": 0.4041810210964979, "grad_norm": 0.9239777326583862, "learning_rate": 1.2964853695927287e-05, "loss": 0.2013, "step": 22385 }, { "epoch": 0.4042713005293986, "grad_norm": 0.13262271881103516, "learning_rate": 1.2962144858818067e-05, "loss": 0.208, "step": 22390 }, { "epoch": 0.4043615799622993, "grad_norm": 0.3081349730491638, "learning_rate": 1.2959435783425834e-05, "loss": 0.3017, "step": 22395 }, { "epoch": 0.4044518593952, "grad_norm": 0.36518728733062744, "learning_rate": 1.2956726469968506e-05, "loss": 0.277, "step": 22400 }, { "epoch": 0.40454213882810075, "grad_norm": 0.3570597767829895, "learning_rate": 1.2954016918664034e-05, "loss": 0.1976, "step": 22405 }, { "epoch": 0.40463241826100144, "grad_norm": 0.5864139795303345, "learning_rate": 1.2951307129730375e-05, "loss": 0.2879, "step": 22410 }, { "epoch": 0.4047226976939022, "grad_norm": 0.27137792110443115, "learning_rate": 1.2948597103385518e-05, "loss": 0.1695, "step": 22415 }, { "epoch": 0.40481297712680286, "grad_norm": 0.894551694393158, "learning_rate": 1.2945886839847464e-05, "loss": 0.3315, "step": 22420 }, { "epoch": 0.4049032565597036, "grad_norm": 0.7413949966430664, "learning_rate": 1.294317633933423e-05, "loss": 0.2742, "step": 22425 }, { "epoch": 0.4049935359926043, "grad_norm": 0.4280196726322174, "learning_rate": 1.2940465602063862e-05, "loss": 0.2159, "step": 22430 }, { "epoch": 0.40508381542550503, "grad_norm": 0.5879983901977539, "learning_rate": 1.293775462825441e-05, "loss": 0.2604, "step": 22435 }, { "epoch": 0.4051740948584057, "grad_norm": 0.46284785866737366, "learning_rate": 1.293504341812396e-05, "loss": 0.216, "step": 22440 }, { "epoch": 0.40526437429130646, "grad_norm": 0.2746809720993042, "learning_rate": 1.2932331971890608e-05, "loss": 0.2496, "step": 22445 }, { "epoch": 0.40535465372420715, "grad_norm": 0.30468782782554626, "learning_rate": 1.2929620289772469e-05, "loss": 0.2481, "step": 22450 }, { "epoch": 0.4054449331571079, "grad_norm": 0.5176340937614441, "learning_rate": 1.2926908371987675e-05, "loss": 0.2774, "step": 22455 }, { "epoch": 0.4055352125900086, "grad_norm": 0.4324280619621277, "learning_rate": 1.2924196218754387e-05, "loss": 0.1958, "step": 22460 }, { "epoch": 0.4056254920229093, "grad_norm": 0.47290143370628357, "learning_rate": 1.292148383029077e-05, "loss": 0.2765, "step": 22465 }, { "epoch": 0.40571577145581, "grad_norm": 0.4480784237384796, "learning_rate": 1.2918771206815023e-05, "loss": 0.3096, "step": 22470 }, { "epoch": 0.40580605088871075, "grad_norm": 0.5606126189231873, "learning_rate": 1.2916058348545352e-05, "loss": 0.268, "step": 22475 }, { "epoch": 0.40589633032161143, "grad_norm": 0.35950228571891785, "learning_rate": 1.2913345255699992e-05, "loss": 0.3666, "step": 22480 }, { "epoch": 0.4059866097545122, "grad_norm": 0.4312303364276886, "learning_rate": 1.2910631928497187e-05, "loss": 0.2431, "step": 22485 }, { "epoch": 0.40607688918741286, "grad_norm": 0.41166362166404724, "learning_rate": 1.2907918367155203e-05, "loss": 0.2415, "step": 22490 }, { "epoch": 0.4061671686203136, "grad_norm": 0.5689197778701782, "learning_rate": 1.2905204571892332e-05, "loss": 0.2023, "step": 22495 }, { "epoch": 0.4062574480532143, "grad_norm": 0.4476293623447418, "learning_rate": 1.2902490542926875e-05, "loss": 0.1702, "step": 22500 }, { "epoch": 0.40634772748611503, "grad_norm": 0.2847082316875458, "learning_rate": 1.2899776280477158e-05, "loss": 0.3206, "step": 22505 }, { "epoch": 0.4064380069190157, "grad_norm": 0.3037201464176178, "learning_rate": 1.289706178476152e-05, "loss": 0.1943, "step": 22510 }, { "epoch": 0.40652828635191646, "grad_norm": 0.5437815189361572, "learning_rate": 1.2894347055998329e-05, "loss": 0.2397, "step": 22515 }, { "epoch": 0.40661856578481714, "grad_norm": 0.461439847946167, "learning_rate": 1.2891632094405962e-05, "loss": 0.3052, "step": 22520 }, { "epoch": 0.4067088452177179, "grad_norm": 0.25330743193626404, "learning_rate": 1.288891690020281e-05, "loss": 0.2289, "step": 22525 }, { "epoch": 0.40679912465061857, "grad_norm": 0.6357237696647644, "learning_rate": 1.2886201473607305e-05, "loss": 0.2533, "step": 22530 }, { "epoch": 0.4068894040835193, "grad_norm": 0.3013076186180115, "learning_rate": 1.2883485814837872e-05, "loss": 0.2569, "step": 22535 }, { "epoch": 0.40697968351642, "grad_norm": 0.45817866921424866, "learning_rate": 1.288076992411297e-05, "loss": 0.2713, "step": 22540 }, { "epoch": 0.40706996294932074, "grad_norm": 0.367135226726532, "learning_rate": 1.2878053801651076e-05, "loss": 0.2432, "step": 22545 }, { "epoch": 0.4071602423822214, "grad_norm": 0.4528060853481293, "learning_rate": 1.2875337447670677e-05, "loss": 0.2921, "step": 22550 }, { "epoch": 0.40725052181512217, "grad_norm": 0.47508856654167175, "learning_rate": 1.2872620862390286e-05, "loss": 0.2638, "step": 22555 }, { "epoch": 0.40734080124802285, "grad_norm": 0.6407237648963928, "learning_rate": 1.2869904046028434e-05, "loss": 0.2509, "step": 22560 }, { "epoch": 0.4074310806809236, "grad_norm": 0.3994412124156952, "learning_rate": 1.2867186998803666e-05, "loss": 0.2549, "step": 22565 }, { "epoch": 0.4075213601138243, "grad_norm": 0.42670902609825134, "learning_rate": 1.2864469720934553e-05, "loss": 0.3734, "step": 22570 }, { "epoch": 0.407611639546725, "grad_norm": 0.42069903016090393, "learning_rate": 1.2861752212639673e-05, "loss": 0.2171, "step": 22575 }, { "epoch": 0.4077019189796257, "grad_norm": 0.5737390518188477, "learning_rate": 1.2859034474137639e-05, "loss": 0.3261, "step": 22580 }, { "epoch": 0.40779219841252645, "grad_norm": 0.4968356490135193, "learning_rate": 1.2856316505647066e-05, "loss": 0.2422, "step": 22585 }, { "epoch": 0.4078824778454272, "grad_norm": 0.5124558806419373, "learning_rate": 1.28535983073866e-05, "loss": 0.2636, "step": 22590 }, { "epoch": 0.4079727572783279, "grad_norm": 0.586444616317749, "learning_rate": 1.2850879879574896e-05, "loss": 0.23, "step": 22595 }, { "epoch": 0.4080630367112286, "grad_norm": 0.5707995891571045, "learning_rate": 1.2848161222430635e-05, "loss": 0.2049, "step": 22600 }, { "epoch": 0.4081533161441293, "grad_norm": 0.32396647334098816, "learning_rate": 1.2845442336172512e-05, "loss": 0.254, "step": 22605 }, { "epoch": 0.40824359557703005, "grad_norm": 0.4279758036136627, "learning_rate": 1.284272322101924e-05, "loss": 0.2778, "step": 22610 }, { "epoch": 0.40833387500993074, "grad_norm": 0.4514820873737335, "learning_rate": 1.2840003877189554e-05, "loss": 0.1986, "step": 22615 }, { "epoch": 0.4084241544428315, "grad_norm": 0.44896379113197327, "learning_rate": 1.2837284304902202e-05, "loss": 0.2626, "step": 22620 }, { "epoch": 0.40851443387573216, "grad_norm": 0.3960501551628113, "learning_rate": 1.2834564504375959e-05, "loss": 0.2557, "step": 22625 }, { "epoch": 0.4086047133086329, "grad_norm": 0.32725077867507935, "learning_rate": 1.2831844475829613e-05, "loss": 0.2286, "step": 22630 }, { "epoch": 0.4086949927415336, "grad_norm": 0.6208406090736389, "learning_rate": 1.2829124219481967e-05, "loss": 0.2369, "step": 22635 }, { "epoch": 0.40878527217443433, "grad_norm": 0.5370245575904846, "learning_rate": 1.2826403735551844e-05, "loss": 0.2311, "step": 22640 }, { "epoch": 0.408875551607335, "grad_norm": 0.5427417159080505, "learning_rate": 1.2823683024258096e-05, "loss": 0.276, "step": 22645 }, { "epoch": 0.40896583104023576, "grad_norm": 0.49803993105888367, "learning_rate": 1.2820962085819577e-05, "loss": 0.1651, "step": 22650 }, { "epoch": 0.40905611047313645, "grad_norm": 0.7014377117156982, "learning_rate": 1.2818240920455166e-05, "loss": 0.2302, "step": 22655 }, { "epoch": 0.4091463899060372, "grad_norm": 0.31781819462776184, "learning_rate": 1.2815519528383767e-05, "loss": 0.2744, "step": 22660 }, { "epoch": 0.4092366693389379, "grad_norm": 0.4156563878059387, "learning_rate": 1.2812797909824291e-05, "loss": 0.2408, "step": 22665 }, { "epoch": 0.4093269487718386, "grad_norm": 0.4748717248439789, "learning_rate": 1.2810076064995673e-05, "loss": 0.2519, "step": 22670 }, { "epoch": 0.4094172282047393, "grad_norm": 0.38562139868736267, "learning_rate": 1.2807353994116866e-05, "loss": 0.2516, "step": 22675 }, { "epoch": 0.40950750763764004, "grad_norm": 0.5304756164550781, "learning_rate": 1.2804631697406845e-05, "loss": 0.2908, "step": 22680 }, { "epoch": 0.40959778707054073, "grad_norm": 0.48928162455558777, "learning_rate": 1.2801909175084595e-05, "loss": 0.3051, "step": 22685 }, { "epoch": 0.4096880665034415, "grad_norm": 0.5467295050621033, "learning_rate": 1.2799186427369121e-05, "loss": 0.2783, "step": 22690 }, { "epoch": 0.40977834593634216, "grad_norm": 1.0147714614868164, "learning_rate": 1.2796463454479454e-05, "loss": 0.2478, "step": 22695 }, { "epoch": 0.4098686253692429, "grad_norm": 0.624610424041748, "learning_rate": 1.2793740256634632e-05, "loss": 0.4032, "step": 22700 }, { "epoch": 0.4099589048021436, "grad_norm": 0.3689691424369812, "learning_rate": 1.2791016834053723e-05, "loss": 0.2545, "step": 22705 }, { "epoch": 0.41004918423504433, "grad_norm": 0.45096608996391296, "learning_rate": 1.2788293186955798e-05, "loss": 0.2484, "step": 22710 }, { "epoch": 0.410139463667945, "grad_norm": 0.5789922475814819, "learning_rate": 1.2785569315559963e-05, "loss": 0.2958, "step": 22715 }, { "epoch": 0.41022974310084576, "grad_norm": 0.3758523166179657, "learning_rate": 1.2782845220085325e-05, "loss": 0.2646, "step": 22720 }, { "epoch": 0.41032002253374644, "grad_norm": 0.41890549659729004, "learning_rate": 1.2780120900751026e-05, "loss": 0.2976, "step": 22725 }, { "epoch": 0.4104103019666472, "grad_norm": 0.6090922355651855, "learning_rate": 1.277739635777621e-05, "loss": 0.2118, "step": 22730 }, { "epoch": 0.41050058139954787, "grad_norm": 0.5390254259109497, "learning_rate": 1.2774671591380054e-05, "loss": 0.3203, "step": 22735 }, { "epoch": 0.4105908608324486, "grad_norm": 0.355477511882782, "learning_rate": 1.277194660178174e-05, "loss": 0.2058, "step": 22740 }, { "epoch": 0.4106811402653493, "grad_norm": 0.3480255603790283, "learning_rate": 1.2769221389200478e-05, "loss": 0.258, "step": 22745 }, { "epoch": 0.41077141969825004, "grad_norm": 0.3804841637611389, "learning_rate": 1.276649595385549e-05, "loss": 0.2201, "step": 22750 }, { "epoch": 0.4108616991311507, "grad_norm": 0.5269613862037659, "learning_rate": 1.2763770295966013e-05, "loss": 0.3018, "step": 22755 }, { "epoch": 0.41095197856405147, "grad_norm": 0.523177444934845, "learning_rate": 1.2761044415751313e-05, "loss": 0.2714, "step": 22760 }, { "epoch": 0.41104225799695215, "grad_norm": 0.3963806927204132, "learning_rate": 1.2758318313430665e-05, "loss": 0.2408, "step": 22765 }, { "epoch": 0.4111325374298529, "grad_norm": 0.5341278314590454, "learning_rate": 1.2755591989223361e-05, "loss": 0.1856, "step": 22770 }, { "epoch": 0.4112228168627536, "grad_norm": 0.735899031162262, "learning_rate": 1.2752865443348719e-05, "loss": 0.3275, "step": 22775 }, { "epoch": 0.4113130962956543, "grad_norm": 0.23954825103282928, "learning_rate": 1.2750138676026068e-05, "loss": 0.3269, "step": 22780 }, { "epoch": 0.411403375728555, "grad_norm": 0.5179336667060852, "learning_rate": 1.2747411687474753e-05, "loss": 0.2438, "step": 22785 }, { "epoch": 0.41149365516145575, "grad_norm": 0.3703598082065582, "learning_rate": 1.2744684477914144e-05, "loss": 0.3266, "step": 22790 }, { "epoch": 0.41158393459435644, "grad_norm": 0.427204430103302, "learning_rate": 1.2741957047563627e-05, "loss": 0.2964, "step": 22795 }, { "epoch": 0.4116742140272572, "grad_norm": 0.5042241215705872, "learning_rate": 1.2739229396642601e-05, "loss": 0.2434, "step": 22800 }, { "epoch": 0.41176449346015787, "grad_norm": 0.44526392221450806, "learning_rate": 1.2736501525370483e-05, "loss": 0.2303, "step": 22805 }, { "epoch": 0.4118547728930586, "grad_norm": 0.41565486788749695, "learning_rate": 1.2733773433966717e-05, "loss": 0.2661, "step": 22810 }, { "epoch": 0.4119450523259593, "grad_norm": 0.8846193552017212, "learning_rate": 1.2731045122650754e-05, "loss": 0.2412, "step": 22815 }, { "epoch": 0.41203533175886004, "grad_norm": 0.537651002407074, "learning_rate": 1.2728316591642065e-05, "loss": 0.2317, "step": 22820 }, { "epoch": 0.4121256111917607, "grad_norm": 0.41685473918914795, "learning_rate": 1.2725587841160146e-05, "loss": 0.2167, "step": 22825 }, { "epoch": 0.41221589062466146, "grad_norm": 0.43952709436416626, "learning_rate": 1.27228588714245e-05, "loss": 0.3201, "step": 22830 }, { "epoch": 0.41230617005756215, "grad_norm": 0.4234485328197479, "learning_rate": 1.2720129682654659e-05, "loss": 0.2127, "step": 22835 }, { "epoch": 0.4123964494904629, "grad_norm": 0.5349525809288025, "learning_rate": 1.2717400275070154e-05, "loss": 0.2614, "step": 22840 }, { "epoch": 0.4124867289233636, "grad_norm": 0.6713441014289856, "learning_rate": 1.2714670648890562e-05, "loss": 0.3333, "step": 22845 }, { "epoch": 0.4125770083562643, "grad_norm": 0.3737652897834778, "learning_rate": 1.2711940804335452e-05, "loss": 0.1947, "step": 22850 }, { "epoch": 0.412667287789165, "grad_norm": 0.4805316627025604, "learning_rate": 1.2709210741624418e-05, "loss": 0.2479, "step": 22855 }, { "epoch": 0.41275756722206575, "grad_norm": 0.5672625303268433, "learning_rate": 1.2706480460977081e-05, "loss": 0.2142, "step": 22860 }, { "epoch": 0.41284784665496643, "grad_norm": 0.4623817801475525, "learning_rate": 1.270374996261307e-05, "loss": 0.2617, "step": 22865 }, { "epoch": 0.4129381260878672, "grad_norm": 0.37226539850234985, "learning_rate": 1.2701019246752034e-05, "loss": 0.2183, "step": 22870 }, { "epoch": 0.41302840552076786, "grad_norm": 0.889231264591217, "learning_rate": 1.2698288313613634e-05, "loss": 0.2961, "step": 22875 }, { "epoch": 0.4131186849536686, "grad_norm": 0.506869912147522, "learning_rate": 1.2695557163417558e-05, "loss": 0.2368, "step": 22880 }, { "epoch": 0.4132089643865693, "grad_norm": 0.7621840238571167, "learning_rate": 1.2692825796383508e-05, "loss": 0.2583, "step": 22885 }, { "epoch": 0.41329924381947003, "grad_norm": 0.5102425217628479, "learning_rate": 1.2690094212731202e-05, "loss": 0.3368, "step": 22890 }, { "epoch": 0.4133895232523707, "grad_norm": 0.3810027837753296, "learning_rate": 1.2687362412680375e-05, "loss": 0.1905, "step": 22895 }, { "epoch": 0.41347980268527146, "grad_norm": 0.4110488295555115, "learning_rate": 1.2684630396450782e-05, "loss": 0.3392, "step": 22900 }, { "epoch": 0.41357008211817214, "grad_norm": 0.3828012943267822, "learning_rate": 1.268189816426219e-05, "loss": 0.1826, "step": 22905 }, { "epoch": 0.4136603615510729, "grad_norm": 0.5053306221961975, "learning_rate": 1.2679165716334391e-05, "loss": 0.3074, "step": 22910 }, { "epoch": 0.41375064098397357, "grad_norm": 0.3866608440876007, "learning_rate": 1.2676433052887192e-05, "loss": 0.3494, "step": 22915 }, { "epoch": 0.4138409204168743, "grad_norm": 0.5869969129562378, "learning_rate": 1.2673700174140408e-05, "loss": 0.3472, "step": 22920 }, { "epoch": 0.413931199849775, "grad_norm": 0.6221544742584229, "learning_rate": 1.267096708031389e-05, "loss": 0.2017, "step": 22925 }, { "epoch": 0.41402147928267574, "grad_norm": 0.5329904556274414, "learning_rate": 1.2668233771627488e-05, "loss": 0.3864, "step": 22930 }, { "epoch": 0.41411175871557643, "grad_norm": 0.6042413711547852, "learning_rate": 1.2665500248301079e-05, "loss": 0.3168, "step": 22935 }, { "epoch": 0.41420203814847717, "grad_norm": 0.43554195761680603, "learning_rate": 1.2662766510554554e-05, "loss": 0.3326, "step": 22940 }, { "epoch": 0.41429231758137786, "grad_norm": 0.4658290445804596, "learning_rate": 1.2660032558607827e-05, "loss": 0.2667, "step": 22945 }, { "epoch": 0.4143825970142786, "grad_norm": 0.514781653881073, "learning_rate": 1.265729839268082e-05, "loss": 0.3871, "step": 22950 }, { "epoch": 0.4144728764471793, "grad_norm": 0.47646093368530273, "learning_rate": 1.2654564012993472e-05, "loss": 0.2737, "step": 22955 }, { "epoch": 0.41456315588008, "grad_norm": 0.49809929728507996, "learning_rate": 1.2651829419765755e-05, "loss": 0.3645, "step": 22960 }, { "epoch": 0.4146534353129807, "grad_norm": 0.35647469758987427, "learning_rate": 1.2649094613217643e-05, "loss": 0.2278, "step": 22965 }, { "epoch": 0.41474371474588145, "grad_norm": 1.0289150476455688, "learning_rate": 1.2646359593569128e-05, "loss": 0.2856, "step": 22970 }, { "epoch": 0.41483399417878214, "grad_norm": 0.35534340143203735, "learning_rate": 1.2643624361040222e-05, "loss": 0.1699, "step": 22975 }, { "epoch": 0.4149242736116829, "grad_norm": 1.2184815406799316, "learning_rate": 1.2640888915850964e-05, "loss": 0.2914, "step": 22980 }, { "epoch": 0.4150145530445836, "grad_norm": 0.30880922079086304, "learning_rate": 1.2638153258221389e-05, "loss": 0.2778, "step": 22985 }, { "epoch": 0.4151048324774843, "grad_norm": 0.45844656229019165, "learning_rate": 1.2635417388371565e-05, "loss": 0.3023, "step": 22990 }, { "epoch": 0.41519511191038505, "grad_norm": 0.40415072441101074, "learning_rate": 1.2632681306521577e-05, "loss": 0.2527, "step": 22995 }, { "epoch": 0.41528539134328574, "grad_norm": 0.37578916549682617, "learning_rate": 1.2629945012891515e-05, "loss": 0.2063, "step": 23000 }, { "epoch": 0.4153756707761865, "grad_norm": 0.448086678981781, "learning_rate": 1.2627208507701501e-05, "loss": 0.31, "step": 23005 }, { "epoch": 0.41546595020908716, "grad_norm": 0.4715213179588318, "learning_rate": 1.2624471791171665e-05, "loss": 0.2169, "step": 23010 }, { "epoch": 0.4155562296419879, "grad_norm": 0.7206951975822449, "learning_rate": 1.2621734863522152e-05, "loss": 0.3105, "step": 23015 }, { "epoch": 0.4156465090748886, "grad_norm": 0.8258635997772217, "learning_rate": 1.2618997724973132e-05, "loss": 0.2576, "step": 23020 }, { "epoch": 0.41573678850778933, "grad_norm": 0.3769485652446747, "learning_rate": 1.2616260375744788e-05, "loss": 0.1799, "step": 23025 }, { "epoch": 0.41582706794069, "grad_norm": 0.514301598072052, "learning_rate": 1.2613522816057319e-05, "loss": 0.3868, "step": 23030 }, { "epoch": 0.41591734737359076, "grad_norm": 0.456122487783432, "learning_rate": 1.2610785046130943e-05, "loss": 0.2225, "step": 23035 }, { "epoch": 0.41600762680649145, "grad_norm": 0.6266130805015564, "learning_rate": 1.260804706618589e-05, "loss": 0.226, "step": 23040 }, { "epoch": 0.4160979062393922, "grad_norm": 0.33325594663619995, "learning_rate": 1.2605308876442412e-05, "loss": 0.2569, "step": 23045 }, { "epoch": 0.4161881856722929, "grad_norm": 0.5136666297912598, "learning_rate": 1.2602570477120779e-05, "loss": 0.1929, "step": 23050 }, { "epoch": 0.4162784651051936, "grad_norm": 0.35206708312034607, "learning_rate": 1.2599831868441272e-05, "loss": 0.1797, "step": 23055 }, { "epoch": 0.4163687445380943, "grad_norm": 0.38802480697631836, "learning_rate": 1.2597093050624196e-05, "loss": 0.3006, "step": 23060 }, { "epoch": 0.41645902397099505, "grad_norm": 0.575931966304779, "learning_rate": 1.2594354023889865e-05, "loss": 0.2287, "step": 23065 }, { "epoch": 0.41654930340389573, "grad_norm": 1.2482432126998901, "learning_rate": 1.259161478845862e-05, "loss": 0.2879, "step": 23070 }, { "epoch": 0.4166395828367965, "grad_norm": 0.28200772404670715, "learning_rate": 1.2588875344550805e-05, "loss": 0.2682, "step": 23075 }, { "epoch": 0.41672986226969716, "grad_norm": 0.40231940150260925, "learning_rate": 1.258613569238679e-05, "loss": 0.2936, "step": 23080 }, { "epoch": 0.4168201417025979, "grad_norm": 0.43480750918388367, "learning_rate": 1.258339583218696e-05, "loss": 0.3142, "step": 23085 }, { "epoch": 0.4169104211354986, "grad_norm": 0.4709104895591736, "learning_rate": 1.2580655764171724e-05, "loss": 0.2487, "step": 23090 }, { "epoch": 0.41700070056839933, "grad_norm": 0.27902746200561523, "learning_rate": 1.2577915488561492e-05, "loss": 0.2756, "step": 23095 }, { "epoch": 0.4170909800013, "grad_norm": 0.5836136341094971, "learning_rate": 1.25751750055767e-05, "loss": 0.2459, "step": 23100 }, { "epoch": 0.41718125943420076, "grad_norm": 0.3742866814136505, "learning_rate": 1.2572434315437805e-05, "loss": 0.244, "step": 23105 }, { "epoch": 0.41727153886710144, "grad_norm": 0.48316583037376404, "learning_rate": 1.2569693418365272e-05, "loss": 0.2519, "step": 23110 }, { "epoch": 0.4173618183000022, "grad_norm": 0.3660636246204376, "learning_rate": 1.2566952314579589e-05, "loss": 0.2304, "step": 23115 }, { "epoch": 0.41745209773290287, "grad_norm": 0.6036543846130371, "learning_rate": 1.256421100430125e-05, "loss": 0.3532, "step": 23120 }, { "epoch": 0.4175423771658036, "grad_norm": 0.3633591830730438, "learning_rate": 1.2561469487750784e-05, "loss": 0.2204, "step": 23125 }, { "epoch": 0.4176326565987043, "grad_norm": 0.30054494738578796, "learning_rate": 1.2558727765148722e-05, "loss": 0.2039, "step": 23130 }, { "epoch": 0.41772293603160504, "grad_norm": 0.3902128338813782, "learning_rate": 1.2555985836715612e-05, "loss": 0.2077, "step": 23135 }, { "epoch": 0.4178132154645057, "grad_norm": 0.3770866394042969, "learning_rate": 1.2553243702672027e-05, "loss": 0.2709, "step": 23140 }, { "epoch": 0.41790349489740647, "grad_norm": 0.3882823586463928, "learning_rate": 1.255050136323855e-05, "loss": 0.1715, "step": 23145 }, { "epoch": 0.41799377433030716, "grad_norm": 0.46126997470855713, "learning_rate": 1.2547758818635782e-05, "loss": 0.2966, "step": 23150 }, { "epoch": 0.4180840537632079, "grad_norm": 0.2830755412578583, "learning_rate": 1.2545016069084339e-05, "loss": 0.2856, "step": 23155 }, { "epoch": 0.4181743331961086, "grad_norm": 0.4069587290287018, "learning_rate": 1.2542273114804862e-05, "loss": 0.2747, "step": 23160 }, { "epoch": 0.4182646126290093, "grad_norm": 0.593215823173523, "learning_rate": 1.2539529956017997e-05, "loss": 0.3418, "step": 23165 }, { "epoch": 0.41835489206191, "grad_norm": 0.5367265939712524, "learning_rate": 1.2536786592944405e-05, "loss": 0.2829, "step": 23170 }, { "epoch": 0.41844517149481075, "grad_norm": 0.44455626606941223, "learning_rate": 1.253404302580478e-05, "loss": 0.1921, "step": 23175 }, { "epoch": 0.41853545092771144, "grad_norm": 0.49821528792381287, "learning_rate": 1.253129925481982e-05, "loss": 0.334, "step": 23180 }, { "epoch": 0.4186257303606122, "grad_norm": 0.4233429729938507, "learning_rate": 1.2528555280210233e-05, "loss": 0.2826, "step": 23185 }, { "epoch": 0.41871600979351287, "grad_norm": 0.5704105496406555, "learning_rate": 1.2525811102196763e-05, "loss": 0.2453, "step": 23190 }, { "epoch": 0.4188062892264136, "grad_norm": 0.46656692028045654, "learning_rate": 1.2523066721000154e-05, "loss": 0.3139, "step": 23195 }, { "epoch": 0.4188965686593143, "grad_norm": 0.423334538936615, "learning_rate": 1.2520322136841169e-05, "loss": 0.2353, "step": 23200 }, { "epoch": 0.41898684809221504, "grad_norm": 0.47647279500961304, "learning_rate": 1.2517577349940595e-05, "loss": 0.2704, "step": 23205 }, { "epoch": 0.4190771275251157, "grad_norm": 0.5600226521492004, "learning_rate": 1.2514832360519227e-05, "loss": 0.2792, "step": 23210 }, { "epoch": 0.41916740695801646, "grad_norm": 0.43187180161476135, "learning_rate": 1.2512087168797882e-05, "loss": 0.2976, "step": 23215 }, { "epoch": 0.41925768639091715, "grad_norm": 0.6845264434814453, "learning_rate": 1.2509341774997385e-05, "loss": 0.2715, "step": 23220 }, { "epoch": 0.4193479658238179, "grad_norm": 0.702416181564331, "learning_rate": 1.250659617933859e-05, "loss": 0.21, "step": 23225 }, { "epoch": 0.4194382452567186, "grad_norm": 0.45580577850341797, "learning_rate": 1.2503850382042355e-05, "loss": 0.2385, "step": 23230 }, { "epoch": 0.4195285246896193, "grad_norm": 0.5321224927902222, "learning_rate": 1.250110438332956e-05, "loss": 0.2274, "step": 23235 }, { "epoch": 0.41961880412252, "grad_norm": 0.49888378381729126, "learning_rate": 1.2498358183421106e-05, "loss": 0.3107, "step": 23240 }, { "epoch": 0.41970908355542075, "grad_norm": 0.4273422956466675, "learning_rate": 1.2495611782537902e-05, "loss": 0.1971, "step": 23245 }, { "epoch": 0.41979936298832143, "grad_norm": 0.6001827716827393, "learning_rate": 1.2492865180900874e-05, "loss": 0.2154, "step": 23250 }, { "epoch": 0.4198896424212222, "grad_norm": 0.3414769172668457, "learning_rate": 1.2490118378730964e-05, "loss": 0.2462, "step": 23255 }, { "epoch": 0.41997992185412286, "grad_norm": 0.5230898857116699, "learning_rate": 1.2487371376249137e-05, "loss": 0.1892, "step": 23260 }, { "epoch": 0.4200702012870236, "grad_norm": 0.4658863842487335, "learning_rate": 1.248462417367637e-05, "loss": 0.2641, "step": 23265 }, { "epoch": 0.4201604807199243, "grad_norm": 0.6205943822860718, "learning_rate": 1.248187677123365e-05, "loss": 0.3032, "step": 23270 }, { "epoch": 0.42025076015282503, "grad_norm": 0.4406639635562897, "learning_rate": 1.247912916914199e-05, "loss": 0.2362, "step": 23275 }, { "epoch": 0.4203410395857257, "grad_norm": 0.44867652654647827, "learning_rate": 1.2476381367622414e-05, "loss": 0.2107, "step": 23280 }, { "epoch": 0.42043131901862646, "grad_norm": 0.341111958026886, "learning_rate": 1.2473633366895959e-05, "loss": 0.2036, "step": 23285 }, { "epoch": 0.42052159845152715, "grad_norm": 0.4511333703994751, "learning_rate": 1.2470885167183685e-05, "loss": 0.2541, "step": 23290 }, { "epoch": 0.4206118778844279, "grad_norm": 0.3271433711051941, "learning_rate": 1.2468136768706669e-05, "loss": 0.2803, "step": 23295 }, { "epoch": 0.4207021573173286, "grad_norm": 0.5134062767028809, "learning_rate": 1.246538817168599e-05, "loss": 0.3147, "step": 23300 }, { "epoch": 0.4207924367502293, "grad_norm": 0.9824153780937195, "learning_rate": 1.246263937634276e-05, "loss": 0.3014, "step": 23305 }, { "epoch": 0.42088271618313, "grad_norm": 1.0824674367904663, "learning_rate": 1.2459890382898098e-05, "loss": 0.2731, "step": 23310 }, { "epoch": 0.42097299561603074, "grad_norm": 0.33683472871780396, "learning_rate": 1.245714119157314e-05, "loss": 0.3048, "step": 23315 }, { "epoch": 0.42106327504893143, "grad_norm": 0.5700289607048035, "learning_rate": 1.2454391802589036e-05, "loss": 0.2221, "step": 23320 }, { "epoch": 0.42115355448183217, "grad_norm": 0.25581130385398865, "learning_rate": 1.245164221616696e-05, "loss": 0.216, "step": 23325 }, { "epoch": 0.42124383391473286, "grad_norm": 0.4467751979827881, "learning_rate": 1.244889243252809e-05, "loss": 0.336, "step": 23330 }, { "epoch": 0.4213341133476336, "grad_norm": 0.22876004874706268, "learning_rate": 1.2446142451893628e-05, "loss": 0.1749, "step": 23335 }, { "epoch": 0.4214243927805343, "grad_norm": 0.7102181911468506, "learning_rate": 1.2443392274484794e-05, "loss": 0.298, "step": 23340 }, { "epoch": 0.421514672213435, "grad_norm": 0.45810016989707947, "learning_rate": 1.2440641900522818e-05, "loss": 0.2286, "step": 23345 }, { "epoch": 0.4216049516463357, "grad_norm": 0.3493264615535736, "learning_rate": 1.2437891330228942e-05, "loss": 0.3413, "step": 23350 }, { "epoch": 0.42169523107923645, "grad_norm": 0.262546181678772, "learning_rate": 1.2435140563824435e-05, "loss": 0.2425, "step": 23355 }, { "epoch": 0.42178551051213714, "grad_norm": 0.39406266808509827, "learning_rate": 1.2432389601530576e-05, "loss": 0.2303, "step": 23360 }, { "epoch": 0.4218757899450379, "grad_norm": 0.5727577209472656, "learning_rate": 1.2429638443568657e-05, "loss": 0.3111, "step": 23365 }, { "epoch": 0.42196606937793857, "grad_norm": 0.6123302578926086, "learning_rate": 1.2426887090159992e-05, "loss": 0.2073, "step": 23370 }, { "epoch": 0.4220563488108393, "grad_norm": 0.5969008207321167, "learning_rate": 1.2424135541525906e-05, "loss": 0.2418, "step": 23375 }, { "epoch": 0.42214662824374, "grad_norm": 0.36988839507102966, "learning_rate": 1.2421383797887738e-05, "loss": 0.2219, "step": 23380 }, { "epoch": 0.42223690767664074, "grad_norm": 0.4886693060398102, "learning_rate": 1.241863185946685e-05, "loss": 0.2436, "step": 23385 }, { "epoch": 0.4223271871095415, "grad_norm": 0.334306001663208, "learning_rate": 1.2415879726484614e-05, "loss": 0.2905, "step": 23390 }, { "epoch": 0.42241746654244217, "grad_norm": 0.4661378264427185, "learning_rate": 1.2413127399162422e-05, "loss": 0.3199, "step": 23395 }, { "epoch": 0.4225077459753429, "grad_norm": 0.2870534062385559, "learning_rate": 1.2410374877721672e-05, "loss": 0.2479, "step": 23400 }, { "epoch": 0.4225980254082436, "grad_norm": 0.3354700803756714, "learning_rate": 1.240762216238379e-05, "loss": 0.3226, "step": 23405 }, { "epoch": 0.42268830484114434, "grad_norm": 0.8332720398902893, "learning_rate": 1.2404869253370212e-05, "loss": 0.167, "step": 23410 }, { "epoch": 0.422778584274045, "grad_norm": 0.4037450850009918, "learning_rate": 1.2402116150902384e-05, "loss": 0.2679, "step": 23415 }, { "epoch": 0.42286886370694576, "grad_norm": 0.5018265843391418, "learning_rate": 1.2399362855201778e-05, "loss": 0.2422, "step": 23420 }, { "epoch": 0.42295914313984645, "grad_norm": 0.3457806408405304, "learning_rate": 1.2396609366489876e-05, "loss": 0.2515, "step": 23425 }, { "epoch": 0.4230494225727472, "grad_norm": 0.4327681064605713, "learning_rate": 1.2393855684988176e-05, "loss": 0.2149, "step": 23430 }, { "epoch": 0.4231397020056479, "grad_norm": 0.49335646629333496, "learning_rate": 1.239110181091819e-05, "loss": 0.3475, "step": 23435 }, { "epoch": 0.4232299814385486, "grad_norm": 0.4823317229747772, "learning_rate": 1.238834774450145e-05, "loss": 0.358, "step": 23440 }, { "epoch": 0.4233202608714493, "grad_norm": 0.38144388794898987, "learning_rate": 1.2385593485959497e-05, "loss": 0.256, "step": 23445 }, { "epoch": 0.42341054030435005, "grad_norm": 0.5280448198318481, "learning_rate": 1.2382839035513894e-05, "loss": 0.2755, "step": 23450 }, { "epoch": 0.42350081973725073, "grad_norm": 0.3799925446510315, "learning_rate": 1.2380084393386214e-05, "loss": 0.1969, "step": 23455 }, { "epoch": 0.4235910991701515, "grad_norm": 0.6687923073768616, "learning_rate": 1.2377329559798053e-05, "loss": 0.2413, "step": 23460 }, { "epoch": 0.42368137860305216, "grad_norm": 0.3646118640899658, "learning_rate": 1.237457453497101e-05, "loss": 0.1871, "step": 23465 }, { "epoch": 0.4237716580359529, "grad_norm": 0.4773862957954407, "learning_rate": 1.237181931912671e-05, "loss": 0.2827, "step": 23470 }, { "epoch": 0.4238619374688536, "grad_norm": 0.4353986382484436, "learning_rate": 1.2369063912486793e-05, "loss": 0.2613, "step": 23475 }, { "epoch": 0.42395221690175433, "grad_norm": 0.9043532013893127, "learning_rate": 1.2366308315272911e-05, "loss": 0.2748, "step": 23480 }, { "epoch": 0.424042496334655, "grad_norm": 0.6638239622116089, "learning_rate": 1.2363552527706724e-05, "loss": 0.2582, "step": 23485 }, { "epoch": 0.42413277576755576, "grad_norm": 0.3441218435764313, "learning_rate": 1.2360796550009924e-05, "loss": 0.2524, "step": 23490 }, { "epoch": 0.42422305520045644, "grad_norm": 0.5626394748687744, "learning_rate": 1.2358040382404207e-05, "loss": 0.3088, "step": 23495 }, { "epoch": 0.4243133346333572, "grad_norm": 0.40816786885261536, "learning_rate": 1.2355284025111282e-05, "loss": 0.2522, "step": 23500 }, { "epoch": 0.4244036140662579, "grad_norm": 0.6815304756164551, "learning_rate": 1.2352527478352886e-05, "loss": 0.3174, "step": 23505 }, { "epoch": 0.4244938934991586, "grad_norm": 0.4805319309234619, "learning_rate": 1.2349770742350759e-05, "loss": 0.2346, "step": 23510 }, { "epoch": 0.4245841729320593, "grad_norm": 0.4371818006038666, "learning_rate": 1.234701381732666e-05, "loss": 0.3627, "step": 23515 }, { "epoch": 0.42467445236496004, "grad_norm": 0.4816952645778656, "learning_rate": 1.234425670350236e-05, "loss": 0.3147, "step": 23520 }, { "epoch": 0.42476473179786073, "grad_norm": 0.25366294384002686, "learning_rate": 1.2341499401099656e-05, "loss": 0.1932, "step": 23525 }, { "epoch": 0.42485501123076147, "grad_norm": 0.5283558964729309, "learning_rate": 1.233874191034035e-05, "loss": 0.2441, "step": 23530 }, { "epoch": 0.42494529066366216, "grad_norm": 0.4625545144081116, "learning_rate": 1.233598423144626e-05, "loss": 0.1845, "step": 23535 }, { "epoch": 0.4250355700965629, "grad_norm": 0.5480676889419556, "learning_rate": 1.2333226364639226e-05, "loss": 0.2443, "step": 23540 }, { "epoch": 0.4251258495294636, "grad_norm": 0.40750113129615784, "learning_rate": 1.2330468310141092e-05, "loss": 0.2311, "step": 23545 }, { "epoch": 0.4252161289623643, "grad_norm": 0.30785858631134033, "learning_rate": 1.2327710068173726e-05, "loss": 0.2228, "step": 23550 }, { "epoch": 0.425306408395265, "grad_norm": 0.3887018859386444, "learning_rate": 1.2324951638959011e-05, "loss": 0.1793, "step": 23555 }, { "epoch": 0.42539668782816575, "grad_norm": 0.4880499243736267, "learning_rate": 1.232219302271884e-05, "loss": 0.1978, "step": 23560 }, { "epoch": 0.42548696726106644, "grad_norm": 0.42758166790008545, "learning_rate": 1.231943421967512e-05, "loss": 0.2948, "step": 23565 }, { "epoch": 0.4255772466939672, "grad_norm": 0.4794660806655884, "learning_rate": 1.2316675230049788e-05, "loss": 0.2399, "step": 23570 }, { "epoch": 0.42566752612686787, "grad_norm": 0.32832571864128113, "learning_rate": 1.2313916054064774e-05, "loss": 0.286, "step": 23575 }, { "epoch": 0.4257578055597686, "grad_norm": 0.3535519540309906, "learning_rate": 1.231115669194204e-05, "loss": 0.2398, "step": 23580 }, { "epoch": 0.4258480849926693, "grad_norm": 0.49113744497299194, "learning_rate": 1.230839714390355e-05, "loss": 0.3338, "step": 23585 }, { "epoch": 0.42593836442557004, "grad_norm": 0.28829047083854675, "learning_rate": 1.2305637410171296e-05, "loss": 0.274, "step": 23590 }, { "epoch": 0.4260286438584707, "grad_norm": 0.244572713971138, "learning_rate": 1.2302877490967275e-05, "loss": 0.1644, "step": 23595 }, { "epoch": 0.42611892329137147, "grad_norm": 0.3454359173774719, "learning_rate": 1.2300117386513501e-05, "loss": 0.178, "step": 23600 }, { "epoch": 0.42620920272427215, "grad_norm": 0.3768734931945801, "learning_rate": 1.229735709703201e-05, "loss": 0.3869, "step": 23605 }, { "epoch": 0.4262994821571729, "grad_norm": 0.4586016535758972, "learning_rate": 1.2294596622744845e-05, "loss": 0.2703, "step": 23610 }, { "epoch": 0.4263897615900736, "grad_norm": 0.4032464623451233, "learning_rate": 1.2291835963874063e-05, "loss": 0.273, "step": 23615 }, { "epoch": 0.4264800410229743, "grad_norm": 0.4911908209323883, "learning_rate": 1.228907512064174e-05, "loss": 0.2433, "step": 23620 }, { "epoch": 0.426570320455875, "grad_norm": 0.3181964159011841, "learning_rate": 1.228631409326997e-05, "loss": 0.2955, "step": 23625 }, { "epoch": 0.42666059988877575, "grad_norm": 0.35814782977104187, "learning_rate": 1.2283552881980852e-05, "loss": 0.2569, "step": 23630 }, { "epoch": 0.42675087932167644, "grad_norm": 0.6058424115180969, "learning_rate": 1.2280791486996509e-05, "loss": 0.251, "step": 23635 }, { "epoch": 0.4268411587545772, "grad_norm": 0.5719448328018188, "learning_rate": 1.2278029908539073e-05, "loss": 0.2149, "step": 23640 }, { "epoch": 0.42693143818747786, "grad_norm": 0.3337777554988861, "learning_rate": 1.2275268146830691e-05, "loss": 0.275, "step": 23645 }, { "epoch": 0.4270217176203786, "grad_norm": 0.47124147415161133, "learning_rate": 1.2272506202093533e-05, "loss": 0.3303, "step": 23650 }, { "epoch": 0.4271119970532793, "grad_norm": 0.4274919331073761, "learning_rate": 1.2269744074549773e-05, "loss": 0.2398, "step": 23655 }, { "epoch": 0.42720227648618003, "grad_norm": 0.3180335462093353, "learning_rate": 1.2266981764421603e-05, "loss": 0.1748, "step": 23660 }, { "epoch": 0.4272925559190807, "grad_norm": 0.753650426864624, "learning_rate": 1.2264219271931234e-05, "loss": 0.2856, "step": 23665 }, { "epoch": 0.42738283535198146, "grad_norm": 0.622544527053833, "learning_rate": 1.2261456597300888e-05, "loss": 0.2688, "step": 23670 }, { "epoch": 0.42747311478488215, "grad_norm": 0.42950546741485596, "learning_rate": 1.22586937407528e-05, "loss": 0.3049, "step": 23675 }, { "epoch": 0.4275633942177829, "grad_norm": 0.632057249546051, "learning_rate": 1.2255930702509224e-05, "loss": 0.3121, "step": 23680 }, { "epoch": 0.4276536736506836, "grad_norm": 0.3873615562915802, "learning_rate": 1.2253167482792423e-05, "loss": 0.3521, "step": 23685 }, { "epoch": 0.4277439530835843, "grad_norm": 0.4572433829307556, "learning_rate": 1.2250404081824683e-05, "loss": 0.1936, "step": 23690 }, { "epoch": 0.427834232516485, "grad_norm": 0.5588839054107666, "learning_rate": 1.2247640499828295e-05, "loss": 0.2833, "step": 23695 }, { "epoch": 0.42792451194938574, "grad_norm": 0.5518315434455872, "learning_rate": 1.2244876737025568e-05, "loss": 0.3176, "step": 23700 }, { "epoch": 0.42801479138228643, "grad_norm": 0.5185930728912354, "learning_rate": 1.2242112793638833e-05, "loss": 0.2754, "step": 23705 }, { "epoch": 0.42810507081518717, "grad_norm": 0.3396175503730774, "learning_rate": 1.2239348669890426e-05, "loss": 0.1739, "step": 23710 }, { "epoch": 0.42819535024808786, "grad_norm": 0.3102880120277405, "learning_rate": 1.22365843660027e-05, "loss": 0.152, "step": 23715 }, { "epoch": 0.4282856296809886, "grad_norm": 0.307441383600235, "learning_rate": 1.2233819882198021e-05, "loss": 0.2695, "step": 23720 }, { "epoch": 0.4283759091138893, "grad_norm": 0.5969069004058838, "learning_rate": 1.2231055218698775e-05, "loss": 0.301, "step": 23725 }, { "epoch": 0.42846618854679, "grad_norm": 0.27882397174835205, "learning_rate": 1.2228290375727358e-05, "loss": 0.2568, "step": 23730 }, { "epoch": 0.4285564679796907, "grad_norm": 0.4679926037788391, "learning_rate": 1.2225525353506182e-05, "loss": 0.2583, "step": 23735 }, { "epoch": 0.42864674741259146, "grad_norm": 0.4088680148124695, "learning_rate": 1.2222760152257674e-05, "loss": 0.2584, "step": 23740 }, { "epoch": 0.42873702684549214, "grad_norm": 0.8026046752929688, "learning_rate": 1.221999477220427e-05, "loss": 0.2621, "step": 23745 }, { "epoch": 0.4288273062783929, "grad_norm": 0.5004242062568665, "learning_rate": 1.2217229213568427e-05, "loss": 0.3945, "step": 23750 }, { "epoch": 0.42891758571129357, "grad_norm": 0.5434352159500122, "learning_rate": 1.2214463476572615e-05, "loss": 0.2763, "step": 23755 }, { "epoch": 0.4290078651441943, "grad_norm": 0.5118503570556641, "learning_rate": 1.2211697561439318e-05, "loss": 0.2636, "step": 23760 }, { "epoch": 0.429098144577095, "grad_norm": 0.5239500403404236, "learning_rate": 1.2208931468391031e-05, "loss": 0.1781, "step": 23765 }, { "epoch": 0.42918842400999574, "grad_norm": 0.4253959357738495, "learning_rate": 1.220616519765027e-05, "loss": 0.3, "step": 23770 }, { "epoch": 0.4292787034428964, "grad_norm": 0.5123077630996704, "learning_rate": 1.220339874943956e-05, "loss": 0.2837, "step": 23775 }, { "epoch": 0.42936898287579717, "grad_norm": 0.6357778310775757, "learning_rate": 1.220063212398144e-05, "loss": 0.1701, "step": 23780 }, { "epoch": 0.4294592623086979, "grad_norm": 0.4320623278617859, "learning_rate": 1.2197865321498463e-05, "loss": 0.3303, "step": 23785 }, { "epoch": 0.4295495417415986, "grad_norm": 0.4319918155670166, "learning_rate": 1.2195098342213205e-05, "loss": 0.307, "step": 23790 }, { "epoch": 0.42963982117449934, "grad_norm": 0.4354119896888733, "learning_rate": 1.2192331186348243e-05, "loss": 0.2688, "step": 23795 }, { "epoch": 0.4297301006074, "grad_norm": 0.44331490993499756, "learning_rate": 1.2189563854126177e-05, "loss": 0.2869, "step": 23800 }, { "epoch": 0.42982038004030076, "grad_norm": 0.5300556421279907, "learning_rate": 1.2186796345769619e-05, "loss": 0.1792, "step": 23805 }, { "epoch": 0.42991065947320145, "grad_norm": 0.44784700870513916, "learning_rate": 1.2184028661501197e-05, "loss": 0.3702, "step": 23810 }, { "epoch": 0.4300009389061022, "grad_norm": 0.5087341666221619, "learning_rate": 1.2181260801543545e-05, "loss": 0.342, "step": 23815 }, { "epoch": 0.4300912183390029, "grad_norm": 0.6763350963592529, "learning_rate": 1.217849276611932e-05, "loss": 0.3196, "step": 23820 }, { "epoch": 0.4301814977719036, "grad_norm": 0.30958354473114014, "learning_rate": 1.2175724555451195e-05, "loss": 0.2174, "step": 23825 }, { "epoch": 0.4302717772048043, "grad_norm": 0.5187954306602478, "learning_rate": 1.2172956169761845e-05, "loss": 0.2851, "step": 23830 }, { "epoch": 0.43036205663770505, "grad_norm": 0.8124983906745911, "learning_rate": 1.217018760927397e-05, "loss": 0.2497, "step": 23835 }, { "epoch": 0.43045233607060573, "grad_norm": 0.3612107038497925, "learning_rate": 1.2167418874210285e-05, "loss": 0.2518, "step": 23840 }, { "epoch": 0.4305426155035065, "grad_norm": 0.6983647346496582, "learning_rate": 1.2164649964793506e-05, "loss": 0.2057, "step": 23845 }, { "epoch": 0.43063289493640716, "grad_norm": 0.4899793267250061, "learning_rate": 1.2161880881246378e-05, "loss": 0.3078, "step": 23850 }, { "epoch": 0.4307231743693079, "grad_norm": 0.33246493339538574, "learning_rate": 1.2159111623791651e-05, "loss": 0.1778, "step": 23855 }, { "epoch": 0.4308134538022086, "grad_norm": 0.46771249175071716, "learning_rate": 1.2156342192652093e-05, "loss": 0.2582, "step": 23860 }, { "epoch": 0.43090373323510933, "grad_norm": 0.6646603345870972, "learning_rate": 1.2153572588050479e-05, "loss": 0.3399, "step": 23865 }, { "epoch": 0.43099401266801, "grad_norm": 0.3225894272327423, "learning_rate": 1.2150802810209614e-05, "loss": 0.2621, "step": 23870 }, { "epoch": 0.43108429210091076, "grad_norm": 0.44844329357147217, "learning_rate": 1.2148032859352298e-05, "loss": 0.2642, "step": 23875 }, { "epoch": 0.43117457153381145, "grad_norm": 0.39339956641197205, "learning_rate": 1.2145262735701357e-05, "loss": 0.2938, "step": 23880 }, { "epoch": 0.4312648509667122, "grad_norm": 0.4607665240764618, "learning_rate": 1.2142492439479625e-05, "loss": 0.3199, "step": 23885 }, { "epoch": 0.4313551303996129, "grad_norm": 0.4481928050518036, "learning_rate": 1.2139721970909956e-05, "loss": 0.2893, "step": 23890 }, { "epoch": 0.4314454098325136, "grad_norm": 0.538311243057251, "learning_rate": 1.2136951330215212e-05, "loss": 0.2104, "step": 23895 }, { "epoch": 0.4315356892654143, "grad_norm": 0.35558781027793884, "learning_rate": 1.213418051761827e-05, "loss": 0.2451, "step": 23900 }, { "epoch": 0.43162596869831504, "grad_norm": 0.5374397039413452, "learning_rate": 1.2131409533342022e-05, "loss": 0.1912, "step": 23905 }, { "epoch": 0.43171624813121573, "grad_norm": 0.7439382672309875, "learning_rate": 1.2128638377609373e-05, "loss": 0.2741, "step": 23910 }, { "epoch": 0.43180652756411647, "grad_norm": 0.46327850222587585, "learning_rate": 1.2125867050643243e-05, "loss": 0.3571, "step": 23915 }, { "epoch": 0.43189680699701716, "grad_norm": 0.4672674238681793, "learning_rate": 1.2123095552666566e-05, "loss": 0.3456, "step": 23920 }, { "epoch": 0.4319870864299179, "grad_norm": 0.45635950565338135, "learning_rate": 1.212032388390229e-05, "loss": 0.2549, "step": 23925 }, { "epoch": 0.4320773658628186, "grad_norm": 0.49297451972961426, "learning_rate": 1.211755204457337e-05, "loss": 0.2797, "step": 23930 }, { "epoch": 0.4321676452957193, "grad_norm": 0.45111674070358276, "learning_rate": 1.2114780034902787e-05, "loss": 0.2937, "step": 23935 }, { "epoch": 0.43225792472862, "grad_norm": 0.4889915883541107, "learning_rate": 1.2112007855113526e-05, "loss": 0.3148, "step": 23940 }, { "epoch": 0.43234820416152075, "grad_norm": 0.29992303252220154, "learning_rate": 1.210923550542859e-05, "loss": 0.3375, "step": 23945 }, { "epoch": 0.43243848359442144, "grad_norm": 0.6777974367141724, "learning_rate": 1.2106462986070988e-05, "loss": 0.285, "step": 23950 }, { "epoch": 0.4325287630273222, "grad_norm": 0.5728838443756104, "learning_rate": 1.210369029726376e-05, "loss": 0.1933, "step": 23955 }, { "epoch": 0.43261904246022287, "grad_norm": 0.5338889360427856, "learning_rate": 1.210091743922994e-05, "loss": 0.2527, "step": 23960 }, { "epoch": 0.4327093218931236, "grad_norm": 0.43478772044181824, "learning_rate": 1.2098144412192587e-05, "loss": 0.3012, "step": 23965 }, { "epoch": 0.4327996013260243, "grad_norm": 0.3326312303543091, "learning_rate": 1.2095371216374771e-05, "loss": 0.293, "step": 23970 }, { "epoch": 0.43288988075892504, "grad_norm": 0.4028029143810272, "learning_rate": 1.2092597851999578e-05, "loss": 0.2374, "step": 23975 }, { "epoch": 0.4329801601918257, "grad_norm": 0.45053476095199585, "learning_rate": 1.2089824319290097e-05, "loss": 0.2518, "step": 23980 }, { "epoch": 0.43307043962472647, "grad_norm": 0.5274935364723206, "learning_rate": 1.208705061846945e-05, "loss": 0.198, "step": 23985 }, { "epoch": 0.43316071905762715, "grad_norm": 0.5276974439620972, "learning_rate": 1.2084276749760756e-05, "loss": 0.2876, "step": 23990 }, { "epoch": 0.4332509984905279, "grad_norm": 0.49531668424606323, "learning_rate": 1.2081502713387145e-05, "loss": 0.3453, "step": 23995 }, { "epoch": 0.4333412779234286, "grad_norm": 0.5129486918449402, "learning_rate": 1.207872850957178e-05, "loss": 0.2143, "step": 24000 }, { "epoch": 0.4334315573563293, "grad_norm": 0.8107872009277344, "learning_rate": 1.2075954138537817e-05, "loss": 0.202, "step": 24005 }, { "epoch": 0.43352183678923, "grad_norm": 0.5041257739067078, "learning_rate": 1.207317960050844e-05, "loss": 0.2312, "step": 24010 }, { "epoch": 0.43361211622213075, "grad_norm": 0.4606269896030426, "learning_rate": 1.2070404895706835e-05, "loss": 0.3045, "step": 24015 }, { "epoch": 0.43370239565503144, "grad_norm": 0.42314013838768005, "learning_rate": 1.2067630024356212e-05, "loss": 0.28, "step": 24020 }, { "epoch": 0.4337926750879322, "grad_norm": 0.5237497091293335, "learning_rate": 1.2064854986679784e-05, "loss": 0.3149, "step": 24025 }, { "epoch": 0.43388295452083286, "grad_norm": 0.47119367122650146, "learning_rate": 1.2062079782900785e-05, "loss": 0.2723, "step": 24030 }, { "epoch": 0.4339732339537336, "grad_norm": 0.4915752112865448, "learning_rate": 1.2059304413242463e-05, "loss": 0.3197, "step": 24035 }, { "epoch": 0.4340635133866343, "grad_norm": 0.43169498443603516, "learning_rate": 1.2056528877928073e-05, "loss": 0.2272, "step": 24040 }, { "epoch": 0.43415379281953503, "grad_norm": 0.25861960649490356, "learning_rate": 1.2053753177180888e-05, "loss": 0.2338, "step": 24045 }, { "epoch": 0.4342440722524357, "grad_norm": 0.6282423138618469, "learning_rate": 1.2050977311224188e-05, "loss": 0.3242, "step": 24050 }, { "epoch": 0.43433435168533646, "grad_norm": 0.4926910400390625, "learning_rate": 1.2048201280281278e-05, "loss": 0.2658, "step": 24055 }, { "epoch": 0.43442463111823715, "grad_norm": 0.42777496576309204, "learning_rate": 1.2045425084575466e-05, "loss": 0.283, "step": 24060 }, { "epoch": 0.4345149105511379, "grad_norm": 0.5594484210014343, "learning_rate": 1.2042648724330078e-05, "loss": 0.3006, "step": 24065 }, { "epoch": 0.4346051899840386, "grad_norm": 0.4370863437652588, "learning_rate": 1.2039872199768451e-05, "loss": 0.2146, "step": 24070 }, { "epoch": 0.4346954694169393, "grad_norm": 0.44897207617759705, "learning_rate": 1.203709551111394e-05, "loss": 0.2844, "step": 24075 }, { "epoch": 0.43478574884984, "grad_norm": 0.5095186829566956, "learning_rate": 1.2034318658589902e-05, "loss": 0.3773, "step": 24080 }, { "epoch": 0.43487602828274075, "grad_norm": 0.45647507905960083, "learning_rate": 1.2031541642419723e-05, "loss": 0.3153, "step": 24085 }, { "epoch": 0.43496630771564143, "grad_norm": 0.4166284203529358, "learning_rate": 1.2028764462826789e-05, "loss": 0.1815, "step": 24090 }, { "epoch": 0.4350565871485422, "grad_norm": 0.2727082371711731, "learning_rate": 1.2025987120034502e-05, "loss": 0.2251, "step": 24095 }, { "epoch": 0.43514686658144286, "grad_norm": 0.2521149218082428, "learning_rate": 1.2023209614266282e-05, "loss": 0.239, "step": 24100 }, { "epoch": 0.4352371460143436, "grad_norm": 0.3187403380870819, "learning_rate": 1.2020431945745561e-05, "loss": 0.2084, "step": 24105 }, { "epoch": 0.4353274254472443, "grad_norm": 0.797163724899292, "learning_rate": 1.2017654114695779e-05, "loss": 0.2989, "step": 24110 }, { "epoch": 0.43541770488014503, "grad_norm": 1.0744043588638306, "learning_rate": 1.2014876121340389e-05, "loss": 0.2556, "step": 24115 }, { "epoch": 0.4355079843130457, "grad_norm": 0.7450100779533386, "learning_rate": 1.201209796590287e-05, "loss": 0.2484, "step": 24120 }, { "epoch": 0.43559826374594646, "grad_norm": 0.5865199565887451, "learning_rate": 1.2009319648606696e-05, "loss": 0.3165, "step": 24125 }, { "epoch": 0.43568854317884714, "grad_norm": 0.3864075243473053, "learning_rate": 1.2006541169675365e-05, "loss": 0.251, "step": 24130 }, { "epoch": 0.4357788226117479, "grad_norm": 0.5079840421676636, "learning_rate": 1.2003762529332388e-05, "loss": 0.198, "step": 24135 }, { "epoch": 0.43586910204464857, "grad_norm": 0.4628409445285797, "learning_rate": 1.2000983727801285e-05, "loss": 0.3116, "step": 24140 }, { "epoch": 0.4359593814775493, "grad_norm": 0.41381558775901794, "learning_rate": 1.1998204765305585e-05, "loss": 0.2505, "step": 24145 }, { "epoch": 0.43604966091045, "grad_norm": 1.1286911964416504, "learning_rate": 1.1995425642068845e-05, "loss": 0.2429, "step": 24150 }, { "epoch": 0.43613994034335074, "grad_norm": 0.700363278388977, "learning_rate": 1.1992646358314619e-05, "loss": 0.1841, "step": 24155 }, { "epoch": 0.4362302197762514, "grad_norm": 0.538574755191803, "learning_rate": 1.1989866914266481e-05, "loss": 0.2661, "step": 24160 }, { "epoch": 0.43632049920915217, "grad_norm": 0.3559228479862213, "learning_rate": 1.1987087310148016e-05, "loss": 0.2594, "step": 24165 }, { "epoch": 0.43641077864205285, "grad_norm": 0.5627803802490234, "learning_rate": 1.1984307546182827e-05, "loss": 0.2349, "step": 24170 }, { "epoch": 0.4365010580749536, "grad_norm": 0.5101236701011658, "learning_rate": 1.1981527622594525e-05, "loss": 0.2979, "step": 24175 }, { "epoch": 0.43659133750785434, "grad_norm": 0.5389246344566345, "learning_rate": 1.1978747539606728e-05, "loss": 0.2205, "step": 24180 }, { "epoch": 0.436681616940755, "grad_norm": 0.4691467583179474, "learning_rate": 1.197596729744308e-05, "loss": 0.2294, "step": 24185 }, { "epoch": 0.43677189637365577, "grad_norm": 0.3519728481769562, "learning_rate": 1.1973186896327232e-05, "loss": 0.255, "step": 24190 }, { "epoch": 0.43686217580655645, "grad_norm": 0.5833948850631714, "learning_rate": 1.197040633648284e-05, "loss": 0.3013, "step": 24195 }, { "epoch": 0.4369524552394572, "grad_norm": 0.7892794013023376, "learning_rate": 1.1967625618133586e-05, "loss": 0.3072, "step": 24200 }, { "epoch": 0.4370427346723579, "grad_norm": 2.0248167514801025, "learning_rate": 1.1964844741503159e-05, "loss": 0.2601, "step": 24205 }, { "epoch": 0.4371330141052586, "grad_norm": 0.3766515254974365, "learning_rate": 1.1962063706815256e-05, "loss": 0.2675, "step": 24210 }, { "epoch": 0.4372232935381593, "grad_norm": 0.4443298280239105, "learning_rate": 1.1959282514293596e-05, "loss": 0.3409, "step": 24215 }, { "epoch": 0.43731357297106005, "grad_norm": 0.40835708379745483, "learning_rate": 1.1956501164161903e-05, "loss": 0.1445, "step": 24220 }, { "epoch": 0.43740385240396074, "grad_norm": 0.5901013016700745, "learning_rate": 1.1953719656643914e-05, "loss": 0.2464, "step": 24225 }, { "epoch": 0.4374941318368615, "grad_norm": 0.5692929625511169, "learning_rate": 1.1950937991963385e-05, "loss": 0.2389, "step": 24230 }, { "epoch": 0.43758441126976216, "grad_norm": 0.4530095160007477, "learning_rate": 1.1948156170344079e-05, "loss": 0.2317, "step": 24235 }, { "epoch": 0.4376746907026629, "grad_norm": 0.46489042043685913, "learning_rate": 1.1945374192009773e-05, "loss": 0.2053, "step": 24240 }, { "epoch": 0.4377649701355636, "grad_norm": 0.4560989439487457, "learning_rate": 1.1942592057184256e-05, "loss": 0.2311, "step": 24245 }, { "epoch": 0.43785524956846433, "grad_norm": 0.6620902419090271, "learning_rate": 1.1939809766091334e-05, "loss": 0.2191, "step": 24250 }, { "epoch": 0.437945529001365, "grad_norm": 0.48353275656700134, "learning_rate": 1.1937027318954823e-05, "loss": 0.2172, "step": 24255 }, { "epoch": 0.43803580843426576, "grad_norm": 0.2793586254119873, "learning_rate": 1.1934244715998547e-05, "loss": 0.223, "step": 24260 }, { "epoch": 0.43812608786716645, "grad_norm": 0.4108912944793701, "learning_rate": 1.1931461957446342e-05, "loss": 0.3581, "step": 24265 }, { "epoch": 0.4382163673000672, "grad_norm": 0.2431604564189911, "learning_rate": 1.1928679043522071e-05, "loss": 0.1932, "step": 24270 }, { "epoch": 0.4383066467329679, "grad_norm": 0.4234777092933655, "learning_rate": 1.1925895974449588e-05, "loss": 0.2708, "step": 24275 }, { "epoch": 0.4383969261658686, "grad_norm": 0.7244442105293274, "learning_rate": 1.192311275045278e-05, "loss": 0.2523, "step": 24280 }, { "epoch": 0.4384872055987693, "grad_norm": 0.505651593208313, "learning_rate": 1.1920329371755535e-05, "loss": 0.2314, "step": 24285 }, { "epoch": 0.43857748503167004, "grad_norm": 0.4934924840927124, "learning_rate": 1.1917545838581754e-05, "loss": 0.2486, "step": 24290 }, { "epoch": 0.43866776446457073, "grad_norm": 0.39593183994293213, "learning_rate": 1.1914762151155348e-05, "loss": 0.2177, "step": 24295 }, { "epoch": 0.4387580438974715, "grad_norm": 0.700186014175415, "learning_rate": 1.1911978309700252e-05, "loss": 0.3491, "step": 24300 }, { "epoch": 0.43884832333037216, "grad_norm": 0.4597525894641876, "learning_rate": 1.1909194314440402e-05, "loss": 0.2484, "step": 24305 }, { "epoch": 0.4389386027632729, "grad_norm": 1.573953628540039, "learning_rate": 1.1906410165599748e-05, "loss": 0.2956, "step": 24310 }, { "epoch": 0.4390288821961736, "grad_norm": 0.4366190731525421, "learning_rate": 1.1903625863402261e-05, "loss": 0.1852, "step": 24315 }, { "epoch": 0.43911916162907433, "grad_norm": 0.2975316643714905, "learning_rate": 1.1900841408071912e-05, "loss": 0.3055, "step": 24320 }, { "epoch": 0.439209441061975, "grad_norm": 0.5020982027053833, "learning_rate": 1.1898056799832694e-05, "loss": 0.2907, "step": 24325 }, { "epoch": 0.43929972049487576, "grad_norm": 0.4672057032585144, "learning_rate": 1.1895272038908604e-05, "loss": 0.2599, "step": 24330 }, { "epoch": 0.43938999992777644, "grad_norm": 0.33109766244888306, "learning_rate": 1.189248712552366e-05, "loss": 0.2419, "step": 24335 }, { "epoch": 0.4394802793606772, "grad_norm": 0.2925727963447571, "learning_rate": 1.1889702059901887e-05, "loss": 0.1978, "step": 24340 }, { "epoch": 0.43957055879357787, "grad_norm": 0.3118140697479248, "learning_rate": 1.188691684226732e-05, "loss": 0.1405, "step": 24345 }, { "epoch": 0.4396608382264786, "grad_norm": 0.37156930565834045, "learning_rate": 1.1884131472844013e-05, "loss": 0.1736, "step": 24350 }, { "epoch": 0.4397511176593793, "grad_norm": 1.8306384086608887, "learning_rate": 1.1881345951856031e-05, "loss": 0.32, "step": 24355 }, { "epoch": 0.43984139709228004, "grad_norm": 0.772351086139679, "learning_rate": 1.1878560279527445e-05, "loss": 0.2236, "step": 24360 }, { "epoch": 0.4399316765251807, "grad_norm": 0.4637385904788971, "learning_rate": 1.1875774456082342e-05, "loss": 0.182, "step": 24365 }, { "epoch": 0.44002195595808147, "grad_norm": 0.4528012275695801, "learning_rate": 1.1872988481744822e-05, "loss": 0.2313, "step": 24370 }, { "epoch": 0.44011223539098215, "grad_norm": 0.42925751209259033, "learning_rate": 1.1870202356738996e-05, "loss": 0.3206, "step": 24375 }, { "epoch": 0.4402025148238829, "grad_norm": 0.611354649066925, "learning_rate": 1.186741608128899e-05, "loss": 0.2631, "step": 24380 }, { "epoch": 0.4402927942567836, "grad_norm": 0.2863406240940094, "learning_rate": 1.1864629655618938e-05, "loss": 0.2927, "step": 24385 }, { "epoch": 0.4403830736896843, "grad_norm": 0.5054121613502502, "learning_rate": 1.1861843079952986e-05, "loss": 0.2239, "step": 24390 }, { "epoch": 0.440473353122585, "grad_norm": 0.44482725858688354, "learning_rate": 1.1859056354515292e-05, "loss": 0.2104, "step": 24395 }, { "epoch": 0.44056363255548575, "grad_norm": 0.47482234239578247, "learning_rate": 1.1856269479530036e-05, "loss": 0.3047, "step": 24400 }, { "epoch": 0.44065391198838644, "grad_norm": 0.6331995725631714, "learning_rate": 1.1853482455221395e-05, "loss": 0.1987, "step": 24405 }, { "epoch": 0.4407441914212872, "grad_norm": 0.45549681782722473, "learning_rate": 1.1850695281813565e-05, "loss": 0.2732, "step": 24410 }, { "epoch": 0.44083447085418787, "grad_norm": 0.2735913097858429, "learning_rate": 1.1847907959530755e-05, "loss": 0.2056, "step": 24415 }, { "epoch": 0.4409247502870886, "grad_norm": 0.45871978998184204, "learning_rate": 1.1845120488597189e-05, "loss": 0.2659, "step": 24420 }, { "epoch": 0.4410150297199893, "grad_norm": 0.40871867537498474, "learning_rate": 1.1842332869237094e-05, "loss": 0.2257, "step": 24425 }, { "epoch": 0.44110530915289003, "grad_norm": 0.5864661931991577, "learning_rate": 1.1839545101674712e-05, "loss": 0.2137, "step": 24430 }, { "epoch": 0.4411955885857907, "grad_norm": 0.4320935904979706, "learning_rate": 1.1836757186134302e-05, "loss": 0.2865, "step": 24435 }, { "epoch": 0.44128586801869146, "grad_norm": 0.6176201105117798, "learning_rate": 1.1833969122840134e-05, "loss": 0.3079, "step": 24440 }, { "epoch": 0.44137614745159215, "grad_norm": 0.6133599877357483, "learning_rate": 1.183118091201648e-05, "loss": 0.336, "step": 24445 }, { "epoch": 0.4414664268844929, "grad_norm": 0.3526920974254608, "learning_rate": 1.182839255388764e-05, "loss": 0.255, "step": 24450 }, { "epoch": 0.4415567063173936, "grad_norm": 0.3885346055030823, "learning_rate": 1.1825604048677913e-05, "loss": 0.1731, "step": 24455 }, { "epoch": 0.4416469857502943, "grad_norm": 0.4212827682495117, "learning_rate": 1.182281539661161e-05, "loss": 0.2904, "step": 24460 }, { "epoch": 0.441737265183195, "grad_norm": 0.2621988356113434, "learning_rate": 1.1820026597913061e-05, "loss": 0.2868, "step": 24465 }, { "epoch": 0.44182754461609575, "grad_norm": 0.537579357624054, "learning_rate": 1.1817237652806607e-05, "loss": 0.2877, "step": 24470 }, { "epoch": 0.44191782404899643, "grad_norm": 0.4155954420566559, "learning_rate": 1.1814448561516595e-05, "loss": 0.2729, "step": 24475 }, { "epoch": 0.4420081034818972, "grad_norm": 0.6439969539642334, "learning_rate": 1.181165932426739e-05, "loss": 0.2617, "step": 24480 }, { "epoch": 0.44209838291479786, "grad_norm": 0.5035080313682556, "learning_rate": 1.1808869941283367e-05, "loss": 0.2067, "step": 24485 }, { "epoch": 0.4421886623476986, "grad_norm": 0.5220370888710022, "learning_rate": 1.1806080412788905e-05, "loss": 0.2631, "step": 24490 }, { "epoch": 0.4422789417805993, "grad_norm": 0.3431147336959839, "learning_rate": 1.1803290739008406e-05, "loss": 0.2998, "step": 24495 }, { "epoch": 0.44236922121350003, "grad_norm": 0.411819189786911, "learning_rate": 1.180050092016628e-05, "loss": 0.2614, "step": 24500 }, { "epoch": 0.4424595006464007, "grad_norm": 0.3154963552951813, "learning_rate": 1.1797710956486946e-05, "loss": 0.239, "step": 24505 }, { "epoch": 0.44254978007930146, "grad_norm": 0.39507150650024414, "learning_rate": 1.179492084819483e-05, "loss": 0.2851, "step": 24510 }, { "epoch": 0.44264005951220214, "grad_norm": 0.47061803936958313, "learning_rate": 1.1792130595514388e-05, "loss": 0.2528, "step": 24515 }, { "epoch": 0.4427303389451029, "grad_norm": 0.5809831619262695, "learning_rate": 1.178934019867007e-05, "loss": 0.2567, "step": 24520 }, { "epoch": 0.44282061837800357, "grad_norm": 0.34608837962150574, "learning_rate": 1.1786549657886342e-05, "loss": 0.2107, "step": 24525 }, { "epoch": 0.4429108978109043, "grad_norm": 0.39510759711265564, "learning_rate": 1.1783758973387682e-05, "loss": 0.3024, "step": 24530 }, { "epoch": 0.443001177243805, "grad_norm": 0.4356182813644409, "learning_rate": 1.1780968145398581e-05, "loss": 0.2404, "step": 24535 }, { "epoch": 0.44309145667670574, "grad_norm": 0.5873695611953735, "learning_rate": 1.1778177174143546e-05, "loss": 0.2397, "step": 24540 }, { "epoch": 0.44318173610960643, "grad_norm": 2.7690255641937256, "learning_rate": 1.1775386059847083e-05, "loss": 0.3104, "step": 24545 }, { "epoch": 0.44327201554250717, "grad_norm": 0.33572685718536377, "learning_rate": 1.177259480273372e-05, "loss": 0.2598, "step": 24550 }, { "epoch": 0.44336229497540786, "grad_norm": 0.37144502997398376, "learning_rate": 1.1769803403027992e-05, "loss": 0.2386, "step": 24555 }, { "epoch": 0.4434525744083086, "grad_norm": 0.2875464856624603, "learning_rate": 1.176701186095445e-05, "loss": 0.2305, "step": 24560 }, { "epoch": 0.4435428538412093, "grad_norm": 0.4528631567955017, "learning_rate": 1.1764220176737649e-05, "loss": 0.2791, "step": 24565 }, { "epoch": 0.44363313327411, "grad_norm": 0.6390991806983948, "learning_rate": 1.1761428350602164e-05, "loss": 0.381, "step": 24570 }, { "epoch": 0.44372341270701077, "grad_norm": 0.38894256949424744, "learning_rate": 1.1758636382772572e-05, "loss": 0.2269, "step": 24575 }, { "epoch": 0.44381369213991145, "grad_norm": 0.3657304048538208, "learning_rate": 1.1755844273473473e-05, "loss": 0.275, "step": 24580 }, { "epoch": 0.4439039715728122, "grad_norm": 0.4715575873851776, "learning_rate": 1.1753052022929468e-05, "loss": 0.2281, "step": 24585 }, { "epoch": 0.4439942510057129, "grad_norm": 0.322095662355423, "learning_rate": 1.1750259631365173e-05, "loss": 0.2573, "step": 24590 }, { "epoch": 0.4440845304386136, "grad_norm": 0.304795503616333, "learning_rate": 1.1747467099005214e-05, "loss": 0.3018, "step": 24595 }, { "epoch": 0.4441748098715143, "grad_norm": 0.6157793998718262, "learning_rate": 1.1744674426074234e-05, "loss": 0.2722, "step": 24600 }, { "epoch": 0.44426508930441505, "grad_norm": 0.53531813621521, "learning_rate": 1.1741881612796883e-05, "loss": 0.2626, "step": 24605 }, { "epoch": 0.44435536873731574, "grad_norm": 0.42279717326164246, "learning_rate": 1.1739088659397817e-05, "loss": 0.2222, "step": 24610 }, { "epoch": 0.4444456481702165, "grad_norm": 0.4403485655784607, "learning_rate": 1.1736295566101717e-05, "loss": 0.3001, "step": 24615 }, { "epoch": 0.44453592760311716, "grad_norm": 0.3616626262664795, "learning_rate": 1.1733502333133261e-05, "loss": 0.2753, "step": 24620 }, { "epoch": 0.4446262070360179, "grad_norm": 0.6336793303489685, "learning_rate": 1.1730708960717147e-05, "loss": 0.264, "step": 24625 }, { "epoch": 0.4447164864689186, "grad_norm": 0.38618341088294983, "learning_rate": 1.1727915449078077e-05, "loss": 0.2094, "step": 24630 }, { "epoch": 0.44480676590181933, "grad_norm": 0.45772358775138855, "learning_rate": 1.172512179844078e-05, "loss": 0.2328, "step": 24635 }, { "epoch": 0.44489704533472, "grad_norm": 0.34782519936561584, "learning_rate": 1.1722328009029969e-05, "loss": 0.171, "step": 24640 }, { "epoch": 0.44498732476762076, "grad_norm": 0.37528547644615173, "learning_rate": 1.1719534081070396e-05, "loss": 0.2841, "step": 24645 }, { "epoch": 0.44507760420052145, "grad_norm": 0.3782618045806885, "learning_rate": 1.1716740014786809e-05, "loss": 0.2331, "step": 24650 }, { "epoch": 0.4451678836334222, "grad_norm": 0.5546488761901855, "learning_rate": 1.1713945810403968e-05, "loss": 0.2844, "step": 24655 }, { "epoch": 0.4452581630663229, "grad_norm": 0.375745564699173, "learning_rate": 1.1711151468146648e-05, "loss": 0.2949, "step": 24660 }, { "epoch": 0.4453484424992236, "grad_norm": 0.24210117757320404, "learning_rate": 1.1708356988239636e-05, "loss": 0.1624, "step": 24665 }, { "epoch": 0.4454387219321243, "grad_norm": 0.30786609649658203, "learning_rate": 1.1705562370907724e-05, "loss": 0.2435, "step": 24670 }, { "epoch": 0.44552900136502505, "grad_norm": 0.3296472132205963, "learning_rate": 1.1702767616375717e-05, "loss": 0.2309, "step": 24675 }, { "epoch": 0.44561928079792573, "grad_norm": 0.29616475105285645, "learning_rate": 1.1699972724868438e-05, "loss": 0.246, "step": 24680 }, { "epoch": 0.4457095602308265, "grad_norm": 0.5503436923027039, "learning_rate": 1.1697177696610713e-05, "loss": 0.2321, "step": 24685 }, { "epoch": 0.44579983966372716, "grad_norm": 0.4225671589374542, "learning_rate": 1.1694382531827385e-05, "loss": 0.1839, "step": 24690 }, { "epoch": 0.4458901190966279, "grad_norm": 0.5932523608207703, "learning_rate": 1.1691587230743297e-05, "loss": 0.2267, "step": 24695 }, { "epoch": 0.4459803985295286, "grad_norm": 0.4628986716270447, "learning_rate": 1.1688791793583318e-05, "loss": 0.2749, "step": 24700 }, { "epoch": 0.44607067796242933, "grad_norm": 0.4341941177845001, "learning_rate": 1.1685996220572318e-05, "loss": 0.2234, "step": 24705 }, { "epoch": 0.44616095739533, "grad_norm": 0.3851144015789032, "learning_rate": 1.1683200511935177e-05, "loss": 0.2791, "step": 24710 }, { "epoch": 0.44625123682823076, "grad_norm": 0.8717007637023926, "learning_rate": 1.1680404667896797e-05, "loss": 0.3386, "step": 24715 }, { "epoch": 0.44634151626113144, "grad_norm": 0.46784061193466187, "learning_rate": 1.1677608688682077e-05, "loss": 0.2856, "step": 24720 }, { "epoch": 0.4464317956940322, "grad_norm": 0.22613851726055145, "learning_rate": 1.1674812574515938e-05, "loss": 0.2416, "step": 24725 }, { "epoch": 0.44652207512693287, "grad_norm": 0.30826452374458313, "learning_rate": 1.16720163256233e-05, "loss": 0.2006, "step": 24730 }, { "epoch": 0.4466123545598336, "grad_norm": 0.4111037254333496, "learning_rate": 1.166921994222911e-05, "loss": 0.307, "step": 24735 }, { "epoch": 0.4467026339927343, "grad_norm": 0.3378446698188782, "learning_rate": 1.1666423424558309e-05, "loss": 0.1922, "step": 24740 }, { "epoch": 0.44679291342563504, "grad_norm": 0.5012000203132629, "learning_rate": 1.166362677283586e-05, "loss": 0.2858, "step": 24745 }, { "epoch": 0.4468831928585357, "grad_norm": 0.5589067935943604, "learning_rate": 1.1660829987286734e-05, "loss": 0.2197, "step": 24750 }, { "epoch": 0.44697347229143647, "grad_norm": 0.7075532078742981, "learning_rate": 1.1658033068135912e-05, "loss": 0.4324, "step": 24755 }, { "epoch": 0.44706375172433716, "grad_norm": 0.5003488659858704, "learning_rate": 1.165523601560838e-05, "loss": 0.281, "step": 24760 }, { "epoch": 0.4471540311572379, "grad_norm": 0.3863239288330078, "learning_rate": 1.1652438829929149e-05, "loss": 0.2743, "step": 24765 }, { "epoch": 0.4472443105901386, "grad_norm": 0.1344427764415741, "learning_rate": 1.164964151132323e-05, "loss": 0.2354, "step": 24770 }, { "epoch": 0.4473345900230393, "grad_norm": 0.6510555744171143, "learning_rate": 1.164684406001564e-05, "loss": 0.2332, "step": 24775 }, { "epoch": 0.44742486945594, "grad_norm": 0.45478320121765137, "learning_rate": 1.1644046476231423e-05, "loss": 0.2003, "step": 24780 }, { "epoch": 0.44751514888884075, "grad_norm": 0.5587338805198669, "learning_rate": 1.164124876019562e-05, "loss": 0.2565, "step": 24785 }, { "epoch": 0.44760542832174144, "grad_norm": 0.4031519591808319, "learning_rate": 1.1638450912133288e-05, "loss": 0.2197, "step": 24790 }, { "epoch": 0.4476957077546422, "grad_norm": 0.5361907482147217, "learning_rate": 1.163565293226949e-05, "loss": 0.2296, "step": 24795 }, { "epoch": 0.44778598718754287, "grad_norm": 0.41891780495643616, "learning_rate": 1.163285482082931e-05, "loss": 0.2556, "step": 24800 }, { "epoch": 0.4478762666204436, "grad_norm": 0.4059182107448578, "learning_rate": 1.163005657803783e-05, "loss": 0.3062, "step": 24805 }, { "epoch": 0.4479665460533443, "grad_norm": 0.6129568219184875, "learning_rate": 1.1627258204120146e-05, "loss": 0.3074, "step": 24810 }, { "epoch": 0.44805682548624504, "grad_norm": 0.7538573145866394, "learning_rate": 1.1624459699301375e-05, "loss": 0.1785, "step": 24815 }, { "epoch": 0.4481471049191457, "grad_norm": 0.40244433283805847, "learning_rate": 1.1621661063806635e-05, "loss": 0.2526, "step": 24820 }, { "epoch": 0.44823738435204646, "grad_norm": 0.6283896565437317, "learning_rate": 1.1618862297861046e-05, "loss": 0.2274, "step": 24825 }, { "epoch": 0.44832766378494715, "grad_norm": 0.34380289912223816, "learning_rate": 1.161606340168976e-05, "loss": 0.2386, "step": 24830 }, { "epoch": 0.4484179432178479, "grad_norm": 0.5499282479286194, "learning_rate": 1.1613264375517923e-05, "loss": 0.2102, "step": 24835 }, { "epoch": 0.4485082226507486, "grad_norm": 0.23248335719108582, "learning_rate": 1.1610465219570695e-05, "loss": 0.2723, "step": 24840 }, { "epoch": 0.4485985020836493, "grad_norm": 0.347027063369751, "learning_rate": 1.1607665934073248e-05, "loss": 0.2351, "step": 24845 }, { "epoch": 0.44868878151655, "grad_norm": 0.5538097620010376, "learning_rate": 1.1604866519250768e-05, "loss": 0.3567, "step": 24850 }, { "epoch": 0.44877906094945075, "grad_norm": 0.292410284280777, "learning_rate": 1.1602066975328446e-05, "loss": 0.2196, "step": 24855 }, { "epoch": 0.44886934038235143, "grad_norm": 0.7871230840682983, "learning_rate": 1.159926730253148e-05, "loss": 0.2483, "step": 24860 }, { "epoch": 0.4489596198152522, "grad_norm": 0.3869844675064087, "learning_rate": 1.159646750108509e-05, "loss": 0.2416, "step": 24865 }, { "epoch": 0.44904989924815286, "grad_norm": 0.5626687407493591, "learning_rate": 1.1593667571214498e-05, "loss": 0.2799, "step": 24870 }, { "epoch": 0.4491401786810536, "grad_norm": 0.6964197158813477, "learning_rate": 1.1590867513144934e-05, "loss": 0.1481, "step": 24875 }, { "epoch": 0.4492304581139543, "grad_norm": 0.3524242341518402, "learning_rate": 1.1588067327101649e-05, "loss": 0.2354, "step": 24880 }, { "epoch": 0.44932073754685503, "grad_norm": 0.5947927832603455, "learning_rate": 1.1585267013309892e-05, "loss": 0.2916, "step": 24885 }, { "epoch": 0.4494110169797557, "grad_norm": 0.5788018107414246, "learning_rate": 1.1582466571994925e-05, "loss": 0.2421, "step": 24890 }, { "epoch": 0.44950129641265646, "grad_norm": 0.662961483001709, "learning_rate": 1.1579666003382035e-05, "loss": 0.258, "step": 24895 }, { "epoch": 0.44959157584555715, "grad_norm": 0.44949132204055786, "learning_rate": 1.1576865307696499e-05, "loss": 0.2572, "step": 24900 }, { "epoch": 0.4496818552784579, "grad_norm": 1.094267725944519, "learning_rate": 1.1574064485163613e-05, "loss": 0.3229, "step": 24905 }, { "epoch": 0.4497721347113586, "grad_norm": 0.39188432693481445, "learning_rate": 1.157126353600868e-05, "loss": 0.2184, "step": 24910 }, { "epoch": 0.4498624141442593, "grad_norm": 0.34874260425567627, "learning_rate": 1.1568462460457028e-05, "loss": 0.2069, "step": 24915 }, { "epoch": 0.44995269357716, "grad_norm": 0.6210698485374451, "learning_rate": 1.156566125873397e-05, "loss": 0.3669, "step": 24920 }, { "epoch": 0.45004297301006074, "grad_norm": 0.4312707781791687, "learning_rate": 1.1562859931064845e-05, "loss": 0.1418, "step": 24925 }, { "epoch": 0.45013325244296143, "grad_norm": 0.3792107403278351, "learning_rate": 1.1560058477675004e-05, "loss": 0.3266, "step": 24930 }, { "epoch": 0.45022353187586217, "grad_norm": 0.37607336044311523, "learning_rate": 1.1557256898789801e-05, "loss": 0.2038, "step": 24935 }, { "epoch": 0.45031381130876286, "grad_norm": 0.49079403281211853, "learning_rate": 1.1554455194634601e-05, "loss": 0.2078, "step": 24940 }, { "epoch": 0.4504040907416636, "grad_norm": 0.3289194703102112, "learning_rate": 1.1551653365434787e-05, "loss": 0.2301, "step": 24945 }, { "epoch": 0.4504943701745643, "grad_norm": 0.4723817706108093, "learning_rate": 1.1548851411415739e-05, "loss": 0.1823, "step": 24950 }, { "epoch": 0.450584649607465, "grad_norm": 0.3931238055229187, "learning_rate": 1.1546049332802855e-05, "loss": 0.2415, "step": 24955 }, { "epoch": 0.4506749290403657, "grad_norm": 0.2613414525985718, "learning_rate": 1.1543247129821545e-05, "loss": 0.2462, "step": 24960 }, { "epoch": 0.45076520847326645, "grad_norm": 0.5624940991401672, "learning_rate": 1.1540444802697221e-05, "loss": 0.3012, "step": 24965 }, { "epoch": 0.4508554879061672, "grad_norm": 0.35730329155921936, "learning_rate": 1.1537642351655316e-05, "loss": 0.2074, "step": 24970 }, { "epoch": 0.4509457673390679, "grad_norm": 0.5674639940261841, "learning_rate": 1.153483977692126e-05, "loss": 0.3158, "step": 24975 }, { "epoch": 0.4510360467719686, "grad_norm": 0.676696240901947, "learning_rate": 1.1532037078720508e-05, "loss": 0.1583, "step": 24980 }, { "epoch": 0.4511263262048693, "grad_norm": 0.5902599096298218, "learning_rate": 1.152923425727851e-05, "loss": 0.2561, "step": 24985 }, { "epoch": 0.45121660563777005, "grad_norm": 0.6040018200874329, "learning_rate": 1.1526431312820731e-05, "loss": 0.2519, "step": 24990 }, { "epoch": 0.45130688507067074, "grad_norm": 0.3789654076099396, "learning_rate": 1.1523628245572655e-05, "loss": 0.2538, "step": 24995 }, { "epoch": 0.4513971645035715, "grad_norm": 0.5607237219810486, "learning_rate": 1.1520825055759765e-05, "loss": 0.4231, "step": 25000 }, { "epoch": 0.45148744393647217, "grad_norm": 0.45001712441444397, "learning_rate": 1.1518021743607558e-05, "loss": 0.2767, "step": 25005 }, { "epoch": 0.4515777233693729, "grad_norm": 0.3102862536907196, "learning_rate": 1.1515218309341535e-05, "loss": 0.2226, "step": 25010 }, { "epoch": 0.4516680028022736, "grad_norm": 0.5020765662193298, "learning_rate": 1.1512414753187218e-05, "loss": 0.2048, "step": 25015 }, { "epoch": 0.45175828223517434, "grad_norm": 0.4714259207248688, "learning_rate": 1.1509611075370131e-05, "loss": 0.2712, "step": 25020 }, { "epoch": 0.451848561668075, "grad_norm": 0.9056620001792908, "learning_rate": 1.1506807276115806e-05, "loss": 0.3684, "step": 25025 }, { "epoch": 0.45193884110097576, "grad_norm": 0.6726963520050049, "learning_rate": 1.1504003355649794e-05, "loss": 0.2887, "step": 25030 }, { "epoch": 0.45202912053387645, "grad_norm": 0.5326406359672546, "learning_rate": 1.1501199314197644e-05, "loss": 0.2195, "step": 25035 }, { "epoch": 0.4521193999667772, "grad_norm": 0.5724204182624817, "learning_rate": 1.1498395151984925e-05, "loss": 0.3536, "step": 25040 }, { "epoch": 0.4522096793996779, "grad_norm": 0.4841223657131195, "learning_rate": 1.1495590869237213e-05, "loss": 0.1531, "step": 25045 }, { "epoch": 0.4522999588325786, "grad_norm": 0.3502753674983978, "learning_rate": 1.1492786466180086e-05, "loss": 0.2277, "step": 25050 }, { "epoch": 0.4523902382654793, "grad_norm": 0.4281364381313324, "learning_rate": 1.1489981943039141e-05, "loss": 0.2408, "step": 25055 }, { "epoch": 0.45248051769838005, "grad_norm": 0.5470902323722839, "learning_rate": 1.1487177300039984e-05, "loss": 0.2491, "step": 25060 }, { "epoch": 0.45257079713128073, "grad_norm": 0.4884530305862427, "learning_rate": 1.1484372537408229e-05, "loss": 0.1792, "step": 25065 }, { "epoch": 0.4526610765641815, "grad_norm": 0.481823205947876, "learning_rate": 1.1481567655369491e-05, "loss": 0.1847, "step": 25070 }, { "epoch": 0.45275135599708216, "grad_norm": 0.41728538274765015, "learning_rate": 1.1478762654149409e-05, "loss": 0.2356, "step": 25075 }, { "epoch": 0.4528416354299829, "grad_norm": 0.5025848746299744, "learning_rate": 1.1475957533973623e-05, "loss": 0.2256, "step": 25080 }, { "epoch": 0.4529319148628836, "grad_norm": 0.5028762817382812, "learning_rate": 1.1473152295067787e-05, "loss": 0.2719, "step": 25085 }, { "epoch": 0.45302219429578433, "grad_norm": 0.5323634147644043, "learning_rate": 1.1470346937657555e-05, "loss": 0.2936, "step": 25090 }, { "epoch": 0.453112473728685, "grad_norm": 0.4943752586841583, "learning_rate": 1.1467541461968607e-05, "loss": 0.2228, "step": 25095 }, { "epoch": 0.45320275316158576, "grad_norm": 0.6112527251243591, "learning_rate": 1.1464735868226618e-05, "loss": 0.2062, "step": 25100 }, { "epoch": 0.45329303259448644, "grad_norm": 0.4090018570423126, "learning_rate": 1.1461930156657273e-05, "loss": 0.3034, "step": 25105 }, { "epoch": 0.4533833120273872, "grad_norm": 0.6104006767272949, "learning_rate": 1.1459124327486279e-05, "loss": 0.2675, "step": 25110 }, { "epoch": 0.4534735914602879, "grad_norm": 0.33331215381622314, "learning_rate": 1.1456318380939344e-05, "loss": 0.2653, "step": 25115 }, { "epoch": 0.4535638708931886, "grad_norm": 0.38586562871932983, "learning_rate": 1.145351231724218e-05, "loss": 0.274, "step": 25120 }, { "epoch": 0.4536541503260893, "grad_norm": 0.41210177540779114, "learning_rate": 1.1450706136620523e-05, "loss": 0.2445, "step": 25125 }, { "epoch": 0.45374442975899004, "grad_norm": 0.35308703780174255, "learning_rate": 1.14478998393001e-05, "loss": 0.1937, "step": 25130 }, { "epoch": 0.45383470919189073, "grad_norm": 0.42387494444847107, "learning_rate": 1.1445093425506666e-05, "loss": 0.3267, "step": 25135 }, { "epoch": 0.45392498862479147, "grad_norm": 0.4690084755420685, "learning_rate": 1.1442286895465968e-05, "loss": 0.2214, "step": 25140 }, { "epoch": 0.45401526805769216, "grad_norm": 0.6119925379753113, "learning_rate": 1.143948024940378e-05, "loss": 0.2009, "step": 25145 }, { "epoch": 0.4541055474905929, "grad_norm": 0.2987114489078522, "learning_rate": 1.143667348754587e-05, "loss": 0.1875, "step": 25150 }, { "epoch": 0.4541958269234936, "grad_norm": 0.6101985573768616, "learning_rate": 1.1433866610118023e-05, "loss": 0.2181, "step": 25155 }, { "epoch": 0.4542861063563943, "grad_norm": 0.7137722373008728, "learning_rate": 1.1431059617346034e-05, "loss": 0.2883, "step": 25160 }, { "epoch": 0.454376385789295, "grad_norm": 0.2992022931575775, "learning_rate": 1.1428252509455704e-05, "loss": 0.2287, "step": 25165 }, { "epoch": 0.45446666522219575, "grad_norm": 0.3725275993347168, "learning_rate": 1.1425445286672843e-05, "loss": 0.2557, "step": 25170 }, { "epoch": 0.45455694465509644, "grad_norm": 0.534310519695282, "learning_rate": 1.1422637949223272e-05, "loss": 0.1826, "step": 25175 }, { "epoch": 0.4546472240879972, "grad_norm": 0.31912413239479065, "learning_rate": 1.1419830497332822e-05, "loss": 0.2428, "step": 25180 }, { "epoch": 0.45473750352089787, "grad_norm": 0.5752959251403809, "learning_rate": 1.1417022931227332e-05, "loss": 0.2639, "step": 25185 }, { "epoch": 0.4548277829537986, "grad_norm": 0.352507084608078, "learning_rate": 1.1414215251132652e-05, "loss": 0.3151, "step": 25190 }, { "epoch": 0.4549180623866993, "grad_norm": 0.6433412432670593, "learning_rate": 1.1411407457274634e-05, "loss": 0.2357, "step": 25195 }, { "epoch": 0.45500834181960004, "grad_norm": 0.5227245688438416, "learning_rate": 1.1408599549879151e-05, "loss": 0.2158, "step": 25200 }, { "epoch": 0.4550986212525007, "grad_norm": 0.8377803564071655, "learning_rate": 1.1405791529172072e-05, "loss": 0.2124, "step": 25205 }, { "epoch": 0.45518890068540147, "grad_norm": 0.5074353218078613, "learning_rate": 1.140298339537929e-05, "loss": 0.1699, "step": 25210 }, { "epoch": 0.45527918011830215, "grad_norm": 0.3799591064453125, "learning_rate": 1.1400175148726693e-05, "loss": 0.3373, "step": 25215 }, { "epoch": 0.4553694595512029, "grad_norm": 0.6017751097679138, "learning_rate": 1.1397366789440183e-05, "loss": 0.2376, "step": 25220 }, { "epoch": 0.4554597389841036, "grad_norm": 0.5219336748123169, "learning_rate": 1.1394558317745677e-05, "loss": 0.2721, "step": 25225 }, { "epoch": 0.4555500184170043, "grad_norm": 0.578639030456543, "learning_rate": 1.1391749733869094e-05, "loss": 0.2706, "step": 25230 }, { "epoch": 0.455640297849905, "grad_norm": 0.4614681601524353, "learning_rate": 1.1388941038036363e-05, "loss": 0.2798, "step": 25235 }, { "epoch": 0.45573057728280575, "grad_norm": 0.2860408425331116, "learning_rate": 1.1386132230473421e-05, "loss": 0.2544, "step": 25240 }, { "epoch": 0.45582085671570644, "grad_norm": 0.3257483243942261, "learning_rate": 1.1383323311406223e-05, "loss": 0.2625, "step": 25245 }, { "epoch": 0.4559111361486072, "grad_norm": 0.29570409655570984, "learning_rate": 1.1380514281060723e-05, "loss": 0.2186, "step": 25250 }, { "epoch": 0.45600141558150786, "grad_norm": 0.5505267381668091, "learning_rate": 1.1377705139662882e-05, "loss": 0.2637, "step": 25255 }, { "epoch": 0.4560916950144086, "grad_norm": 0.4645271897315979, "learning_rate": 1.1374895887438684e-05, "loss": 0.2102, "step": 25260 }, { "epoch": 0.4561819744473093, "grad_norm": 0.4300084710121155, "learning_rate": 1.1372086524614107e-05, "loss": 0.2516, "step": 25265 }, { "epoch": 0.45627225388021003, "grad_norm": 0.46449944376945496, "learning_rate": 1.1369277051415146e-05, "loss": 0.2904, "step": 25270 }, { "epoch": 0.4563625333131107, "grad_norm": 0.4134475588798523, "learning_rate": 1.13664674680678e-05, "loss": 0.2882, "step": 25275 }, { "epoch": 0.45645281274601146, "grad_norm": 0.4197487533092499, "learning_rate": 1.1363657774798084e-05, "loss": 0.1875, "step": 25280 }, { "epoch": 0.45654309217891215, "grad_norm": 0.34673747420310974, "learning_rate": 1.1360847971832012e-05, "loss": 0.2741, "step": 25285 }, { "epoch": 0.4566333716118129, "grad_norm": 0.35914045572280884, "learning_rate": 1.1358038059395615e-05, "loss": 0.2237, "step": 25290 }, { "epoch": 0.4567236510447136, "grad_norm": 0.9550446271896362, "learning_rate": 1.1355228037714932e-05, "loss": 0.2301, "step": 25295 }, { "epoch": 0.4568139304776143, "grad_norm": 0.716317892074585, "learning_rate": 1.1352417907016007e-05, "loss": 0.2322, "step": 25300 }, { "epoch": 0.456904209910515, "grad_norm": 0.45155155658721924, "learning_rate": 1.1349607667524892e-05, "loss": 0.2183, "step": 25305 }, { "epoch": 0.45699448934341574, "grad_norm": 0.392500102519989, "learning_rate": 1.1346797319467652e-05, "loss": 0.3804, "step": 25310 }, { "epoch": 0.45708476877631643, "grad_norm": 0.6335352063179016, "learning_rate": 1.1343986863070363e-05, "loss": 0.2859, "step": 25315 }, { "epoch": 0.45717504820921717, "grad_norm": 0.3605762720108032, "learning_rate": 1.13411762985591e-05, "loss": 0.2202, "step": 25320 }, { "epoch": 0.45726532764211786, "grad_norm": 0.37601563334465027, "learning_rate": 1.1338365626159957e-05, "loss": 0.2028, "step": 25325 }, { "epoch": 0.4573556070750186, "grad_norm": 0.3913663327693939, "learning_rate": 1.1335554846099029e-05, "loss": 0.2659, "step": 25330 }, { "epoch": 0.4574458865079193, "grad_norm": 0.3613426983356476, "learning_rate": 1.1332743958602426e-05, "loss": 0.2835, "step": 25335 }, { "epoch": 0.45753616594082, "grad_norm": 0.40373799204826355, "learning_rate": 1.1329932963896257e-05, "loss": 0.2213, "step": 25340 }, { "epoch": 0.4576264453737207, "grad_norm": 0.5222960114479065, "learning_rate": 1.1327121862206656e-05, "loss": 0.1844, "step": 25345 }, { "epoch": 0.45771672480662146, "grad_norm": 0.48120495676994324, "learning_rate": 1.1324310653759748e-05, "loss": 0.2273, "step": 25350 }, { "epoch": 0.45780700423952214, "grad_norm": 0.5859454274177551, "learning_rate": 1.1321499338781675e-05, "loss": 0.2183, "step": 25355 }, { "epoch": 0.4578972836724229, "grad_norm": 0.4917299449443817, "learning_rate": 1.131868791749859e-05, "loss": 0.2939, "step": 25360 }, { "epoch": 0.4579875631053236, "grad_norm": 0.6798126697540283, "learning_rate": 1.131587639013665e-05, "loss": 0.3164, "step": 25365 }, { "epoch": 0.4580778425382243, "grad_norm": 0.6465598940849304, "learning_rate": 1.1313064756922023e-05, "loss": 0.2533, "step": 25370 }, { "epoch": 0.45816812197112505, "grad_norm": 0.25099602341651917, "learning_rate": 1.1310253018080881e-05, "loss": 0.2755, "step": 25375 }, { "epoch": 0.45825840140402574, "grad_norm": 0.823212742805481, "learning_rate": 1.1307441173839413e-05, "loss": 0.2083, "step": 25380 }, { "epoch": 0.4583486808369265, "grad_norm": 0.43957242369651794, "learning_rate": 1.1304629224423805e-05, "loss": 0.2222, "step": 25385 }, { "epoch": 0.45843896026982717, "grad_norm": 0.3509773910045624, "learning_rate": 1.1301817170060262e-05, "loss": 0.2608, "step": 25390 }, { "epoch": 0.4585292397027279, "grad_norm": 0.43266743421554565, "learning_rate": 1.1299005010974994e-05, "loss": 0.247, "step": 25395 }, { "epoch": 0.4586195191356286, "grad_norm": 0.3913414776325226, "learning_rate": 1.1296192747394219e-05, "loss": 0.1879, "step": 25400 }, { "epoch": 0.45870979856852934, "grad_norm": 0.3474053144454956, "learning_rate": 1.1293380379544159e-05, "loss": 0.2345, "step": 25405 }, { "epoch": 0.45880007800143, "grad_norm": 0.5937274098396301, "learning_rate": 1.1290567907651053e-05, "loss": 0.2455, "step": 25410 }, { "epoch": 0.45889035743433076, "grad_norm": 0.5838733911514282, "learning_rate": 1.1287755331941142e-05, "loss": 0.1668, "step": 25415 }, { "epoch": 0.45898063686723145, "grad_norm": 0.4663761258125305, "learning_rate": 1.1284942652640674e-05, "loss": 0.2531, "step": 25420 }, { "epoch": 0.4590709163001322, "grad_norm": 0.7597901225090027, "learning_rate": 1.1282129869975915e-05, "loss": 0.3999, "step": 25425 }, { "epoch": 0.4591611957330329, "grad_norm": 0.694631814956665, "learning_rate": 1.1279316984173132e-05, "loss": 0.2582, "step": 25430 }, { "epoch": 0.4592514751659336, "grad_norm": 1.2105693817138672, "learning_rate": 1.1276503995458596e-05, "loss": 0.3124, "step": 25435 }, { "epoch": 0.4593417545988343, "grad_norm": 0.5668565630912781, "learning_rate": 1.1273690904058595e-05, "loss": 0.2417, "step": 25440 }, { "epoch": 0.45943203403173505, "grad_norm": 0.5524459481239319, "learning_rate": 1.1270877710199421e-05, "loss": 0.238, "step": 25445 }, { "epoch": 0.45952231346463573, "grad_norm": 0.5903627872467041, "learning_rate": 1.1268064414107379e-05, "loss": 0.2152, "step": 25450 }, { "epoch": 0.4596125928975365, "grad_norm": 0.7859571576118469, "learning_rate": 1.126525101600877e-05, "loss": 0.3142, "step": 25455 }, { "epoch": 0.45970287233043716, "grad_norm": 0.3708322048187256, "learning_rate": 1.1262437516129918e-05, "loss": 0.3153, "step": 25460 }, { "epoch": 0.4597931517633379, "grad_norm": 0.5059872269630432, "learning_rate": 1.1259623914697149e-05, "loss": 0.2025, "step": 25465 }, { "epoch": 0.4598834311962386, "grad_norm": 0.365987092256546, "learning_rate": 1.125681021193679e-05, "loss": 0.226, "step": 25470 }, { "epoch": 0.45997371062913933, "grad_norm": 0.37558290362358093, "learning_rate": 1.1253996408075192e-05, "loss": 0.1914, "step": 25475 }, { "epoch": 0.46006399006204, "grad_norm": 0.45481544733047485, "learning_rate": 1.1251182503338698e-05, "loss": 0.2769, "step": 25480 }, { "epoch": 0.46015426949494076, "grad_norm": 0.6855883598327637, "learning_rate": 1.1248368497953666e-05, "loss": 0.322, "step": 25485 }, { "epoch": 0.46024454892784145, "grad_norm": 0.3675699830055237, "learning_rate": 1.1245554392146467e-05, "loss": 0.316, "step": 25490 }, { "epoch": 0.4603348283607422, "grad_norm": 0.46729376912117004, "learning_rate": 1.1242740186143475e-05, "loss": 0.275, "step": 25495 }, { "epoch": 0.4604251077936429, "grad_norm": 0.3551467955112457, "learning_rate": 1.123992588017107e-05, "loss": 0.3124, "step": 25500 }, { "epoch": 0.4605153872265436, "grad_norm": 0.5993281006813049, "learning_rate": 1.123711147445564e-05, "loss": 0.3097, "step": 25505 }, { "epoch": 0.4606056666594443, "grad_norm": 0.322431743144989, "learning_rate": 1.123429696922359e-05, "loss": 0.2159, "step": 25510 }, { "epoch": 0.46069594609234504, "grad_norm": 0.5621485114097595, "learning_rate": 1.1231482364701323e-05, "loss": 0.3048, "step": 25515 }, { "epoch": 0.46078622552524573, "grad_norm": 0.34407296776771545, "learning_rate": 1.1228667661115249e-05, "loss": 0.2101, "step": 25520 }, { "epoch": 0.46087650495814647, "grad_norm": 0.48472723364830017, "learning_rate": 1.12258528586918e-05, "loss": 0.1564, "step": 25525 }, { "epoch": 0.46096678439104716, "grad_norm": 0.41901537775993347, "learning_rate": 1.1223037957657399e-05, "loss": 0.1618, "step": 25530 }, { "epoch": 0.4610570638239479, "grad_norm": 0.3657739460468292, "learning_rate": 1.122022295823849e-05, "loss": 0.208, "step": 25535 }, { "epoch": 0.4611473432568486, "grad_norm": 0.443582683801651, "learning_rate": 1.1217407860661509e-05, "loss": 0.2967, "step": 25540 }, { "epoch": 0.4612376226897493, "grad_norm": 0.8349863290786743, "learning_rate": 1.1214592665152924e-05, "loss": 0.2257, "step": 25545 }, { "epoch": 0.46132790212265, "grad_norm": 0.5645694732666016, "learning_rate": 1.1211777371939186e-05, "loss": 0.2244, "step": 25550 }, { "epoch": 0.46141818155555075, "grad_norm": 0.254520982503891, "learning_rate": 1.1208961981246768e-05, "loss": 0.1787, "step": 25555 }, { "epoch": 0.46150846098845144, "grad_norm": 0.3871762752532959, "learning_rate": 1.1206146493302155e-05, "loss": 0.321, "step": 25560 }, { "epoch": 0.4615987404213522, "grad_norm": 0.39559558033943176, "learning_rate": 1.1203330908331822e-05, "loss": 0.2453, "step": 25565 }, { "epoch": 0.46168901985425287, "grad_norm": 0.3969613015651703, "learning_rate": 1.1200515226562264e-05, "loss": 0.2542, "step": 25570 }, { "epoch": 0.4617792992871536, "grad_norm": 0.4680093228816986, "learning_rate": 1.1197699448219988e-05, "loss": 0.3573, "step": 25575 }, { "epoch": 0.4618695787200543, "grad_norm": 0.38695165514945984, "learning_rate": 1.11948835735315e-05, "loss": 0.2162, "step": 25580 }, { "epoch": 0.46195985815295504, "grad_norm": 0.4600764513015747, "learning_rate": 1.1192067602723311e-05, "loss": 0.2679, "step": 25585 }, { "epoch": 0.4620501375858557, "grad_norm": 0.5053609013557434, "learning_rate": 1.1189251536021955e-05, "loss": 0.3082, "step": 25590 }, { "epoch": 0.46214041701875647, "grad_norm": 0.5011573433876038, "learning_rate": 1.1186435373653959e-05, "loss": 0.2691, "step": 25595 }, { "epoch": 0.46223069645165715, "grad_norm": 0.42619091272354126, "learning_rate": 1.1183619115845863e-05, "loss": 0.1645, "step": 25600 }, { "epoch": 0.4623209758845579, "grad_norm": 0.8814185857772827, "learning_rate": 1.1180802762824215e-05, "loss": 0.2909, "step": 25605 }, { "epoch": 0.4624112553174586, "grad_norm": 0.4841145873069763, "learning_rate": 1.117798631481557e-05, "loss": 0.2913, "step": 25610 }, { "epoch": 0.4625015347503593, "grad_norm": 0.4144599437713623, "learning_rate": 1.117516977204649e-05, "loss": 0.2421, "step": 25615 }, { "epoch": 0.46259181418326, "grad_norm": 0.4766666293144226, "learning_rate": 1.1172353134743546e-05, "loss": 0.2619, "step": 25620 }, { "epoch": 0.46268209361616075, "grad_norm": 0.4696897566318512, "learning_rate": 1.1169536403133319e-05, "loss": 0.3143, "step": 25625 }, { "epoch": 0.46277237304906144, "grad_norm": 0.8463373184204102, "learning_rate": 1.1166719577442389e-05, "loss": 0.2457, "step": 25630 }, { "epoch": 0.4628626524819622, "grad_norm": 0.3645748496055603, "learning_rate": 1.1163902657897355e-05, "loss": 0.2786, "step": 25635 }, { "epoch": 0.46295293191486286, "grad_norm": 0.3574855625629425, "learning_rate": 1.116108564472481e-05, "loss": 0.2294, "step": 25640 }, { "epoch": 0.4630432113477636, "grad_norm": 0.3958921730518341, "learning_rate": 1.1158268538151369e-05, "loss": 0.2407, "step": 25645 }, { "epoch": 0.4631334907806643, "grad_norm": 0.4588448405265808, "learning_rate": 1.1155451338403645e-05, "loss": 0.2109, "step": 25650 }, { "epoch": 0.46322377021356503, "grad_norm": 0.5760933756828308, "learning_rate": 1.1152634045708263e-05, "loss": 0.1958, "step": 25655 }, { "epoch": 0.4633140496464657, "grad_norm": 0.5492554903030396, "learning_rate": 1.1149816660291852e-05, "loss": 0.2414, "step": 25660 }, { "epoch": 0.46340432907936646, "grad_norm": 0.5135606527328491, "learning_rate": 1.1146999182381051e-05, "loss": 0.2112, "step": 25665 }, { "epoch": 0.46349460851226715, "grad_norm": 0.2507036328315735, "learning_rate": 1.1144181612202502e-05, "loss": 0.2593, "step": 25670 }, { "epoch": 0.4635848879451679, "grad_norm": 0.672838568687439, "learning_rate": 1.1141363949982867e-05, "loss": 0.18, "step": 25675 }, { "epoch": 0.4636751673780686, "grad_norm": 0.29975202679634094, "learning_rate": 1.1138546195948798e-05, "loss": 0.3004, "step": 25680 }, { "epoch": 0.4637654468109693, "grad_norm": 0.44783079624176025, "learning_rate": 1.1135728350326965e-05, "loss": 0.2838, "step": 25685 }, { "epoch": 0.46385572624387, "grad_norm": 0.5177004933357239, "learning_rate": 1.1132910413344042e-05, "loss": 0.296, "step": 25690 }, { "epoch": 0.46394600567677075, "grad_norm": 0.47807779908180237, "learning_rate": 1.1130092385226717e-05, "loss": 0.2883, "step": 25695 }, { "epoch": 0.46403628510967143, "grad_norm": 0.3265976011753082, "learning_rate": 1.1127274266201676e-05, "loss": 0.2491, "step": 25700 }, { "epoch": 0.4641265645425722, "grad_norm": 0.5433142781257629, "learning_rate": 1.1124456056495611e-05, "loss": 0.2103, "step": 25705 }, { "epoch": 0.46421684397547286, "grad_norm": 0.36991173028945923, "learning_rate": 1.1121637756335238e-05, "loss": 0.2209, "step": 25710 }, { "epoch": 0.4643071234083736, "grad_norm": 0.6328502893447876, "learning_rate": 1.111881936594726e-05, "loss": 0.3442, "step": 25715 }, { "epoch": 0.4643974028412743, "grad_norm": 0.36517852544784546, "learning_rate": 1.1116000885558398e-05, "loss": 0.1797, "step": 25720 }, { "epoch": 0.46448768227417503, "grad_norm": 0.4057893455028534, "learning_rate": 1.1113182315395379e-05, "loss": 0.3115, "step": 25725 }, { "epoch": 0.4645779617070757, "grad_norm": 0.6015501618385315, "learning_rate": 1.111036365568494e-05, "loss": 0.3159, "step": 25730 }, { "epoch": 0.46466824113997646, "grad_norm": 0.3591579496860504, "learning_rate": 1.1107544906653811e-05, "loss": 0.2598, "step": 25735 }, { "epoch": 0.46475852057287714, "grad_norm": 0.5037890672683716, "learning_rate": 1.1104726068528755e-05, "loss": 0.2334, "step": 25740 }, { "epoch": 0.4648488000057779, "grad_norm": 0.31298428773880005, "learning_rate": 1.1101907141536514e-05, "loss": 0.2427, "step": 25745 }, { "epoch": 0.46493907943867857, "grad_norm": 0.2975746989250183, "learning_rate": 1.1099088125903855e-05, "loss": 0.298, "step": 25750 }, { "epoch": 0.4650293588715793, "grad_norm": 0.4254186749458313, "learning_rate": 1.109626902185755e-05, "loss": 0.2685, "step": 25755 }, { "epoch": 0.46511963830448005, "grad_norm": 0.4576852023601532, "learning_rate": 1.1093449829624372e-05, "loss": 0.2352, "step": 25760 }, { "epoch": 0.46520991773738074, "grad_norm": 0.2610947787761688, "learning_rate": 1.1090630549431105e-05, "loss": 0.1378, "step": 25765 }, { "epoch": 0.4653001971702815, "grad_norm": 0.35531389713287354, "learning_rate": 1.1087811181504538e-05, "loss": 0.1956, "step": 25770 }, { "epoch": 0.46539047660318217, "grad_norm": 0.52121901512146, "learning_rate": 1.1084991726071472e-05, "loss": 0.293, "step": 25775 }, { "epoch": 0.4654807560360829, "grad_norm": 0.5938030481338501, "learning_rate": 1.1082172183358713e-05, "loss": 0.2746, "step": 25780 }, { "epoch": 0.4655710354689836, "grad_norm": 0.5315929055213928, "learning_rate": 1.1079352553593067e-05, "loss": 0.3005, "step": 25785 }, { "epoch": 0.46566131490188434, "grad_norm": 0.40332263708114624, "learning_rate": 1.1076532837001355e-05, "loss": 0.2105, "step": 25790 }, { "epoch": 0.465751594334785, "grad_norm": 0.4936860203742981, "learning_rate": 1.1073713033810405e-05, "loss": 0.2534, "step": 25795 }, { "epoch": 0.46584187376768577, "grad_norm": 0.5080345869064331, "learning_rate": 1.1070893144247046e-05, "loss": 0.2169, "step": 25800 }, { "epoch": 0.46593215320058645, "grad_norm": 0.46547484397888184, "learning_rate": 1.1068073168538123e-05, "loss": 0.2345, "step": 25805 }, { "epoch": 0.4660224326334872, "grad_norm": 0.4039542078971863, "learning_rate": 1.1065253106910476e-05, "loss": 0.2727, "step": 25810 }, { "epoch": 0.4661127120663879, "grad_norm": 0.5644627213478088, "learning_rate": 1.1062432959590965e-05, "loss": 0.2923, "step": 25815 }, { "epoch": 0.4662029914992886, "grad_norm": 0.3832181692123413, "learning_rate": 1.1059612726806444e-05, "loss": 0.2747, "step": 25820 }, { "epoch": 0.4662932709321893, "grad_norm": 0.24436330795288086, "learning_rate": 1.1056792408783786e-05, "loss": 0.2788, "step": 25825 }, { "epoch": 0.46638355036509005, "grad_norm": 0.39775246381759644, "learning_rate": 1.1053972005749861e-05, "loss": 0.1851, "step": 25830 }, { "epoch": 0.46647382979799074, "grad_norm": 0.7041085958480835, "learning_rate": 1.1051151517931551e-05, "loss": 0.32, "step": 25835 }, { "epoch": 0.4665641092308915, "grad_norm": 0.49672818183898926, "learning_rate": 1.1048330945555745e-05, "loss": 0.2239, "step": 25840 }, { "epoch": 0.46665438866379216, "grad_norm": 0.5358633399009705, "learning_rate": 1.1045510288849336e-05, "loss": 0.3883, "step": 25845 }, { "epoch": 0.4667446680966929, "grad_norm": 0.41424885392189026, "learning_rate": 1.1042689548039224e-05, "loss": 0.2624, "step": 25850 }, { "epoch": 0.4668349475295936, "grad_norm": 0.5248550772666931, "learning_rate": 1.103986872335232e-05, "loss": 0.3169, "step": 25855 }, { "epoch": 0.46692522696249433, "grad_norm": 0.4089259207248688, "learning_rate": 1.103704781501554e-05, "loss": 0.3263, "step": 25860 }, { "epoch": 0.467015506395395, "grad_norm": 0.6299268007278442, "learning_rate": 1.1034226823255802e-05, "loss": 0.252, "step": 25865 }, { "epoch": 0.46710578582829576, "grad_norm": 0.5844053030014038, "learning_rate": 1.1031405748300035e-05, "loss": 0.3051, "step": 25870 }, { "epoch": 0.46719606526119645, "grad_norm": 0.472973108291626, "learning_rate": 1.1028584590375175e-05, "loss": 0.2391, "step": 25875 }, { "epoch": 0.4672863446940972, "grad_norm": 0.5440942049026489, "learning_rate": 1.1025763349708163e-05, "loss": 0.3175, "step": 25880 }, { "epoch": 0.4673766241269979, "grad_norm": 0.5540207028388977, "learning_rate": 1.1022942026525947e-05, "loss": 0.2434, "step": 25885 }, { "epoch": 0.4674669035598986, "grad_norm": 0.5777327418327332, "learning_rate": 1.1020120621055483e-05, "loss": 0.2079, "step": 25890 }, { "epoch": 0.4675571829927993, "grad_norm": 0.658270001411438, "learning_rate": 1.1017299133523734e-05, "loss": 0.2382, "step": 25895 }, { "epoch": 0.46764746242570004, "grad_norm": 0.5810391902923584, "learning_rate": 1.1014477564157661e-05, "loss": 0.262, "step": 25900 }, { "epoch": 0.46773774185860073, "grad_norm": 0.3897687494754791, "learning_rate": 1.1011655913184249e-05, "loss": 0.2611, "step": 25905 }, { "epoch": 0.4678280212915015, "grad_norm": 0.45382043719291687, "learning_rate": 1.1008834180830472e-05, "loss": 0.3405, "step": 25910 }, { "epoch": 0.46791830072440216, "grad_norm": 0.3488757908344269, "learning_rate": 1.1006012367323322e-05, "loss": 0.2745, "step": 25915 }, { "epoch": 0.4680085801573029, "grad_norm": 0.35227203369140625, "learning_rate": 1.1003190472889788e-05, "loss": 0.3258, "step": 25920 }, { "epoch": 0.4680988595902036, "grad_norm": 0.4962708652019501, "learning_rate": 1.100036849775688e-05, "loss": 0.2832, "step": 25925 }, { "epoch": 0.46818913902310433, "grad_norm": 0.6424579620361328, "learning_rate": 1.0997546442151597e-05, "loss": 0.2182, "step": 25930 }, { "epoch": 0.468279418456005, "grad_norm": 0.35105547308921814, "learning_rate": 1.0994724306300953e-05, "loss": 0.2243, "step": 25935 }, { "epoch": 0.46836969788890576, "grad_norm": 0.6380845904350281, "learning_rate": 1.0991902090431971e-05, "loss": 0.1955, "step": 25940 }, { "epoch": 0.46845997732180644, "grad_norm": 0.31674501299858093, "learning_rate": 1.0989079794771682e-05, "loss": 0.2868, "step": 25945 }, { "epoch": 0.4685502567547072, "grad_norm": 0.6792436242103577, "learning_rate": 1.098625741954711e-05, "loss": 0.2341, "step": 25950 }, { "epoch": 0.46864053618760787, "grad_norm": 0.5717369318008423, "learning_rate": 1.0983434964985303e-05, "loss": 0.1819, "step": 25955 }, { "epoch": 0.4687308156205086, "grad_norm": 0.49002474546432495, "learning_rate": 1.0980612431313302e-05, "loss": 0.2725, "step": 25960 }, { "epoch": 0.4688210950534093, "grad_norm": 0.6652733683586121, "learning_rate": 1.0977789818758157e-05, "loss": 0.3086, "step": 25965 }, { "epoch": 0.46891137448631004, "grad_norm": 0.5847036838531494, "learning_rate": 1.0974967127546937e-05, "loss": 0.3057, "step": 25970 }, { "epoch": 0.4690016539192107, "grad_norm": 0.5464393496513367, "learning_rate": 1.0972144357906696e-05, "loss": 0.3747, "step": 25975 }, { "epoch": 0.46909193335211147, "grad_norm": 0.6326963305473328, "learning_rate": 1.0969321510064512e-05, "loss": 0.2874, "step": 25980 }, { "epoch": 0.46918221278501215, "grad_norm": 0.4930112659931183, "learning_rate": 1.0966498584247455e-05, "loss": 0.2897, "step": 25985 }, { "epoch": 0.4692724922179129, "grad_norm": 0.3699823319911957, "learning_rate": 1.096367558068262e-05, "loss": 0.1979, "step": 25990 }, { "epoch": 0.4693627716508136, "grad_norm": 0.5999264717102051, "learning_rate": 1.096085249959709e-05, "loss": 0.2248, "step": 25995 }, { "epoch": 0.4694530510837143, "grad_norm": 0.3253791332244873, "learning_rate": 1.095802934121796e-05, "loss": 0.2934, "step": 26000 }, { "epoch": 0.469543330516615, "grad_norm": 0.3865527808666229, "learning_rate": 1.0955206105772336e-05, "loss": 0.2156, "step": 26005 }, { "epoch": 0.46963360994951575, "grad_norm": 1.0743937492370605, "learning_rate": 1.0952382793487327e-05, "loss": 0.2454, "step": 26010 }, { "epoch": 0.46972388938241644, "grad_norm": 0.5202724933624268, "learning_rate": 1.0949559404590046e-05, "loss": 0.2771, "step": 26015 }, { "epoch": 0.4698141688153172, "grad_norm": 0.4546166956424713, "learning_rate": 1.0946735939307616e-05, "loss": 0.2314, "step": 26020 }, { "epoch": 0.46990444824821787, "grad_norm": 0.4739367365837097, "learning_rate": 1.0943912397867162e-05, "loss": 0.2837, "step": 26025 }, { "epoch": 0.4699947276811186, "grad_norm": 0.6224332451820374, "learning_rate": 1.0941088780495819e-05, "loss": 0.2555, "step": 26030 }, { "epoch": 0.4700850071140193, "grad_norm": 0.7361112833023071, "learning_rate": 1.0938265087420725e-05, "loss": 0.3031, "step": 26035 }, { "epoch": 0.47017528654692003, "grad_norm": 0.3367628753185272, "learning_rate": 1.0935441318869027e-05, "loss": 0.2023, "step": 26040 }, { "epoch": 0.4702655659798207, "grad_norm": 0.378570556640625, "learning_rate": 1.0932617475067877e-05, "loss": 0.3861, "step": 26045 }, { "epoch": 0.47035584541272146, "grad_norm": 0.5609474182128906, "learning_rate": 1.092979355624443e-05, "loss": 0.22, "step": 26050 }, { "epoch": 0.47044612484562215, "grad_norm": 0.5063912868499756, "learning_rate": 1.0926969562625852e-05, "loss": 0.279, "step": 26055 }, { "epoch": 0.4705364042785229, "grad_norm": 0.21222810447216034, "learning_rate": 1.0924145494439316e-05, "loss": 0.3078, "step": 26060 }, { "epoch": 0.4706266837114236, "grad_norm": 0.4927471876144409, "learning_rate": 1.0921321351911989e-05, "loss": 0.1854, "step": 26065 }, { "epoch": 0.4707169631443243, "grad_norm": 0.5247211456298828, "learning_rate": 1.091849713527106e-05, "loss": 0.3319, "step": 26070 }, { "epoch": 0.470807242577225, "grad_norm": 0.3175283372402191, "learning_rate": 1.0915672844743717e-05, "loss": 0.2239, "step": 26075 }, { "epoch": 0.47089752201012575, "grad_norm": 0.4371960163116455, "learning_rate": 1.091284848055715e-05, "loss": 0.3051, "step": 26080 }, { "epoch": 0.47098780144302643, "grad_norm": 0.5870938897132874, "learning_rate": 1.0910024042938556e-05, "loss": 0.2086, "step": 26085 }, { "epoch": 0.4710780808759272, "grad_norm": 0.6371119618415833, "learning_rate": 1.0907199532115149e-05, "loss": 0.2639, "step": 26090 }, { "epoch": 0.47116836030882786, "grad_norm": 0.5096482634544373, "learning_rate": 1.0904374948314134e-05, "loss": 0.2345, "step": 26095 }, { "epoch": 0.4712586397417286, "grad_norm": 0.48447567224502563, "learning_rate": 1.0901550291762729e-05, "loss": 0.3136, "step": 26100 }, { "epoch": 0.4713489191746293, "grad_norm": 0.49430206418037415, "learning_rate": 1.0898725562688157e-05, "loss": 0.1687, "step": 26105 }, { "epoch": 0.47143919860753003, "grad_norm": 0.36696097254753113, "learning_rate": 1.0895900761317653e-05, "loss": 0.3424, "step": 26110 }, { "epoch": 0.4715294780404307, "grad_norm": 0.56471848487854, "learning_rate": 1.0893075887878442e-05, "loss": 0.2464, "step": 26115 }, { "epoch": 0.47161975747333146, "grad_norm": 1.4889129400253296, "learning_rate": 1.089025094259777e-05, "loss": 0.1967, "step": 26120 }, { "epoch": 0.47171003690623214, "grad_norm": 0.4604998826980591, "learning_rate": 1.0887425925702884e-05, "loss": 0.2182, "step": 26125 }, { "epoch": 0.4718003163391329, "grad_norm": 0.5376400351524353, "learning_rate": 1.0884600837421032e-05, "loss": 0.3413, "step": 26130 }, { "epoch": 0.47189059577203357, "grad_norm": 0.5955811738967896, "learning_rate": 1.0881775677979474e-05, "loss": 0.2632, "step": 26135 }, { "epoch": 0.4719808752049343, "grad_norm": 0.7458768486976624, "learning_rate": 1.0878950447605476e-05, "loss": 0.2911, "step": 26140 }, { "epoch": 0.472071154637835, "grad_norm": 0.6885311007499695, "learning_rate": 1.0876125146526304e-05, "loss": 0.1883, "step": 26145 }, { "epoch": 0.47216143407073574, "grad_norm": 0.5044176578521729, "learning_rate": 1.0873299774969233e-05, "loss": 0.3279, "step": 26150 }, { "epoch": 0.4722517135036365, "grad_norm": 0.39769864082336426, "learning_rate": 1.0870474333161547e-05, "loss": 0.2401, "step": 26155 }, { "epoch": 0.47234199293653717, "grad_norm": 0.2982105016708374, "learning_rate": 1.0867648821330529e-05, "loss": 0.2673, "step": 26160 }, { "epoch": 0.4724322723694379, "grad_norm": 0.4600881040096283, "learning_rate": 1.0864823239703471e-05, "loss": 0.3246, "step": 26165 }, { "epoch": 0.4725225518023386, "grad_norm": 0.513886570930481, "learning_rate": 1.086199758850767e-05, "loss": 0.2409, "step": 26170 }, { "epoch": 0.47261283123523934, "grad_norm": 0.39647242426872253, "learning_rate": 1.0859171867970434e-05, "loss": 0.2802, "step": 26175 }, { "epoch": 0.47270311066814, "grad_norm": 0.45733892917633057, "learning_rate": 1.0856346078319067e-05, "loss": 0.2692, "step": 26180 }, { "epoch": 0.47279339010104077, "grad_norm": 2.128892660140991, "learning_rate": 1.0853520219780882e-05, "loss": 0.3395, "step": 26185 }, { "epoch": 0.47288366953394145, "grad_norm": 0.3948085606098175, "learning_rate": 1.0850694292583201e-05, "loss": 0.2767, "step": 26190 }, { "epoch": 0.4729739489668422, "grad_norm": 0.3208751678466797, "learning_rate": 1.0847868296953352e-05, "loss": 0.3812, "step": 26195 }, { "epoch": 0.4730642283997429, "grad_norm": 0.47710272669792175, "learning_rate": 1.0845042233118663e-05, "loss": 0.3216, "step": 26200 }, { "epoch": 0.4731545078326436, "grad_norm": 0.4054561257362366, "learning_rate": 1.0842216101306468e-05, "loss": 0.2704, "step": 26205 }, { "epoch": 0.4732447872655443, "grad_norm": 0.5142204761505127, "learning_rate": 1.0839389901744113e-05, "loss": 0.2548, "step": 26210 }, { "epoch": 0.47333506669844505, "grad_norm": 0.5620430707931519, "learning_rate": 1.083656363465894e-05, "loss": 0.2676, "step": 26215 }, { "epoch": 0.47342534613134574, "grad_norm": 0.4242805242538452, "learning_rate": 1.0833737300278312e-05, "loss": 0.2053, "step": 26220 }, { "epoch": 0.4735156255642465, "grad_norm": 0.787665843963623, "learning_rate": 1.0830910898829573e-05, "loss": 0.3023, "step": 26225 }, { "epoch": 0.47360590499714716, "grad_norm": 0.36403128504753113, "learning_rate": 1.0828084430540096e-05, "loss": 0.2033, "step": 26230 }, { "epoch": 0.4736961844300479, "grad_norm": 0.35800427198410034, "learning_rate": 1.0825257895637246e-05, "loss": 0.2328, "step": 26235 }, { "epoch": 0.4737864638629486, "grad_norm": 0.5503618121147156, "learning_rate": 1.0822431294348403e-05, "loss": 0.2239, "step": 26240 }, { "epoch": 0.47387674329584933, "grad_norm": 0.5361478328704834, "learning_rate": 1.081960462690094e-05, "loss": 0.3129, "step": 26245 }, { "epoch": 0.47396702272875, "grad_norm": 0.9362848997116089, "learning_rate": 1.081677789352224e-05, "loss": 0.2872, "step": 26250 }, { "epoch": 0.47405730216165076, "grad_norm": 0.3955424129962921, "learning_rate": 1.08139510944397e-05, "loss": 0.2665, "step": 26255 }, { "epoch": 0.47414758159455145, "grad_norm": 0.4825882315635681, "learning_rate": 1.0811124229880715e-05, "loss": 0.1996, "step": 26260 }, { "epoch": 0.4742378610274522, "grad_norm": 0.5206092000007629, "learning_rate": 1.0808297300072677e-05, "loss": 0.2343, "step": 26265 }, { "epoch": 0.4743281404603529, "grad_norm": 0.4103998839855194, "learning_rate": 1.0805470305243001e-05, "loss": 0.2462, "step": 26270 }, { "epoch": 0.4744184198932536, "grad_norm": 0.31804096698760986, "learning_rate": 1.0802643245619097e-05, "loss": 0.2299, "step": 26275 }, { "epoch": 0.4745086993261543, "grad_norm": 0.35483410954475403, "learning_rate": 1.079981612142838e-05, "loss": 0.2021, "step": 26280 }, { "epoch": 0.47459897875905505, "grad_norm": 0.6194055676460266, "learning_rate": 1.0796988932898267e-05, "loss": 0.3255, "step": 26285 }, { "epoch": 0.47468925819195573, "grad_norm": 0.44059252738952637, "learning_rate": 1.0794161680256194e-05, "loss": 0.2525, "step": 26290 }, { "epoch": 0.4747795376248565, "grad_norm": 0.34184208512306213, "learning_rate": 1.0791334363729586e-05, "loss": 0.2726, "step": 26295 }, { "epoch": 0.47486981705775716, "grad_norm": 0.48157361149787903, "learning_rate": 1.0788506983545881e-05, "loss": 0.1915, "step": 26300 }, { "epoch": 0.4749600964906579, "grad_norm": 0.503015398979187, "learning_rate": 1.0785679539932526e-05, "loss": 0.2278, "step": 26305 }, { "epoch": 0.4750503759235586, "grad_norm": 0.37844520807266235, "learning_rate": 1.0782852033116959e-05, "loss": 0.1732, "step": 26310 }, { "epoch": 0.47514065535645933, "grad_norm": 0.49388542771339417, "learning_rate": 1.078002446332664e-05, "loss": 0.3277, "step": 26315 }, { "epoch": 0.47523093478936, "grad_norm": 0.29744821786880493, "learning_rate": 1.0777196830789025e-05, "loss": 0.2413, "step": 26320 }, { "epoch": 0.47532121422226076, "grad_norm": 0.604690670967102, "learning_rate": 1.0774369135731575e-05, "loss": 0.2685, "step": 26325 }, { "epoch": 0.47541149365516144, "grad_norm": 0.32783612608909607, "learning_rate": 1.0771541378381758e-05, "loss": 0.2013, "step": 26330 }, { "epoch": 0.4755017730880622, "grad_norm": 0.29162389039993286, "learning_rate": 1.0768713558967048e-05, "loss": 0.2443, "step": 26335 }, { "epoch": 0.47559205252096287, "grad_norm": 0.38048410415649414, "learning_rate": 1.076588567771492e-05, "loss": 0.2531, "step": 26340 }, { "epoch": 0.4756823319538636, "grad_norm": 0.5431281924247742, "learning_rate": 1.0763057734852857e-05, "loss": 0.2447, "step": 26345 }, { "epoch": 0.4757726113867643, "grad_norm": 0.40416088700294495, "learning_rate": 1.0760229730608348e-05, "loss": 0.2327, "step": 26350 }, { "epoch": 0.47586289081966504, "grad_norm": 0.3202001452445984, "learning_rate": 1.0757401665208886e-05, "loss": 0.1971, "step": 26355 }, { "epoch": 0.4759531702525657, "grad_norm": 0.2053840309381485, "learning_rate": 1.0754573538881968e-05, "loss": 0.2461, "step": 26360 }, { "epoch": 0.47604344968546647, "grad_norm": 1.687205195426941, "learning_rate": 1.075174535185509e-05, "loss": 0.2447, "step": 26365 }, { "epoch": 0.47613372911836715, "grad_norm": 0.4893333911895752, "learning_rate": 1.0748917104355766e-05, "loss": 0.3474, "step": 26370 }, { "epoch": 0.4762240085512679, "grad_norm": 0.6483590602874756, "learning_rate": 1.074608879661151e-05, "loss": 0.1697, "step": 26375 }, { "epoch": 0.4763142879841686, "grad_norm": 0.29549115896224976, "learning_rate": 1.0743260428849832e-05, "loss": 0.1897, "step": 26380 }, { "epoch": 0.4764045674170693, "grad_norm": 0.5002672076225281, "learning_rate": 1.0740432001298258e-05, "loss": 0.2914, "step": 26385 }, { "epoch": 0.47649484684997, "grad_norm": 0.5598426461219788, "learning_rate": 1.0737603514184312e-05, "loss": 0.2492, "step": 26390 }, { "epoch": 0.47658512628287075, "grad_norm": 1.129087209701538, "learning_rate": 1.0734774967735523e-05, "loss": 0.2702, "step": 26395 }, { "epoch": 0.47667540571577144, "grad_norm": 0.6787183880805969, "learning_rate": 1.0731946362179434e-05, "loss": 0.2756, "step": 26400 }, { "epoch": 0.4767656851486722, "grad_norm": 0.6386555433273315, "learning_rate": 1.072911769774358e-05, "loss": 0.245, "step": 26405 }, { "epoch": 0.47685596458157287, "grad_norm": 0.30959513783454895, "learning_rate": 1.072628897465551e-05, "loss": 0.2177, "step": 26410 }, { "epoch": 0.4769462440144736, "grad_norm": 0.36446917057037354, "learning_rate": 1.0723460193142772e-05, "loss": 0.3273, "step": 26415 }, { "epoch": 0.4770365234473743, "grad_norm": 0.5002758502960205, "learning_rate": 1.072063135343292e-05, "loss": 0.2759, "step": 26420 }, { "epoch": 0.47712680288027504, "grad_norm": 0.3369194269180298, "learning_rate": 1.0717802455753519e-05, "loss": 0.191, "step": 26425 }, { "epoch": 0.4772170823131757, "grad_norm": 0.5441814661026001, "learning_rate": 1.0714973500332124e-05, "loss": 0.2409, "step": 26430 }, { "epoch": 0.47730736174607646, "grad_norm": 1.0081517696380615, "learning_rate": 1.0712144487396313e-05, "loss": 0.2947, "step": 26435 }, { "epoch": 0.47739764117897715, "grad_norm": 0.4605311155319214, "learning_rate": 1.0709315417173656e-05, "loss": 0.2141, "step": 26440 }, { "epoch": 0.4774879206118779, "grad_norm": 0.4445546567440033, "learning_rate": 1.0706486289891729e-05, "loss": 0.2481, "step": 26445 }, { "epoch": 0.4775782000447786, "grad_norm": 0.4483776092529297, "learning_rate": 1.0703657105778116e-05, "loss": 0.2655, "step": 26450 }, { "epoch": 0.4776684794776793, "grad_norm": 0.6984508037567139, "learning_rate": 1.0700827865060409e-05, "loss": 0.29, "step": 26455 }, { "epoch": 0.47775875891058, "grad_norm": 0.36635181307792664, "learning_rate": 1.0697998567966192e-05, "loss": 0.2816, "step": 26460 }, { "epoch": 0.47784903834348075, "grad_norm": 0.4521954357624054, "learning_rate": 1.0695169214723065e-05, "loss": 0.2911, "step": 26465 }, { "epoch": 0.47793931777638143, "grad_norm": 0.3105730712413788, "learning_rate": 1.0692339805558632e-05, "loss": 0.3114, "step": 26470 }, { "epoch": 0.4780295972092822, "grad_norm": 0.34403181076049805, "learning_rate": 1.0689510340700493e-05, "loss": 0.2183, "step": 26475 }, { "epoch": 0.47811987664218286, "grad_norm": 0.3706817328929901, "learning_rate": 1.068668082037626e-05, "loss": 0.3542, "step": 26480 }, { "epoch": 0.4782101560750836, "grad_norm": 0.7294043302536011, "learning_rate": 1.068385124481355e-05, "loss": 0.2699, "step": 26485 }, { "epoch": 0.4783004355079843, "grad_norm": 0.5159411430358887, "learning_rate": 1.068102161423998e-05, "loss": 0.3034, "step": 26490 }, { "epoch": 0.47839071494088503, "grad_norm": 0.32585665583610535, "learning_rate": 1.0678191928883168e-05, "loss": 0.1609, "step": 26495 }, { "epoch": 0.4784809943737857, "grad_norm": 0.45307326316833496, "learning_rate": 1.067536218897075e-05, "loss": 0.2491, "step": 26500 }, { "epoch": 0.47857127380668646, "grad_norm": 0.7151533365249634, "learning_rate": 1.0672532394730354e-05, "loss": 0.2381, "step": 26505 }, { "epoch": 0.47866155323958715, "grad_norm": 0.3486301004886627, "learning_rate": 1.0669702546389617e-05, "loss": 0.247, "step": 26510 }, { "epoch": 0.4787518326724879, "grad_norm": 0.8058130741119385, "learning_rate": 1.0666872644176176e-05, "loss": 0.2803, "step": 26515 }, { "epoch": 0.4788421121053886, "grad_norm": 0.3897343873977661, "learning_rate": 1.0664042688317682e-05, "loss": 0.1999, "step": 26520 }, { "epoch": 0.4789323915382893, "grad_norm": 0.49622663855552673, "learning_rate": 1.0661212679041784e-05, "loss": 0.1602, "step": 26525 }, { "epoch": 0.47902267097119, "grad_norm": 0.7422386407852173, "learning_rate": 1.0658382616576128e-05, "loss": 0.2099, "step": 26530 }, { "epoch": 0.47911295040409074, "grad_norm": 0.42769497632980347, "learning_rate": 1.065555250114838e-05, "loss": 0.2941, "step": 26535 }, { "epoch": 0.47920322983699143, "grad_norm": 0.5021001696586609, "learning_rate": 1.0652722332986201e-05, "loss": 0.2319, "step": 26540 }, { "epoch": 0.47929350926989217, "grad_norm": 0.4900134205818176, "learning_rate": 1.0649892112317258e-05, "loss": 0.2659, "step": 26545 }, { "epoch": 0.47938378870279286, "grad_norm": 0.5937042832374573, "learning_rate": 1.0647061839369213e-05, "loss": 0.3298, "step": 26550 }, { "epoch": 0.4794740681356936, "grad_norm": 0.40473803877830505, "learning_rate": 1.0644231514369751e-05, "loss": 0.13, "step": 26555 }, { "epoch": 0.47956434756859434, "grad_norm": 0.38396719098091125, "learning_rate": 1.064140113754655e-05, "loss": 0.227, "step": 26560 }, { "epoch": 0.479654627001495, "grad_norm": 0.1891910433769226, "learning_rate": 1.0638570709127287e-05, "loss": 0.2141, "step": 26565 }, { "epoch": 0.47974490643439577, "grad_norm": 0.31764113903045654, "learning_rate": 1.0635740229339657e-05, "loss": 0.2441, "step": 26570 }, { "epoch": 0.47983518586729645, "grad_norm": 0.4471171796321869, "learning_rate": 1.063290969841135e-05, "loss": 0.291, "step": 26575 }, { "epoch": 0.4799254653001972, "grad_norm": 0.25125494599342346, "learning_rate": 1.063007911657005e-05, "loss": 0.2535, "step": 26580 }, { "epoch": 0.4800157447330979, "grad_norm": 0.4169851541519165, "learning_rate": 1.0627248484043473e-05, "loss": 0.333, "step": 26585 }, { "epoch": 0.4801060241659986, "grad_norm": 0.5211341381072998, "learning_rate": 1.0624417801059317e-05, "loss": 0.2166, "step": 26590 }, { "epoch": 0.4801963035988993, "grad_norm": 0.5397112965583801, "learning_rate": 1.0621587067845285e-05, "loss": 0.2862, "step": 26595 }, { "epoch": 0.48028658303180005, "grad_norm": 0.9622629284858704, "learning_rate": 1.0618756284629095e-05, "loss": 0.2888, "step": 26600 }, { "epoch": 0.48037686246470074, "grad_norm": 0.5056719779968262, "learning_rate": 1.061592545163846e-05, "loss": 0.216, "step": 26605 }, { "epoch": 0.4804671418976015, "grad_norm": 0.27500560879707336, "learning_rate": 1.06130945691011e-05, "loss": 0.2227, "step": 26610 }, { "epoch": 0.48055742133050217, "grad_norm": 0.5323085784912109, "learning_rate": 1.061026363724474e-05, "loss": 0.2357, "step": 26615 }, { "epoch": 0.4806477007634029, "grad_norm": 0.6595057845115662, "learning_rate": 1.0607432656297108e-05, "loss": 0.2965, "step": 26620 }, { "epoch": 0.4807379801963036, "grad_norm": 0.4934602975845337, "learning_rate": 1.0604601626485935e-05, "loss": 0.2645, "step": 26625 }, { "epoch": 0.48082825962920434, "grad_norm": 0.42322760820388794, "learning_rate": 1.0601770548038957e-05, "loss": 0.2174, "step": 26630 }, { "epoch": 0.480918539062105, "grad_norm": 0.35548311471939087, "learning_rate": 1.0598939421183914e-05, "loss": 0.2364, "step": 26635 }, { "epoch": 0.48100881849500576, "grad_norm": 0.21498507261276245, "learning_rate": 1.0596108246148549e-05, "loss": 0.2376, "step": 26640 }, { "epoch": 0.48109909792790645, "grad_norm": 0.22194571793079376, "learning_rate": 1.059327702316061e-05, "loss": 0.1536, "step": 26645 }, { "epoch": 0.4811893773608072, "grad_norm": 0.3227093815803528, "learning_rate": 1.0590445752447847e-05, "loss": 0.3052, "step": 26650 }, { "epoch": 0.4812796567937079, "grad_norm": 0.45720019936561584, "learning_rate": 1.0587614434238017e-05, "loss": 0.2323, "step": 26655 }, { "epoch": 0.4813699362266086, "grad_norm": 0.6177743077278137, "learning_rate": 1.0584783068758878e-05, "loss": 0.1444, "step": 26660 }, { "epoch": 0.4814602156595093, "grad_norm": 0.6074416637420654, "learning_rate": 1.0581951656238194e-05, "loss": 0.3255, "step": 26665 }, { "epoch": 0.48155049509241005, "grad_norm": 0.9755043983459473, "learning_rate": 1.057912019690373e-05, "loss": 0.3404, "step": 26670 }, { "epoch": 0.48164077452531073, "grad_norm": 0.7641077637672424, "learning_rate": 1.0576288690983255e-05, "loss": 0.2589, "step": 26675 }, { "epoch": 0.4817310539582115, "grad_norm": 0.35916680097579956, "learning_rate": 1.0573457138704546e-05, "loss": 0.2822, "step": 26680 }, { "epoch": 0.48182133339111216, "grad_norm": 0.3647823631763458, "learning_rate": 1.057062554029538e-05, "loss": 0.1295, "step": 26685 }, { "epoch": 0.4819116128240129, "grad_norm": 0.4745776057243347, "learning_rate": 1.0567793895983538e-05, "loss": 0.2181, "step": 26690 }, { "epoch": 0.4820018922569136, "grad_norm": 0.5288276076316833, "learning_rate": 1.0564962205996802e-05, "loss": 0.277, "step": 26695 }, { "epoch": 0.48209217168981433, "grad_norm": 0.49253758788108826, "learning_rate": 1.0562130470562968e-05, "loss": 0.2034, "step": 26700 }, { "epoch": 0.482182451122715, "grad_norm": 0.669064998626709, "learning_rate": 1.0559298689909825e-05, "loss": 0.238, "step": 26705 }, { "epoch": 0.48227273055561576, "grad_norm": 0.6141138672828674, "learning_rate": 1.0556466864265167e-05, "loss": 0.2832, "step": 26710 }, { "epoch": 0.48236300998851644, "grad_norm": 0.3418901264667511, "learning_rate": 1.0553634993856796e-05, "loss": 0.2015, "step": 26715 }, { "epoch": 0.4824532894214172, "grad_norm": 0.5083296895027161, "learning_rate": 1.0550803078912517e-05, "loss": 0.1523, "step": 26720 }, { "epoch": 0.4825435688543179, "grad_norm": 0.4708094894886017, "learning_rate": 1.0547971119660135e-05, "loss": 0.2269, "step": 26725 }, { "epoch": 0.4826338482872186, "grad_norm": 0.500835657119751, "learning_rate": 1.0545139116327456e-05, "loss": 0.2771, "step": 26730 }, { "epoch": 0.4827241277201193, "grad_norm": 0.4330381751060486, "learning_rate": 1.0542307069142303e-05, "loss": 0.246, "step": 26735 }, { "epoch": 0.48281440715302004, "grad_norm": 0.4174950122833252, "learning_rate": 1.0539474978332489e-05, "loss": 0.2575, "step": 26740 }, { "epoch": 0.48290468658592073, "grad_norm": 0.4970250129699707, "learning_rate": 1.0536642844125834e-05, "loss": 0.1805, "step": 26745 }, { "epoch": 0.48299496601882147, "grad_norm": 0.5641002655029297, "learning_rate": 1.0533810666750163e-05, "loss": 0.2918, "step": 26750 }, { "epoch": 0.48308524545172216, "grad_norm": 0.4091084897518158, "learning_rate": 1.0530978446433309e-05, "loss": 0.2336, "step": 26755 }, { "epoch": 0.4831755248846229, "grad_norm": 0.3855745494365692, "learning_rate": 1.0528146183403097e-05, "loss": 0.2463, "step": 26760 }, { "epoch": 0.4832658043175236, "grad_norm": 0.48720014095306396, "learning_rate": 1.0525313877887364e-05, "loss": 0.2497, "step": 26765 }, { "epoch": 0.4833560837504243, "grad_norm": 0.22547881305217743, "learning_rate": 1.052248153011395e-05, "loss": 0.2767, "step": 26770 }, { "epoch": 0.483446363183325, "grad_norm": 0.9942919611930847, "learning_rate": 1.0519649140310695e-05, "loss": 0.26, "step": 26775 }, { "epoch": 0.48353664261622575, "grad_norm": 0.4433083236217499, "learning_rate": 1.0516816708705444e-05, "loss": 0.3044, "step": 26780 }, { "epoch": 0.48362692204912644, "grad_norm": 0.5108844637870789, "learning_rate": 1.0513984235526048e-05, "loss": 0.3148, "step": 26785 }, { "epoch": 0.4837172014820272, "grad_norm": 0.5388258695602417, "learning_rate": 1.0511151721000357e-05, "loss": 0.3072, "step": 26790 }, { "epoch": 0.48380748091492787, "grad_norm": 0.4262354373931885, "learning_rate": 1.0508319165356225e-05, "loss": 0.2556, "step": 26795 }, { "epoch": 0.4838977603478286, "grad_norm": 0.3868439197540283, "learning_rate": 1.0505486568821514e-05, "loss": 0.237, "step": 26800 }, { "epoch": 0.4839880397807293, "grad_norm": 0.47711315751075745, "learning_rate": 1.0502653931624083e-05, "loss": 0.3017, "step": 26805 }, { "epoch": 0.48407831921363004, "grad_norm": 0.5847601294517517, "learning_rate": 1.0499821253991797e-05, "loss": 0.3238, "step": 26810 }, { "epoch": 0.4841685986465307, "grad_norm": 0.3355996012687683, "learning_rate": 1.0496988536152526e-05, "loss": 0.2366, "step": 26815 }, { "epoch": 0.48425887807943147, "grad_norm": 0.4323943555355072, "learning_rate": 1.049415577833414e-05, "loss": 0.2569, "step": 26820 }, { "epoch": 0.48434915751233215, "grad_norm": 0.6366611123085022, "learning_rate": 1.0491322980764517e-05, "loss": 0.302, "step": 26825 }, { "epoch": 0.4844394369452329, "grad_norm": 0.78076171875, "learning_rate": 1.048849014367153e-05, "loss": 0.2917, "step": 26830 }, { "epoch": 0.4845297163781336, "grad_norm": 0.4806523025035858, "learning_rate": 1.0485657267283062e-05, "loss": 0.2046, "step": 26835 }, { "epoch": 0.4846199958110343, "grad_norm": 0.6299446821212769, "learning_rate": 1.0482824351827001e-05, "loss": 0.2716, "step": 26840 }, { "epoch": 0.484710275243935, "grad_norm": 0.469390332698822, "learning_rate": 1.047999139753123e-05, "loss": 0.1242, "step": 26845 }, { "epoch": 0.48480055467683575, "grad_norm": 0.6657130718231201, "learning_rate": 1.0477158404623641e-05, "loss": 0.2257, "step": 26850 }, { "epoch": 0.48489083410973643, "grad_norm": 0.371386855840683, "learning_rate": 1.0474325373332129e-05, "loss": 0.2508, "step": 26855 }, { "epoch": 0.4849811135426372, "grad_norm": 0.6997236609458923, "learning_rate": 1.0471492303884585e-05, "loss": 0.1855, "step": 26860 }, { "epoch": 0.48507139297553786, "grad_norm": 0.4666514992713928, "learning_rate": 1.0468659196508916e-05, "loss": 0.3018, "step": 26865 }, { "epoch": 0.4851616724084386, "grad_norm": 0.37955641746520996, "learning_rate": 1.0465826051433023e-05, "loss": 0.2873, "step": 26870 }, { "epoch": 0.4852519518413393, "grad_norm": 0.36574721336364746, "learning_rate": 1.0462992868884808e-05, "loss": 0.2312, "step": 26875 }, { "epoch": 0.48534223127424003, "grad_norm": 0.440373033285141, "learning_rate": 1.0460159649092186e-05, "loss": 0.3481, "step": 26880 }, { "epoch": 0.4854325107071407, "grad_norm": 0.4811852276325226, "learning_rate": 1.0457326392283066e-05, "loss": 0.2859, "step": 26885 }, { "epoch": 0.48552279014004146, "grad_norm": 0.6694021224975586, "learning_rate": 1.0454493098685361e-05, "loss": 0.2588, "step": 26890 }, { "epoch": 0.48561306957294215, "grad_norm": 0.5687317848205566, "learning_rate": 1.045165976852699e-05, "loss": 0.2771, "step": 26895 }, { "epoch": 0.4857033490058429, "grad_norm": 0.4221053123474121, "learning_rate": 1.0448826402035876e-05, "loss": 0.2494, "step": 26900 }, { "epoch": 0.4857936284387436, "grad_norm": 0.496270090341568, "learning_rate": 1.0445992999439942e-05, "loss": 0.3064, "step": 26905 }, { "epoch": 0.4858839078716443, "grad_norm": 0.30873286724090576, "learning_rate": 1.0443159560967112e-05, "loss": 0.198, "step": 26910 }, { "epoch": 0.485974187304545, "grad_norm": 0.4473693072795868, "learning_rate": 1.0440326086845317e-05, "loss": 0.2304, "step": 26915 }, { "epoch": 0.48606446673744574, "grad_norm": 0.3942261040210724, "learning_rate": 1.0437492577302492e-05, "loss": 0.2258, "step": 26920 }, { "epoch": 0.48615474617034643, "grad_norm": 0.438599169254303, "learning_rate": 1.043465903256657e-05, "loss": 0.1977, "step": 26925 }, { "epoch": 0.48624502560324717, "grad_norm": 0.47521236538887024, "learning_rate": 1.0431825452865487e-05, "loss": 0.2347, "step": 26930 }, { "epoch": 0.48633530503614786, "grad_norm": 0.4236838221549988, "learning_rate": 1.0428991838427187e-05, "loss": 0.2296, "step": 26935 }, { "epoch": 0.4864255844690486, "grad_norm": 0.5367502570152283, "learning_rate": 1.0426158189479614e-05, "loss": 0.2377, "step": 26940 }, { "epoch": 0.4865158639019493, "grad_norm": 0.46861499547958374, "learning_rate": 1.0423324506250715e-05, "loss": 0.2945, "step": 26945 }, { "epoch": 0.48660614333485, "grad_norm": 0.32173192501068115, "learning_rate": 1.0420490788968434e-05, "loss": 0.2429, "step": 26950 }, { "epoch": 0.48669642276775077, "grad_norm": 0.5245801210403442, "learning_rate": 1.0417657037860727e-05, "loss": 0.3033, "step": 26955 }, { "epoch": 0.48678670220065146, "grad_norm": 0.40964144468307495, "learning_rate": 1.0414823253155546e-05, "loss": 0.1634, "step": 26960 }, { "epoch": 0.4868769816335522, "grad_norm": 0.4763515591621399, "learning_rate": 1.0411989435080851e-05, "loss": 0.2826, "step": 26965 }, { "epoch": 0.4869672610664529, "grad_norm": 0.547917366027832, "learning_rate": 1.0409155583864604e-05, "loss": 0.2297, "step": 26970 }, { "epoch": 0.4870575404993536, "grad_norm": 0.24356386065483093, "learning_rate": 1.0406321699734761e-05, "loss": 0.2226, "step": 26975 }, { "epoch": 0.4871478199322543, "grad_norm": 0.8425018191337585, "learning_rate": 1.0403487782919295e-05, "loss": 0.2663, "step": 26980 }, { "epoch": 0.48723809936515505, "grad_norm": 0.35435715317726135, "learning_rate": 1.0400653833646171e-05, "loss": 0.27, "step": 26985 }, { "epoch": 0.48732837879805574, "grad_norm": 0.5056458711624146, "learning_rate": 1.0397819852143359e-05, "loss": 0.215, "step": 26990 }, { "epoch": 0.4874186582309565, "grad_norm": 0.5763304829597473, "learning_rate": 1.0394985838638827e-05, "loss": 0.2151, "step": 26995 }, { "epoch": 0.48750893766385717, "grad_norm": 0.5899175405502319, "learning_rate": 1.0392151793360562e-05, "loss": 0.2686, "step": 27000 }, { "epoch": 0.4875992170967579, "grad_norm": 0.5203468799591064, "learning_rate": 1.0389317716536536e-05, "loss": 0.2753, "step": 27005 }, { "epoch": 0.4876894965296586, "grad_norm": 0.5390440821647644, "learning_rate": 1.0386483608394726e-05, "loss": 0.2311, "step": 27010 }, { "epoch": 0.48777977596255934, "grad_norm": 0.39601603150367737, "learning_rate": 1.0383649469163124e-05, "loss": 0.2765, "step": 27015 }, { "epoch": 0.48787005539546, "grad_norm": 0.28642338514328003, "learning_rate": 1.0380815299069709e-05, "loss": 0.2079, "step": 27020 }, { "epoch": 0.48796033482836076, "grad_norm": 0.42527803778648376, "learning_rate": 1.0377981098342473e-05, "loss": 0.2355, "step": 27025 }, { "epoch": 0.48805061426126145, "grad_norm": 0.41193994879722595, "learning_rate": 1.0375146867209408e-05, "loss": 0.214, "step": 27030 }, { "epoch": 0.4881408936941622, "grad_norm": 0.7089897394180298, "learning_rate": 1.0372312605898505e-05, "loss": 0.1538, "step": 27035 }, { "epoch": 0.4882311731270629, "grad_norm": 0.4400389492511749, "learning_rate": 1.0369478314637755e-05, "loss": 0.2489, "step": 27040 }, { "epoch": 0.4883214525599636, "grad_norm": 0.5277612805366516, "learning_rate": 1.0366643993655163e-05, "loss": 0.2024, "step": 27045 }, { "epoch": 0.4884117319928643, "grad_norm": 0.41362714767456055, "learning_rate": 1.0363809643178731e-05, "loss": 0.2166, "step": 27050 }, { "epoch": 0.48850201142576505, "grad_norm": 0.8280554413795471, "learning_rate": 1.0360975263436455e-05, "loss": 0.2197, "step": 27055 }, { "epoch": 0.48859229085866573, "grad_norm": 0.44255056977272034, "learning_rate": 1.0358140854656341e-05, "loss": 0.3865, "step": 27060 }, { "epoch": 0.4886825702915665, "grad_norm": 0.5058305859565735, "learning_rate": 1.0355306417066404e-05, "loss": 0.2712, "step": 27065 }, { "epoch": 0.48877284972446716, "grad_norm": 0.3069395422935486, "learning_rate": 1.0352471950894649e-05, "loss": 0.1268, "step": 27070 }, { "epoch": 0.4888631291573679, "grad_norm": 0.36307913064956665, "learning_rate": 1.0349637456369086e-05, "loss": 0.1912, "step": 27075 }, { "epoch": 0.4889534085902686, "grad_norm": 0.5039262771606445, "learning_rate": 1.0346802933717733e-05, "loss": 0.1844, "step": 27080 }, { "epoch": 0.48904368802316933, "grad_norm": 0.7958729267120361, "learning_rate": 1.0343968383168605e-05, "loss": 0.2095, "step": 27085 }, { "epoch": 0.48913396745607, "grad_norm": 0.4576447308063507, "learning_rate": 1.0341133804949723e-05, "loss": 0.2588, "step": 27090 }, { "epoch": 0.48922424688897076, "grad_norm": 0.3759396970272064, "learning_rate": 1.0338299199289103e-05, "loss": 0.2037, "step": 27095 }, { "epoch": 0.48931452632187145, "grad_norm": 0.4838416576385498, "learning_rate": 1.0335464566414775e-05, "loss": 0.2597, "step": 27100 }, { "epoch": 0.4894048057547722, "grad_norm": 0.28845927119255066, "learning_rate": 1.0332629906554762e-05, "loss": 0.2132, "step": 27105 }, { "epoch": 0.4894950851876729, "grad_norm": 0.4330255091190338, "learning_rate": 1.0329795219937089e-05, "loss": 0.2401, "step": 27110 }, { "epoch": 0.4895853646205736, "grad_norm": 0.5459669828414917, "learning_rate": 1.0326960506789789e-05, "loss": 0.3111, "step": 27115 }, { "epoch": 0.4896756440534743, "grad_norm": 1.0174330472946167, "learning_rate": 1.0324125767340896e-05, "loss": 0.2445, "step": 27120 }, { "epoch": 0.48976592348637504, "grad_norm": 0.327204167842865, "learning_rate": 1.032129100181844e-05, "loss": 0.2275, "step": 27125 }, { "epoch": 0.48985620291927573, "grad_norm": 0.5526273250579834, "learning_rate": 1.0318456210450461e-05, "loss": 0.2523, "step": 27130 }, { "epoch": 0.48994648235217647, "grad_norm": 0.5036475658416748, "learning_rate": 1.0315621393464992e-05, "loss": 0.2304, "step": 27135 }, { "epoch": 0.49003676178507716, "grad_norm": 0.37863689661026, "learning_rate": 1.0312786551090077e-05, "loss": 0.2368, "step": 27140 }, { "epoch": 0.4901270412179779, "grad_norm": 0.600937008857727, "learning_rate": 1.0309951683553758e-05, "loss": 0.3067, "step": 27145 }, { "epoch": 0.4902173206508786, "grad_norm": 0.6421152353286743, "learning_rate": 1.0307116791084082e-05, "loss": 0.2312, "step": 27150 }, { "epoch": 0.4903076000837793, "grad_norm": 0.6270744800567627, "learning_rate": 1.0304281873909093e-05, "loss": 0.2451, "step": 27155 }, { "epoch": 0.49039787951668, "grad_norm": 0.29649534821510315, "learning_rate": 1.0301446932256835e-05, "loss": 0.276, "step": 27160 }, { "epoch": 0.49048815894958075, "grad_norm": 0.5644583702087402, "learning_rate": 1.0298611966355368e-05, "loss": 0.2725, "step": 27165 }, { "epoch": 0.49057843838248144, "grad_norm": 0.5755888223648071, "learning_rate": 1.0295776976432739e-05, "loss": 0.2189, "step": 27170 }, { "epoch": 0.4906687178153822, "grad_norm": 0.3351040184497833, "learning_rate": 1.0292941962717e-05, "loss": 0.1996, "step": 27175 }, { "epoch": 0.49075899724828287, "grad_norm": 0.34694960713386536, "learning_rate": 1.0290106925436212e-05, "loss": 0.2004, "step": 27180 }, { "epoch": 0.4908492766811836, "grad_norm": 0.5113522410392761, "learning_rate": 1.0287271864818431e-05, "loss": 0.2258, "step": 27185 }, { "epoch": 0.4909395561140843, "grad_norm": 0.3528189957141876, "learning_rate": 1.0284436781091718e-05, "loss": 0.3493, "step": 27190 }, { "epoch": 0.49102983554698504, "grad_norm": 0.3386617600917816, "learning_rate": 1.028160167448413e-05, "loss": 0.2776, "step": 27195 }, { "epoch": 0.4911201149798857, "grad_norm": 0.4666965901851654, "learning_rate": 1.0278766545223737e-05, "loss": 0.2439, "step": 27200 }, { "epoch": 0.49121039441278647, "grad_norm": 0.4585404396057129, "learning_rate": 1.0275931393538605e-05, "loss": 0.2297, "step": 27205 }, { "epoch": 0.49130067384568715, "grad_norm": 0.4618760645389557, "learning_rate": 1.0273096219656794e-05, "loss": 0.234, "step": 27210 }, { "epoch": 0.4913909532785879, "grad_norm": 0.3211655616760254, "learning_rate": 1.0270261023806383e-05, "loss": 0.2691, "step": 27215 }, { "epoch": 0.4914812327114886, "grad_norm": 0.3691023290157318, "learning_rate": 1.0267425806215437e-05, "loss": 0.2486, "step": 27220 }, { "epoch": 0.4915715121443893, "grad_norm": 0.24695229530334473, "learning_rate": 1.0264590567112022e-05, "loss": 0.3352, "step": 27225 }, { "epoch": 0.49166179157729, "grad_norm": 0.4610646069049835, "learning_rate": 1.0261755306724226e-05, "loss": 0.2407, "step": 27230 }, { "epoch": 0.49175207101019075, "grad_norm": 0.5048497319221497, "learning_rate": 1.0258920025280119e-05, "loss": 0.2619, "step": 27235 }, { "epoch": 0.49184235044309144, "grad_norm": 0.4628535509109497, "learning_rate": 1.0256084723007775e-05, "loss": 0.2581, "step": 27240 }, { "epoch": 0.4919326298759922, "grad_norm": 0.3776392340660095, "learning_rate": 1.0253249400135279e-05, "loss": 0.2212, "step": 27245 }, { "epoch": 0.49202290930889286, "grad_norm": 0.36695045232772827, "learning_rate": 1.0250414056890711e-05, "loss": 0.3446, "step": 27250 }, { "epoch": 0.4921131887417936, "grad_norm": 0.47604474425315857, "learning_rate": 1.0247578693502152e-05, "loss": 0.2258, "step": 27255 }, { "epoch": 0.4922034681746943, "grad_norm": 0.510190486907959, "learning_rate": 1.0244743310197685e-05, "loss": 0.2477, "step": 27260 }, { "epoch": 0.49229374760759503, "grad_norm": 0.5116159319877625, "learning_rate": 1.0241907907205402e-05, "loss": 0.2524, "step": 27265 }, { "epoch": 0.4923840270404957, "grad_norm": 0.39241963624954224, "learning_rate": 1.0239072484753385e-05, "loss": 0.2668, "step": 27270 }, { "epoch": 0.49247430647339646, "grad_norm": 0.3967352509498596, "learning_rate": 1.0236237043069721e-05, "loss": 0.1858, "step": 27275 }, { "epoch": 0.49256458590629715, "grad_norm": 0.5310468077659607, "learning_rate": 1.023340158238251e-05, "loss": 0.2226, "step": 27280 }, { "epoch": 0.4926548653391979, "grad_norm": 0.6683197021484375, "learning_rate": 1.0230566102919836e-05, "loss": 0.2233, "step": 27285 }, { "epoch": 0.4927451447720986, "grad_norm": 0.5171959400177002, "learning_rate": 1.0227730604909798e-05, "loss": 0.2645, "step": 27290 }, { "epoch": 0.4928354242049993, "grad_norm": 0.35554239153862, "learning_rate": 1.0224895088580484e-05, "loss": 0.225, "step": 27295 }, { "epoch": 0.4929257036379, "grad_norm": 0.5169106721878052, "learning_rate": 1.022205955416e-05, "loss": 0.2111, "step": 27300 }, { "epoch": 0.49301598307080075, "grad_norm": 0.5154178142547607, "learning_rate": 1.0219224001876438e-05, "loss": 0.3203, "step": 27305 }, { "epoch": 0.49310626250370143, "grad_norm": 0.4990963637828827, "learning_rate": 1.0216388431957897e-05, "loss": 0.2666, "step": 27310 }, { "epoch": 0.4931965419366022, "grad_norm": 0.5841171741485596, "learning_rate": 1.0213552844632483e-05, "loss": 0.2205, "step": 27315 }, { "epoch": 0.49328682136950286, "grad_norm": 0.27949053049087524, "learning_rate": 1.0210717240128294e-05, "loss": 0.1742, "step": 27320 }, { "epoch": 0.4933771008024036, "grad_norm": 0.55351322889328, "learning_rate": 1.0207881618673435e-05, "loss": 0.2336, "step": 27325 }, { "epoch": 0.4934673802353043, "grad_norm": 0.5123849511146545, "learning_rate": 1.0205045980496008e-05, "loss": 0.2447, "step": 27330 }, { "epoch": 0.49355765966820503, "grad_norm": 0.5589601397514343, "learning_rate": 1.0202210325824126e-05, "loss": 0.2753, "step": 27335 }, { "epoch": 0.4936479391011057, "grad_norm": 0.7552227973937988, "learning_rate": 1.0199374654885895e-05, "loss": 0.2253, "step": 27340 }, { "epoch": 0.49373821853400646, "grad_norm": 0.46851953864097595, "learning_rate": 1.019653896790942e-05, "loss": 0.2223, "step": 27345 }, { "epoch": 0.4938284979669072, "grad_norm": 0.3763858675956726, "learning_rate": 1.0193703265122814e-05, "loss": 0.2983, "step": 27350 }, { "epoch": 0.4939187773998079, "grad_norm": 0.5535316467285156, "learning_rate": 1.0190867546754191e-05, "loss": 0.254, "step": 27355 }, { "epoch": 0.4940090568327086, "grad_norm": 0.407514750957489, "learning_rate": 1.0188031813031658e-05, "loss": 0.194, "step": 27360 }, { "epoch": 0.4940993362656093, "grad_norm": 0.6036298274993896, "learning_rate": 1.0185196064183337e-05, "loss": 0.2785, "step": 27365 }, { "epoch": 0.49418961569851005, "grad_norm": 0.2379753738641739, "learning_rate": 1.0182360300437338e-05, "loss": 0.173, "step": 27370 }, { "epoch": 0.49427989513141074, "grad_norm": 0.6542347073554993, "learning_rate": 1.0179524522021772e-05, "loss": 0.2448, "step": 27375 }, { "epoch": 0.4943701745643115, "grad_norm": 0.2842325270175934, "learning_rate": 1.0176688729164771e-05, "loss": 0.2481, "step": 27380 }, { "epoch": 0.49446045399721217, "grad_norm": 0.3398384153842926, "learning_rate": 1.0173852922094447e-05, "loss": 0.2973, "step": 27385 }, { "epoch": 0.4945507334301129, "grad_norm": 0.46946772933006287, "learning_rate": 1.0171017101038916e-05, "loss": 0.2458, "step": 27390 }, { "epoch": 0.4946410128630136, "grad_norm": 0.6143315434455872, "learning_rate": 1.0168181266226303e-05, "loss": 0.3659, "step": 27395 }, { "epoch": 0.49473129229591434, "grad_norm": 0.2996760308742523, "learning_rate": 1.0165345417884733e-05, "loss": 0.1668, "step": 27400 }, { "epoch": 0.494821571728815, "grad_norm": 0.8874998092651367, "learning_rate": 1.0162509556242321e-05, "loss": 0.3254, "step": 27405 }, { "epoch": 0.49491185116171577, "grad_norm": 0.5014644265174866, "learning_rate": 1.01596736815272e-05, "loss": 0.3182, "step": 27410 }, { "epoch": 0.49500213059461645, "grad_norm": 0.33739808201789856, "learning_rate": 1.0156837793967494e-05, "loss": 0.2717, "step": 27415 }, { "epoch": 0.4950924100275172, "grad_norm": 0.38993531465530396, "learning_rate": 1.0154001893791325e-05, "loss": 0.2172, "step": 27420 }, { "epoch": 0.4951826894604179, "grad_norm": 0.444154292345047, "learning_rate": 1.0151165981226823e-05, "loss": 0.3275, "step": 27425 }, { "epoch": 0.4952729688933186, "grad_norm": 0.5773775577545166, "learning_rate": 1.0148330056502118e-05, "loss": 0.2471, "step": 27430 }, { "epoch": 0.4953632483262193, "grad_norm": 0.7944090962409973, "learning_rate": 1.014549411984534e-05, "loss": 0.2043, "step": 27435 }, { "epoch": 0.49545352775912005, "grad_norm": 0.41809704899787903, "learning_rate": 1.0142658171484614e-05, "loss": 0.2755, "step": 27440 }, { "epoch": 0.49554380719202074, "grad_norm": 0.4073884189128876, "learning_rate": 1.0139822211648078e-05, "loss": 0.1245, "step": 27445 }, { "epoch": 0.4956340866249215, "grad_norm": 0.5428813695907593, "learning_rate": 1.0136986240563862e-05, "loss": 0.3305, "step": 27450 }, { "epoch": 0.49572436605782216, "grad_norm": 0.27504950761795044, "learning_rate": 1.0134150258460098e-05, "loss": 0.2412, "step": 27455 }, { "epoch": 0.4958146454907229, "grad_norm": 0.2455800175666809, "learning_rate": 1.013131426556492e-05, "loss": 0.3058, "step": 27460 }, { "epoch": 0.4959049249236236, "grad_norm": 0.3766688108444214, "learning_rate": 1.0128478262106468e-05, "loss": 0.28, "step": 27465 }, { "epoch": 0.49599520435652433, "grad_norm": 0.5050990581512451, "learning_rate": 1.012564224831287e-05, "loss": 0.2476, "step": 27470 }, { "epoch": 0.496085483789425, "grad_norm": 0.4720533788204193, "learning_rate": 1.0122806224412267e-05, "loss": 0.2766, "step": 27475 }, { "epoch": 0.49617576322232576, "grad_norm": 0.27817681431770325, "learning_rate": 1.0119970190632797e-05, "loss": 0.308, "step": 27480 }, { "epoch": 0.49626604265522645, "grad_norm": 0.3395104706287384, "learning_rate": 1.0117134147202598e-05, "loss": 0.253, "step": 27485 }, { "epoch": 0.4963563220881272, "grad_norm": 0.28128769993782043, "learning_rate": 1.0114298094349808e-05, "loss": 0.2615, "step": 27490 }, { "epoch": 0.4964466015210279, "grad_norm": 0.35323697328567505, "learning_rate": 1.0111462032302568e-05, "loss": 0.2483, "step": 27495 }, { "epoch": 0.4965368809539286, "grad_norm": 0.5828583836555481, "learning_rate": 1.0108625961289018e-05, "loss": 0.2116, "step": 27500 }, { "epoch": 0.4966271603868293, "grad_norm": 0.48245134949684143, "learning_rate": 1.0105789881537296e-05, "loss": 0.2822, "step": 27505 }, { "epoch": 0.49671743981973004, "grad_norm": 0.30727630853652954, "learning_rate": 1.0102953793275552e-05, "loss": 0.2791, "step": 27510 }, { "epoch": 0.49680771925263073, "grad_norm": 0.3550146222114563, "learning_rate": 1.0100117696731921e-05, "loss": 0.2403, "step": 27515 }, { "epoch": 0.4968979986855315, "grad_norm": 0.4729647934436798, "learning_rate": 1.0097281592134552e-05, "loss": 0.2758, "step": 27520 }, { "epoch": 0.49698827811843216, "grad_norm": 0.3293568789958954, "learning_rate": 1.0094445479711581e-05, "loss": 0.3106, "step": 27525 }, { "epoch": 0.4970785575513329, "grad_norm": 0.4167075455188751, "learning_rate": 1.0091609359691162e-05, "loss": 0.281, "step": 27530 }, { "epoch": 0.4971688369842336, "grad_norm": 0.5646471977233887, "learning_rate": 1.0088773232301434e-05, "loss": 0.3009, "step": 27535 }, { "epoch": 0.49725911641713433, "grad_norm": 0.2843344211578369, "learning_rate": 1.0085937097770544e-05, "loss": 0.2627, "step": 27540 }, { "epoch": 0.497349395850035, "grad_norm": 0.4866458475589752, "learning_rate": 1.0083100956326642e-05, "loss": 0.2308, "step": 27545 }, { "epoch": 0.49743967528293576, "grad_norm": 0.5150853395462036, "learning_rate": 1.0080264808197873e-05, "loss": 0.3069, "step": 27550 }, { "epoch": 0.49752995471583644, "grad_norm": 0.3755788505077362, "learning_rate": 1.007742865361238e-05, "loss": 0.117, "step": 27555 }, { "epoch": 0.4976202341487372, "grad_norm": 0.41021981835365295, "learning_rate": 1.0074592492798317e-05, "loss": 0.2048, "step": 27560 }, { "epoch": 0.49771051358163787, "grad_norm": 0.3073005974292755, "learning_rate": 1.007175632598383e-05, "loss": 0.1914, "step": 27565 }, { "epoch": 0.4978007930145386, "grad_norm": 0.4285506308078766, "learning_rate": 1.006892015339707e-05, "loss": 0.1857, "step": 27570 }, { "epoch": 0.4978910724474393, "grad_norm": 0.5439529418945312, "learning_rate": 1.006608397526618e-05, "loss": 0.1857, "step": 27575 }, { "epoch": 0.49798135188034004, "grad_norm": 0.6931535005569458, "learning_rate": 1.0063247791819319e-05, "loss": 0.2686, "step": 27580 }, { "epoch": 0.4980716313132407, "grad_norm": 0.5035712122917175, "learning_rate": 1.0060411603284634e-05, "loss": 0.1862, "step": 27585 }, { "epoch": 0.49816191074614147, "grad_norm": 0.5458855628967285, "learning_rate": 1.0057575409890272e-05, "loss": 0.2431, "step": 27590 }, { "epoch": 0.49825219017904215, "grad_norm": 0.3478755056858063, "learning_rate": 1.0054739211864387e-05, "loss": 0.3048, "step": 27595 }, { "epoch": 0.4983424696119429, "grad_norm": 0.4352189600467682, "learning_rate": 1.0051903009435133e-05, "loss": 0.2288, "step": 27600 }, { "epoch": 0.4984327490448436, "grad_norm": 0.4167369306087494, "learning_rate": 1.0049066802830655e-05, "loss": 0.1723, "step": 27605 }, { "epoch": 0.4985230284777443, "grad_norm": 1.0207589864730835, "learning_rate": 1.0046230592279112e-05, "loss": 0.2857, "step": 27610 }, { "epoch": 0.498613307910645, "grad_norm": 0.699300229549408, "learning_rate": 1.0043394378008656e-05, "loss": 0.2225, "step": 27615 }, { "epoch": 0.49870358734354575, "grad_norm": 0.4939732849597931, "learning_rate": 1.0040558160247436e-05, "loss": 0.162, "step": 27620 }, { "epoch": 0.49879386677644644, "grad_norm": 0.34428349137306213, "learning_rate": 1.0037721939223603e-05, "loss": 0.2868, "step": 27625 }, { "epoch": 0.4988841462093472, "grad_norm": 0.4207374155521393, "learning_rate": 1.0034885715165318e-05, "loss": 0.1792, "step": 27630 }, { "epoch": 0.49897442564224787, "grad_norm": 0.41916459798812866, "learning_rate": 1.003204948830073e-05, "loss": 0.309, "step": 27635 }, { "epoch": 0.4990647050751486, "grad_norm": 0.4053615927696228, "learning_rate": 1.0029213258857993e-05, "loss": 0.2872, "step": 27640 }, { "epoch": 0.4991549845080493, "grad_norm": 0.37571027874946594, "learning_rate": 1.0026377027065264e-05, "loss": 0.195, "step": 27645 }, { "epoch": 0.49924526394095003, "grad_norm": 0.46607398986816406, "learning_rate": 1.0023540793150694e-05, "loss": 0.1856, "step": 27650 }, { "epoch": 0.4993355433738507, "grad_norm": 0.6066296696662903, "learning_rate": 1.0020704557342438e-05, "loss": 0.2583, "step": 27655 }, { "epoch": 0.49942582280675146, "grad_norm": 0.3336362838745117, "learning_rate": 1.001786831986865e-05, "loss": 0.1857, "step": 27660 }, { "epoch": 0.49951610223965215, "grad_norm": 0.5273229479789734, "learning_rate": 1.0015032080957487e-05, "loss": 0.2838, "step": 27665 }, { "epoch": 0.4996063816725529, "grad_norm": 0.4261868894100189, "learning_rate": 1.0012195840837103e-05, "loss": 0.2801, "step": 27670 }, { "epoch": 0.4996966611054536, "grad_norm": 0.6468798518180847, "learning_rate": 1.0009359599735653e-05, "loss": 0.3093, "step": 27675 }, { "epoch": 0.4997869405383543, "grad_norm": 0.6814336776733398, "learning_rate": 1.0006523357881288e-05, "loss": 0.3179, "step": 27680 }, { "epoch": 0.499877219971255, "grad_norm": 0.4324711263179779, "learning_rate": 1.000368711550217e-05, "loss": 0.2867, "step": 27685 }, { "epoch": 0.49996749940415575, "grad_norm": 0.31334424018859863, "learning_rate": 1.0000850872826447e-05, "loss": 0.2088, "step": 27690 }, { "epoch": 0.5000577788370565, "grad_norm": 0.3834947347640991, "learning_rate": 9.99801463008228e-06, "loss": 0.3434, "step": 27695 }, { "epoch": 0.5001480582699571, "grad_norm": 0.2797144651412964, "learning_rate": 9.995178387497817e-06, "loss": 0.2136, "step": 27700 }, { "epoch": 0.5002383377028579, "grad_norm": 0.48420146107673645, "learning_rate": 9.992342145301222e-06, "loss": 0.3488, "step": 27705 }, { "epoch": 0.5003286171357586, "grad_norm": 0.3981853425502777, "learning_rate": 9.989505903720643e-06, "loss": 0.2418, "step": 27710 }, { "epoch": 0.5004188965686593, "grad_norm": 0.43650510907173157, "learning_rate": 9.986669662984242e-06, "loss": 0.2245, "step": 27715 }, { "epoch": 0.50050917600156, "grad_norm": 0.42851582169532776, "learning_rate": 9.983833423320166e-06, "loss": 0.2556, "step": 27720 }, { "epoch": 0.5005994554344607, "grad_norm": 0.9285907745361328, "learning_rate": 9.980997184956574e-06, "loss": 0.2418, "step": 27725 }, { "epoch": 0.5006897348673615, "grad_norm": 0.2296239137649536, "learning_rate": 9.978160948121623e-06, "loss": 0.2344, "step": 27730 }, { "epoch": 0.5007800143002622, "grad_norm": 0.5208185315132141, "learning_rate": 9.975324713043461e-06, "loss": 0.2269, "step": 27735 }, { "epoch": 0.5008702937331629, "grad_norm": 0.4143800139427185, "learning_rate": 9.972488479950249e-06, "loss": 0.2573, "step": 27740 }, { "epoch": 0.5009605731660636, "grad_norm": 0.3906264901161194, "learning_rate": 9.969652249070137e-06, "loss": 0.1876, "step": 27745 }, { "epoch": 0.5010508525989643, "grad_norm": 0.6202120184898376, "learning_rate": 9.966816020631284e-06, "loss": 0.2853, "step": 27750 }, { "epoch": 0.501141132031865, "grad_norm": 0.5595226883888245, "learning_rate": 9.963979794861836e-06, "loss": 0.2391, "step": 27755 }, { "epoch": 0.5012314114647658, "grad_norm": 0.3227062523365021, "learning_rate": 9.961143571989958e-06, "loss": 0.229, "step": 27760 }, { "epoch": 0.5013216908976664, "grad_norm": 0.565716028213501, "learning_rate": 9.958307352243791e-06, "loss": 0.2199, "step": 27765 }, { "epoch": 0.5014119703305672, "grad_norm": 0.5395566821098328, "learning_rate": 9.955471135851498e-06, "loss": 0.2633, "step": 27770 }, { "epoch": 0.5015022497634679, "grad_norm": 0.39155447483062744, "learning_rate": 9.952634923041225e-06, "loss": 0.2835, "step": 27775 }, { "epoch": 0.5015925291963687, "grad_norm": 0.5883301496505737, "learning_rate": 9.949798714041133e-06, "loss": 0.2501, "step": 27780 }, { "epoch": 0.5016828086292693, "grad_norm": 0.7940782308578491, "learning_rate": 9.946962509079365e-06, "loss": 0.2023, "step": 27785 }, { "epoch": 0.50177308806217, "grad_norm": 0.39916032552719116, "learning_rate": 9.94412630838408e-06, "loss": 0.2586, "step": 27790 }, { "epoch": 0.5018633674950708, "grad_norm": 0.5648788213729858, "learning_rate": 9.941290112183426e-06, "loss": 0.2484, "step": 27795 }, { "epoch": 0.5019536469279715, "grad_norm": 0.41418156027793884, "learning_rate": 9.938453920705558e-06, "loss": 0.242, "step": 27800 }, { "epoch": 0.5020439263608721, "grad_norm": 0.6468842625617981, "learning_rate": 9.935617734178623e-06, "loss": 0.2861, "step": 27805 }, { "epoch": 0.5021342057937729, "grad_norm": 0.4214017689228058, "learning_rate": 9.932781552830773e-06, "loss": 0.2731, "step": 27810 }, { "epoch": 0.5022244852266736, "grad_norm": 0.4069287180900574, "learning_rate": 9.92994537689016e-06, "loss": 0.2039, "step": 27815 }, { "epoch": 0.5023147646595744, "grad_norm": 0.20495177805423737, "learning_rate": 9.927109206584927e-06, "loss": 0.3151, "step": 27820 }, { "epoch": 0.502405044092475, "grad_norm": 0.5869178771972656, "learning_rate": 9.924273042143234e-06, "loss": 0.195, "step": 27825 }, { "epoch": 0.5024953235253757, "grad_norm": 0.4586544632911682, "learning_rate": 9.921436883793221e-06, "loss": 0.246, "step": 27830 }, { "epoch": 0.5025856029582765, "grad_norm": 0.3754526674747467, "learning_rate": 9.91860073176304e-06, "loss": 0.2455, "step": 27835 }, { "epoch": 0.5026758823911772, "grad_norm": 0.3354601562023163, "learning_rate": 9.915764586280838e-06, "loss": 0.2015, "step": 27840 }, { "epoch": 0.5027661618240779, "grad_norm": 0.49308615922927856, "learning_rate": 9.912928447574765e-06, "loss": 0.1974, "step": 27845 }, { "epoch": 0.5028564412569786, "grad_norm": 0.44969847798347473, "learning_rate": 9.910092315872964e-06, "loss": 0.2235, "step": 27850 }, { "epoch": 0.5029467206898793, "grad_norm": 0.46338215470314026, "learning_rate": 9.907256191403582e-06, "loss": 0.3144, "step": 27855 }, { "epoch": 0.5030370001227801, "grad_norm": 0.4935147166252136, "learning_rate": 9.904420074394762e-06, "loss": 0.2248, "step": 27860 }, { "epoch": 0.5031272795556807, "grad_norm": 0.5603335499763489, "learning_rate": 9.90158396507466e-06, "loss": 0.2033, "step": 27865 }, { "epoch": 0.5032175589885814, "grad_norm": 0.5353633165359497, "learning_rate": 9.898747863671404e-06, "loss": 0.296, "step": 27870 }, { "epoch": 0.5033078384214822, "grad_norm": 0.3191150724887848, "learning_rate": 9.895911770413152e-06, "loss": 0.2318, "step": 27875 }, { "epoch": 0.5033981178543829, "grad_norm": 0.6160486936569214, "learning_rate": 9.893075685528037e-06, "loss": 0.2417, "step": 27880 }, { "epoch": 0.5034883972872836, "grad_norm": 0.6783169507980347, "learning_rate": 9.89023960924421e-06, "loss": 0.2349, "step": 27885 }, { "epoch": 0.5035786767201843, "grad_norm": 0.6717947721481323, "learning_rate": 9.887403541789804e-06, "loss": 0.2927, "step": 27890 }, { "epoch": 0.503668956153085, "grad_norm": 0.36918288469314575, "learning_rate": 9.88456748339297e-06, "loss": 0.3937, "step": 27895 }, { "epoch": 0.5037592355859858, "grad_norm": 0.4017901122570038, "learning_rate": 9.881731434281838e-06, "loss": 0.1621, "step": 27900 }, { "epoch": 0.5038495150188864, "grad_norm": 0.5388122797012329, "learning_rate": 9.878895394684554e-06, "loss": 0.3263, "step": 27905 }, { "epoch": 0.5039397944517872, "grad_norm": 0.5903985500335693, "learning_rate": 9.876059364829257e-06, "loss": 0.2077, "step": 27910 }, { "epoch": 0.5040300738846879, "grad_norm": 0.36499443650245667, "learning_rate": 9.873223344944078e-06, "loss": 0.1709, "step": 27915 }, { "epoch": 0.5041203533175886, "grad_norm": 0.5463023781776428, "learning_rate": 9.870387335257162e-06, "loss": 0.2483, "step": 27920 }, { "epoch": 0.5042106327504893, "grad_norm": 0.4573571979999542, "learning_rate": 9.867551335996641e-06, "loss": 0.2362, "step": 27925 }, { "epoch": 0.50430091218339, "grad_norm": 0.3257332444190979, "learning_rate": 9.864715347390656e-06, "loss": 0.2524, "step": 27930 }, { "epoch": 0.5043911916162908, "grad_norm": 0.45857176184654236, "learning_rate": 9.861879369667334e-06, "loss": 0.2533, "step": 27935 }, { "epoch": 0.5044814710491915, "grad_norm": 0.4118990898132324, "learning_rate": 9.859043403054813e-06, "loss": 0.2963, "step": 27940 }, { "epoch": 0.5045717504820921, "grad_norm": 0.3844326436519623, "learning_rate": 9.856207447781225e-06, "loss": 0.2782, "step": 27945 }, { "epoch": 0.5046620299149929, "grad_norm": 0.6469088792800903, "learning_rate": 9.853371504074703e-06, "loss": 0.2838, "step": 27950 }, { "epoch": 0.5047523093478936, "grad_norm": 0.3941665291786194, "learning_rate": 9.850535572163377e-06, "loss": 0.2362, "step": 27955 }, { "epoch": 0.5048425887807944, "grad_norm": 0.4124378263950348, "learning_rate": 9.84769965227538e-06, "loss": 0.3276, "step": 27960 }, { "epoch": 0.504932868213695, "grad_norm": 0.6127313375473022, "learning_rate": 9.844863744638836e-06, "loss": 0.2372, "step": 27965 }, { "epoch": 0.5050231476465957, "grad_norm": 0.522842288017273, "learning_rate": 9.842027849481876e-06, "loss": 0.2822, "step": 27970 }, { "epoch": 0.5051134270794965, "grad_norm": 0.34865602850914, "learning_rate": 9.839191967032626e-06, "loss": 0.2357, "step": 27975 }, { "epoch": 0.5052037065123972, "grad_norm": 0.3709487020969391, "learning_rate": 9.836356097519217e-06, "loss": 0.1587, "step": 27980 }, { "epoch": 0.5052939859452978, "grad_norm": 0.26086875796318054, "learning_rate": 9.833520241169767e-06, "loss": 0.2099, "step": 27985 }, { "epoch": 0.5053842653781986, "grad_norm": 0.40823107957839966, "learning_rate": 9.830684398212403e-06, "loss": 0.2308, "step": 27990 }, { "epoch": 0.5054745448110993, "grad_norm": 0.47882601618766785, "learning_rate": 9.827848568875248e-06, "loss": 0.2709, "step": 27995 }, { "epoch": 0.5055648242440001, "grad_norm": 0.45627081394195557, "learning_rate": 9.825012753386427e-06, "loss": 0.2869, "step": 28000 }, { "epoch": 0.5056551036769007, "grad_norm": 0.8625367879867554, "learning_rate": 9.822176951974058e-06, "loss": 0.2206, "step": 28005 }, { "epoch": 0.5057453831098014, "grad_norm": 0.7245298027992249, "learning_rate": 9.819341164866255e-06, "loss": 0.2835, "step": 28010 }, { "epoch": 0.5058356625427022, "grad_norm": 0.3014034032821655, "learning_rate": 9.816505392291146e-06, "loss": 0.263, "step": 28015 }, { "epoch": 0.5059259419756029, "grad_norm": 0.40651869773864746, "learning_rate": 9.813669634476842e-06, "loss": 0.2666, "step": 28020 }, { "epoch": 0.5060162214085036, "grad_norm": 0.6607047319412231, "learning_rate": 9.810833891651463e-06, "loss": 0.277, "step": 28025 }, { "epoch": 0.5061065008414043, "grad_norm": 0.5369471311569214, "learning_rate": 9.80799816404312e-06, "loss": 0.1842, "step": 28030 }, { "epoch": 0.506196780274305, "grad_norm": 0.5690027475357056, "learning_rate": 9.805162451879931e-06, "loss": 0.2852, "step": 28035 }, { "epoch": 0.5062870597072058, "grad_norm": 0.5109096765518188, "learning_rate": 9.802326755390002e-06, "loss": 0.2249, "step": 28040 }, { "epoch": 0.5063773391401064, "grad_norm": 0.2942725419998169, "learning_rate": 9.799491074801454e-06, "loss": 0.2417, "step": 28045 }, { "epoch": 0.5064676185730072, "grad_norm": 0.667260468006134, "learning_rate": 9.796655410342386e-06, "loss": 0.2583, "step": 28050 }, { "epoch": 0.5065578980059079, "grad_norm": 0.37191522121429443, "learning_rate": 9.793819762240915e-06, "loss": 0.2346, "step": 28055 }, { "epoch": 0.5066481774388086, "grad_norm": 0.5072374939918518, "learning_rate": 9.790984130725144e-06, "loss": 0.2721, "step": 28060 }, { "epoch": 0.5067384568717093, "grad_norm": 0.4050954282283783, "learning_rate": 9.788148516023181e-06, "loss": 0.1833, "step": 28065 }, { "epoch": 0.50682873630461, "grad_norm": 0.5642181038856506, "learning_rate": 9.785312918363129e-06, "loss": 0.1655, "step": 28070 }, { "epoch": 0.5069190157375107, "grad_norm": 0.7335848808288574, "learning_rate": 9.78247733797309e-06, "loss": 0.2526, "step": 28075 }, { "epoch": 0.5070092951704115, "grad_norm": 0.5068244934082031, "learning_rate": 9.779641775081169e-06, "loss": 0.1532, "step": 28080 }, { "epoch": 0.5070995746033121, "grad_norm": 0.5320443511009216, "learning_rate": 9.776806229915466e-06, "loss": 0.1299, "step": 28085 }, { "epoch": 0.5071898540362129, "grad_norm": 0.7844002842903137, "learning_rate": 9.773970702704079e-06, "loss": 0.2342, "step": 28090 }, { "epoch": 0.5072801334691136, "grad_norm": 0.41133514046669006, "learning_rate": 9.771135193675104e-06, "loss": 0.2925, "step": 28095 }, { "epoch": 0.5073704129020143, "grad_norm": 0.359630286693573, "learning_rate": 9.76829970305664e-06, "loss": 0.1802, "step": 28100 }, { "epoch": 0.507460692334915, "grad_norm": 0.6197757720947266, "learning_rate": 9.765464231076778e-06, "loss": 0.3045, "step": 28105 }, { "epoch": 0.5075509717678157, "grad_norm": 0.5487251877784729, "learning_rate": 9.762628777963618e-06, "loss": 0.3134, "step": 28110 }, { "epoch": 0.5076412512007165, "grad_norm": 0.40097513794898987, "learning_rate": 9.759793343945242e-06, "loss": 0.2812, "step": 28115 }, { "epoch": 0.5077315306336172, "grad_norm": 0.41617217659950256, "learning_rate": 9.756957929249747e-06, "loss": 0.389, "step": 28120 }, { "epoch": 0.5078218100665178, "grad_norm": 0.3712722361087799, "learning_rate": 9.75412253410522e-06, "loss": 0.2858, "step": 28125 }, { "epoch": 0.5079120894994186, "grad_norm": 0.520553469657898, "learning_rate": 9.751287158739748e-06, "loss": 0.2226, "step": 28130 }, { "epoch": 0.5080023689323193, "grad_norm": 0.2953408658504486, "learning_rate": 9.748451803381414e-06, "loss": 0.2733, "step": 28135 }, { "epoch": 0.5080926483652201, "grad_norm": 0.4719192683696747, "learning_rate": 9.745616468258305e-06, "loss": 0.1934, "step": 28140 }, { "epoch": 0.5081829277981208, "grad_norm": 0.5132400989532471, "learning_rate": 9.742781153598499e-06, "loss": 0.2645, "step": 28145 }, { "epoch": 0.5082732072310214, "grad_norm": 0.4969528019428253, "learning_rate": 9.739945859630082e-06, "loss": 0.2432, "step": 28150 }, { "epoch": 0.5083634866639222, "grad_norm": 0.42903774976730347, "learning_rate": 9.737110586581125e-06, "loss": 0.1691, "step": 28155 }, { "epoch": 0.5084537660968229, "grad_norm": 0.4738997519016266, "learning_rate": 9.734275334679717e-06, "loss": 0.2364, "step": 28160 }, { "epoch": 0.5085440455297237, "grad_norm": 0.4509764611721039, "learning_rate": 9.731440104153921e-06, "loss": 0.252, "step": 28165 }, { "epoch": 0.5086343249626243, "grad_norm": 0.7997400164604187, "learning_rate": 9.728604895231819e-06, "loss": 0.2634, "step": 28170 }, { "epoch": 0.508724604395525, "grad_norm": 0.5595467686653137, "learning_rate": 9.725769708141476e-06, "loss": 0.3151, "step": 28175 }, { "epoch": 0.5088148838284258, "grad_norm": 0.6390929222106934, "learning_rate": 9.722934543110972e-06, "loss": 0.2556, "step": 28180 }, { "epoch": 0.5089051632613265, "grad_norm": 0.5860655903816223, "learning_rate": 9.720099400368369e-06, "loss": 0.2759, "step": 28185 }, { "epoch": 0.5089954426942271, "grad_norm": 0.4354051649570465, "learning_rate": 9.71726428014173e-06, "loss": 0.3003, "step": 28190 }, { "epoch": 0.5090857221271279, "grad_norm": 0.41822466254234314, "learning_rate": 9.714429182659127e-06, "loss": 0.2826, "step": 28195 }, { "epoch": 0.5091760015600286, "grad_norm": 0.4065977931022644, "learning_rate": 9.711594108148618e-06, "loss": 0.346, "step": 28200 }, { "epoch": 0.5092662809929294, "grad_norm": 0.9190388321876526, "learning_rate": 9.708759056838266e-06, "loss": 0.2788, "step": 28205 }, { "epoch": 0.50935656042583, "grad_norm": 0.689323902130127, "learning_rate": 9.70592402895613e-06, "loss": 0.2443, "step": 28210 }, { "epoch": 0.5094468398587307, "grad_norm": 0.24661704897880554, "learning_rate": 9.703089024730269e-06, "loss": 0.2348, "step": 28215 }, { "epoch": 0.5095371192916315, "grad_norm": 0.7008240818977356, "learning_rate": 9.700254044388735e-06, "loss": 0.2048, "step": 28220 }, { "epoch": 0.5096273987245322, "grad_norm": 0.3344728350639343, "learning_rate": 9.697419088159589e-06, "loss": 0.2509, "step": 28225 }, { "epoch": 0.5097176781574329, "grad_norm": 0.4515267610549927, "learning_rate": 9.694584156270872e-06, "loss": 0.2501, "step": 28230 }, { "epoch": 0.5098079575903336, "grad_norm": 0.46323707699775696, "learning_rate": 9.691749248950642e-06, "loss": 0.2469, "step": 28235 }, { "epoch": 0.5098982370232343, "grad_norm": 0.5240350365638733, "learning_rate": 9.688914366426942e-06, "loss": 0.3135, "step": 28240 }, { "epoch": 0.5099885164561351, "grad_norm": 0.47504279017448425, "learning_rate": 9.686079508927823e-06, "loss": 0.224, "step": 28245 }, { "epoch": 0.5100787958890357, "grad_norm": 0.2030981481075287, "learning_rate": 9.683244676681321e-06, "loss": 0.2984, "step": 28250 }, { "epoch": 0.5101690753219364, "grad_norm": 0.41279366612434387, "learning_rate": 9.680409869915482e-06, "loss": 0.2637, "step": 28255 }, { "epoch": 0.5102593547548372, "grad_norm": 0.7960222959518433, "learning_rate": 9.677575088858346e-06, "loss": 0.3638, "step": 28260 }, { "epoch": 0.5103496341877379, "grad_norm": 0.5688741207122803, "learning_rate": 9.674740333737952e-06, "loss": 0.2385, "step": 28265 }, { "epoch": 0.5104399136206386, "grad_norm": 0.2537517547607422, "learning_rate": 9.671905604782327e-06, "loss": 0.2378, "step": 28270 }, { "epoch": 0.5105301930535393, "grad_norm": 0.549437403678894, "learning_rate": 9.669070902219519e-06, "loss": 0.2271, "step": 28275 }, { "epoch": 0.51062047248644, "grad_norm": 0.6095982193946838, "learning_rate": 9.666236226277548e-06, "loss": 0.3151, "step": 28280 }, { "epoch": 0.5107107519193408, "grad_norm": 0.3918159008026123, "learning_rate": 9.663401577184443e-06, "loss": 0.1602, "step": 28285 }, { "epoch": 0.5108010313522414, "grad_norm": 0.6957646012306213, "learning_rate": 9.660566955168239e-06, "loss": 0.398, "step": 28290 }, { "epoch": 0.5108913107851422, "grad_norm": 0.4322679936885834, "learning_rate": 9.657732360456951e-06, "loss": 0.2227, "step": 28295 }, { "epoch": 0.5109815902180429, "grad_norm": 0.5070502758026123, "learning_rate": 9.654897793278607e-06, "loss": 0.1661, "step": 28300 }, { "epoch": 0.5110718696509436, "grad_norm": 0.2714361846446991, "learning_rate": 9.652063253861227e-06, "loss": 0.27, "step": 28305 }, { "epoch": 0.5111621490838443, "grad_norm": 0.4892715811729431, "learning_rate": 9.649228742432829e-06, "loss": 0.2854, "step": 28310 }, { "epoch": 0.511252428516745, "grad_norm": 0.44520044326782227, "learning_rate": 9.646394259221426e-06, "loss": 0.2486, "step": 28315 }, { "epoch": 0.5113427079496458, "grad_norm": 0.2912124693393707, "learning_rate": 9.643559804455037e-06, "loss": 0.2223, "step": 28320 }, { "epoch": 0.5114329873825465, "grad_norm": 0.2975776493549347, "learning_rate": 9.640725378361666e-06, "loss": 0.1979, "step": 28325 }, { "epoch": 0.5115232668154471, "grad_norm": 0.2630120813846588, "learning_rate": 9.637890981169331e-06, "loss": 0.3285, "step": 28330 }, { "epoch": 0.5116135462483479, "grad_norm": 0.3051246106624603, "learning_rate": 9.635056613106029e-06, "loss": 0.2442, "step": 28335 }, { "epoch": 0.5117038256812486, "grad_norm": 0.4141266942024231, "learning_rate": 9.632222274399773e-06, "loss": 0.2679, "step": 28340 }, { "epoch": 0.5117941051141494, "grad_norm": 0.44740721583366394, "learning_rate": 9.629387965278558e-06, "loss": 0.2184, "step": 28345 }, { "epoch": 0.51188438454705, "grad_norm": 0.31492388248443604, "learning_rate": 9.62655368597039e-06, "loss": 0.2088, "step": 28350 }, { "epoch": 0.5119746639799507, "grad_norm": 0.38207197189331055, "learning_rate": 9.623719436703258e-06, "loss": 0.2585, "step": 28355 }, { "epoch": 0.5120649434128515, "grad_norm": 0.4363836944103241, "learning_rate": 9.620885217705167e-06, "loss": 0.2704, "step": 28360 }, { "epoch": 0.5121552228457522, "grad_norm": 0.4479374289512634, "learning_rate": 9.618051029204101e-06, "loss": 0.2479, "step": 28365 }, { "epoch": 0.5122455022786528, "grad_norm": 0.2569049596786499, "learning_rate": 9.615216871428053e-06, "loss": 0.187, "step": 28370 }, { "epoch": 0.5123357817115536, "grad_norm": 0.3158555030822754, "learning_rate": 9.612382744605012e-06, "loss": 0.2949, "step": 28375 }, { "epoch": 0.5124260611444543, "grad_norm": 0.37251681089401245, "learning_rate": 9.609548648962957e-06, "loss": 0.2353, "step": 28380 }, { "epoch": 0.5125163405773551, "grad_norm": 0.4977036416530609, "learning_rate": 9.606714584729877e-06, "loss": 0.2804, "step": 28385 }, { "epoch": 0.5126066200102557, "grad_norm": 0.6994753479957581, "learning_rate": 9.603880552133746e-06, "loss": 0.2686, "step": 28390 }, { "epoch": 0.5126968994431564, "grad_norm": 0.5312997698783875, "learning_rate": 9.60104655140255e-06, "loss": 0.2027, "step": 28395 }, { "epoch": 0.5127871788760572, "grad_norm": 0.33299341797828674, "learning_rate": 9.598212582764253e-06, "loss": 0.2144, "step": 28400 }, { "epoch": 0.5128774583089579, "grad_norm": 0.3861413300037384, "learning_rate": 9.595378646446837e-06, "loss": 0.2526, "step": 28405 }, { "epoch": 0.5129677377418586, "grad_norm": 0.4368645250797272, "learning_rate": 9.592544742678263e-06, "loss": 0.3002, "step": 28410 }, { "epoch": 0.5130580171747593, "grad_norm": 0.49991172552108765, "learning_rate": 9.589710871686503e-06, "loss": 0.2944, "step": 28415 }, { "epoch": 0.51314829660766, "grad_norm": 0.6610318422317505, "learning_rate": 9.58687703369952e-06, "loss": 0.2078, "step": 28420 }, { "epoch": 0.5132385760405608, "grad_norm": 0.35077694058418274, "learning_rate": 9.584043228945278e-06, "loss": 0.2142, "step": 28425 }, { "epoch": 0.5133288554734614, "grad_norm": 0.5012067556381226, "learning_rate": 9.581209457651732e-06, "loss": 0.2189, "step": 28430 }, { "epoch": 0.5134191349063622, "grad_norm": 0.435333788394928, "learning_rate": 9.578375720046841e-06, "loss": 0.3415, "step": 28435 }, { "epoch": 0.5135094143392629, "grad_norm": 0.35054147243499756, "learning_rate": 9.575542016358556e-06, "loss": 0.1883, "step": 28440 }, { "epoch": 0.5135996937721636, "grad_norm": 0.3902779519557953, "learning_rate": 9.572708346814836e-06, "loss": 0.2516, "step": 28445 }, { "epoch": 0.5136899732050643, "grad_norm": 0.4505479037761688, "learning_rate": 9.569874711643616e-06, "loss": 0.2091, "step": 28450 }, { "epoch": 0.513780252637965, "grad_norm": 0.4965774714946747, "learning_rate": 9.56704111107285e-06, "loss": 0.2457, "step": 28455 }, { "epoch": 0.5138705320708657, "grad_norm": 0.661870539188385, "learning_rate": 9.564207545330482e-06, "loss": 0.1949, "step": 28460 }, { "epoch": 0.5139608115037665, "grad_norm": 0.6075689792633057, "learning_rate": 9.561374014644443e-06, "loss": 0.2481, "step": 28465 }, { "epoch": 0.5140510909366671, "grad_norm": 0.36879128217697144, "learning_rate": 9.55854051924268e-06, "loss": 0.2393, "step": 28470 }, { "epoch": 0.5141413703695679, "grad_norm": 0.6054572463035583, "learning_rate": 9.555707059353118e-06, "loss": 0.1553, "step": 28475 }, { "epoch": 0.5142316498024686, "grad_norm": 0.5598069429397583, "learning_rate": 9.552873635203696e-06, "loss": 0.2656, "step": 28480 }, { "epoch": 0.5143219292353693, "grad_norm": 0.4982486367225647, "learning_rate": 9.550040247022337e-06, "loss": 0.1776, "step": 28485 }, { "epoch": 0.51441220866827, "grad_norm": 0.6931257843971252, "learning_rate": 9.547206895036973e-06, "loss": 0.2757, "step": 28490 }, { "epoch": 0.5145024881011707, "grad_norm": 0.31999334692955017, "learning_rate": 9.544373579475518e-06, "loss": 0.2901, "step": 28495 }, { "epoch": 0.5145927675340715, "grad_norm": 0.4983128309249878, "learning_rate": 9.541540300565897e-06, "loss": 0.0993, "step": 28500 }, { "epoch": 0.5146830469669722, "grad_norm": 0.5988265872001648, "learning_rate": 9.538707058536024e-06, "loss": 0.232, "step": 28505 }, { "epoch": 0.5147733263998728, "grad_norm": 0.5505892634391785, "learning_rate": 9.535873853613818e-06, "loss": 0.3384, "step": 28510 }, { "epoch": 0.5148636058327736, "grad_norm": 0.342400461435318, "learning_rate": 9.533040686027183e-06, "loss": 0.1917, "step": 28515 }, { "epoch": 0.5149538852656743, "grad_norm": 0.3296906054019928, "learning_rate": 9.53020755600403e-06, "loss": 0.2454, "step": 28520 }, { "epoch": 0.5150441646985751, "grad_norm": 0.256753146648407, "learning_rate": 9.527374463772262e-06, "loss": 0.2047, "step": 28525 }, { "epoch": 0.5151344441314757, "grad_norm": 0.350436806678772, "learning_rate": 9.524541409559787e-06, "loss": 0.3151, "step": 28530 }, { "epoch": 0.5152247235643764, "grad_norm": 0.39108237624168396, "learning_rate": 9.52170839359449e-06, "loss": 0.2864, "step": 28535 }, { "epoch": 0.5153150029972772, "grad_norm": 0.3559560477733612, "learning_rate": 9.518875416104284e-06, "loss": 0.1883, "step": 28540 }, { "epoch": 0.5154052824301779, "grad_norm": 0.40671393275260925, "learning_rate": 9.516042477317049e-06, "loss": 0.2448, "step": 28545 }, { "epoch": 0.5154955618630787, "grad_norm": 0.5957432389259338, "learning_rate": 9.513209577460679e-06, "loss": 0.2479, "step": 28550 }, { "epoch": 0.5155858412959793, "grad_norm": 0.4093860685825348, "learning_rate": 9.510376716763062e-06, "loss": 0.256, "step": 28555 }, { "epoch": 0.51567612072888, "grad_norm": 0.5931309461593628, "learning_rate": 9.507543895452072e-06, "loss": 0.2012, "step": 28560 }, { "epoch": 0.5157664001617808, "grad_norm": 0.5393112897872925, "learning_rate": 9.504711113755599e-06, "loss": 0.3817, "step": 28565 }, { "epoch": 0.5158566795946815, "grad_norm": 0.3842506408691406, "learning_rate": 9.501878371901512e-06, "loss": 0.2598, "step": 28570 }, { "epoch": 0.5159469590275821, "grad_norm": 0.44890961050987244, "learning_rate": 9.499045670117693e-06, "loss": 0.2563, "step": 28575 }, { "epoch": 0.5160372384604829, "grad_norm": 0.36904144287109375, "learning_rate": 9.496213008632006e-06, "loss": 0.242, "step": 28580 }, { "epoch": 0.5161275178933836, "grad_norm": 0.4298558235168457, "learning_rate": 9.493380387672318e-06, "loss": 0.3049, "step": 28585 }, { "epoch": 0.5162177973262844, "grad_norm": 0.36553990840911865, "learning_rate": 9.490547807466494e-06, "loss": 0.2364, "step": 28590 }, { "epoch": 0.516308076759185, "grad_norm": 0.6329402327537537, "learning_rate": 9.487715268242399e-06, "loss": 0.2018, "step": 28595 }, { "epoch": 0.5163983561920857, "grad_norm": 0.34179818630218506, "learning_rate": 9.48488277022788e-06, "loss": 0.3315, "step": 28600 }, { "epoch": 0.5164886356249865, "grad_norm": 0.46953728795051575, "learning_rate": 9.482050313650803e-06, "loss": 0.2657, "step": 28605 }, { "epoch": 0.5165789150578872, "grad_norm": 0.31157082319259644, "learning_rate": 9.479217898739007e-06, "loss": 0.2288, "step": 28610 }, { "epoch": 0.5166691944907879, "grad_norm": 0.4610215425491333, "learning_rate": 9.476385525720347e-06, "loss": 0.2543, "step": 28615 }, { "epoch": 0.5167594739236886, "grad_norm": 0.2740088701248169, "learning_rate": 9.473553194822663e-06, "loss": 0.2235, "step": 28620 }, { "epoch": 0.5168497533565893, "grad_norm": 0.5691370964050293, "learning_rate": 9.470720906273802e-06, "loss": 0.2223, "step": 28625 }, { "epoch": 0.5169400327894901, "grad_norm": 2.938239097595215, "learning_rate": 9.46788866030159e-06, "loss": 0.2719, "step": 28630 }, { "epoch": 0.5170303122223907, "grad_norm": 0.3056987524032593, "learning_rate": 9.46505645713387e-06, "loss": 0.2005, "step": 28635 }, { "epoch": 0.5171205916552915, "grad_norm": 0.746619462966919, "learning_rate": 9.462224296998465e-06, "loss": 0.2983, "step": 28640 }, { "epoch": 0.5172108710881922, "grad_norm": 0.5389447808265686, "learning_rate": 9.45939218012321e-06, "loss": 0.3343, "step": 28645 }, { "epoch": 0.5173011505210929, "grad_norm": 0.3819253444671631, "learning_rate": 9.456560106735922e-06, "loss": 0.2068, "step": 28650 }, { "epoch": 0.5173914299539936, "grad_norm": 0.3484255373477936, "learning_rate": 9.453728077064418e-06, "loss": 0.2132, "step": 28655 }, { "epoch": 0.5174817093868943, "grad_norm": 0.3534581661224365, "learning_rate": 9.450896091336525e-06, "loss": 0.1971, "step": 28660 }, { "epoch": 0.517571988819795, "grad_norm": 0.7409288883209229, "learning_rate": 9.448064149780043e-06, "loss": 0.1966, "step": 28665 }, { "epoch": 0.5176622682526958, "grad_norm": 0.36281609535217285, "learning_rate": 9.445232252622792e-06, "loss": 0.253, "step": 28670 }, { "epoch": 0.5177525476855964, "grad_norm": 0.4301600754261017, "learning_rate": 9.44240040009257e-06, "loss": 0.2761, "step": 28675 }, { "epoch": 0.5178428271184972, "grad_norm": 0.47277727723121643, "learning_rate": 9.439568592417182e-06, "loss": 0.2723, "step": 28680 }, { "epoch": 0.5179331065513979, "grad_norm": 0.884819507598877, "learning_rate": 9.436736829824425e-06, "loss": 0.2927, "step": 28685 }, { "epoch": 0.5180233859842986, "grad_norm": 0.37751275300979614, "learning_rate": 9.433905112542098e-06, "loss": 0.2587, "step": 28690 }, { "epoch": 0.5181136654171993, "grad_norm": 0.5852225422859192, "learning_rate": 9.431073440797987e-06, "loss": 0.2184, "step": 28695 }, { "epoch": 0.5182039448501, "grad_norm": 0.4749210476875305, "learning_rate": 9.42824181481988e-06, "loss": 0.3444, "step": 28700 }, { "epoch": 0.5182942242830008, "grad_norm": 0.2995322644710541, "learning_rate": 9.425410234835562e-06, "loss": 0.1988, "step": 28705 }, { "epoch": 0.5183845037159015, "grad_norm": 0.5372695922851562, "learning_rate": 9.422578701072816e-06, "loss": 0.212, "step": 28710 }, { "epoch": 0.5184747831488021, "grad_norm": 0.37080609798431396, "learning_rate": 9.419747213759412e-06, "loss": 0.2895, "step": 28715 }, { "epoch": 0.5185650625817029, "grad_norm": 0.16475968062877655, "learning_rate": 9.416915773123128e-06, "loss": 0.226, "step": 28720 }, { "epoch": 0.5186553420146036, "grad_norm": 0.5332977771759033, "learning_rate": 9.414084379391729e-06, "loss": 0.2959, "step": 28725 }, { "epoch": 0.5187456214475044, "grad_norm": 0.5752127766609192, "learning_rate": 9.411253032792981e-06, "loss": 0.2302, "step": 28730 }, { "epoch": 0.518835900880405, "grad_norm": 0.9700411558151245, "learning_rate": 9.40842173355465e-06, "loss": 0.3014, "step": 28735 }, { "epoch": 0.5189261803133057, "grad_norm": 0.66222083568573, "learning_rate": 9.405590481904485e-06, "loss": 0.2954, "step": 28740 }, { "epoch": 0.5190164597462065, "grad_norm": 0.3779366612434387, "learning_rate": 9.402759278070246e-06, "loss": 0.2753, "step": 28745 }, { "epoch": 0.5191067391791072, "grad_norm": 0.5842838883399963, "learning_rate": 9.399928122279678e-06, "loss": 0.2887, "step": 28750 }, { "epoch": 0.5191970186120078, "grad_norm": 0.7366604208946228, "learning_rate": 9.397097014760535e-06, "loss": 0.2706, "step": 28755 }, { "epoch": 0.5192872980449086, "grad_norm": 0.4361642301082611, "learning_rate": 9.39426595574055e-06, "loss": 0.1711, "step": 28760 }, { "epoch": 0.5193775774778093, "grad_norm": 0.6456303596496582, "learning_rate": 9.391434945447463e-06, "loss": 0.2822, "step": 28765 }, { "epoch": 0.5194678569107101, "grad_norm": 0.5982512831687927, "learning_rate": 9.388603984109011e-06, "loss": 0.2058, "step": 28770 }, { "epoch": 0.5195581363436107, "grad_norm": 0.45432358980178833, "learning_rate": 9.385773071952928e-06, "loss": 0.2887, "step": 28775 }, { "epoch": 0.5196484157765114, "grad_norm": 0.3200981318950653, "learning_rate": 9.38294220920693e-06, "loss": 0.2377, "step": 28780 }, { "epoch": 0.5197386952094122, "grad_norm": 0.5555515289306641, "learning_rate": 9.380111396098748e-06, "loss": 0.2141, "step": 28785 }, { "epoch": 0.5198289746423129, "grad_norm": 0.390817254781723, "learning_rate": 9.377280632856094e-06, "loss": 0.2542, "step": 28790 }, { "epoch": 0.5199192540752136, "grad_norm": 0.46619880199432373, "learning_rate": 9.374449919706687e-06, "loss": 0.2805, "step": 28795 }, { "epoch": 0.5200095335081143, "grad_norm": 0.2127184420824051, "learning_rate": 9.371619256878236e-06, "loss": 0.153, "step": 28800 }, { "epoch": 0.520099812941015, "grad_norm": 0.7713163495063782, "learning_rate": 9.368788644598449e-06, "loss": 0.1596, "step": 28805 }, { "epoch": 0.5201900923739158, "grad_norm": 0.3343033194541931, "learning_rate": 9.365958083095023e-06, "loss": 0.2513, "step": 28810 }, { "epoch": 0.5202803718068164, "grad_norm": 0.5713295936584473, "learning_rate": 9.36312757259566e-06, "loss": 0.2409, "step": 28815 }, { "epoch": 0.5203706512397172, "grad_norm": 0.7380322813987732, "learning_rate": 9.360297113328052e-06, "loss": 0.3282, "step": 28820 }, { "epoch": 0.5204609306726179, "grad_norm": 0.6009113788604736, "learning_rate": 9.357466705519895e-06, "loss": 0.2825, "step": 28825 }, { "epoch": 0.5205512101055186, "grad_norm": 0.42540380358695984, "learning_rate": 9.354636349398868e-06, "loss": 0.205, "step": 28830 }, { "epoch": 0.5206414895384193, "grad_norm": 0.7028429508209229, "learning_rate": 9.35180604519265e-06, "loss": 0.2577, "step": 28835 }, { "epoch": 0.52073176897132, "grad_norm": 0.40496763586997986, "learning_rate": 9.34897579312893e-06, "loss": 0.2601, "step": 28840 }, { "epoch": 0.5208220484042207, "grad_norm": 0.4968245327472687, "learning_rate": 9.346145593435369e-06, "loss": 0.3397, "step": 28845 }, { "epoch": 0.5209123278371215, "grad_norm": 0.33956199884414673, "learning_rate": 9.343315446339643e-06, "loss": 0.2776, "step": 28850 }, { "epoch": 0.5210026072700221, "grad_norm": 0.5444244742393494, "learning_rate": 9.340485352069411e-06, "loss": 0.1788, "step": 28855 }, { "epoch": 0.5210928867029229, "grad_norm": 0.6133355498313904, "learning_rate": 9.33765531085234e-06, "loss": 0.1939, "step": 28860 }, { "epoch": 0.5211831661358236, "grad_norm": 0.6628184914588928, "learning_rate": 9.334825322916084e-06, "loss": 0.2833, "step": 28865 }, { "epoch": 0.5212734455687243, "grad_norm": 0.39197325706481934, "learning_rate": 9.331995388488296e-06, "loss": 0.1877, "step": 28870 }, { "epoch": 0.521363725001625, "grad_norm": 0.42335212230682373, "learning_rate": 9.329165507796619e-06, "loss": 0.3009, "step": 28875 }, { "epoch": 0.5214540044345257, "grad_norm": 0.61272794008255, "learning_rate": 9.326335681068703e-06, "loss": 0.3649, "step": 28880 }, { "epoch": 0.5215442838674265, "grad_norm": 1.6266295909881592, "learning_rate": 9.32350590853218e-06, "loss": 0.2248, "step": 28885 }, { "epoch": 0.5216345633003272, "grad_norm": 0.4243999719619751, "learning_rate": 9.32067619041469e-06, "loss": 0.2775, "step": 28890 }, { "epoch": 0.5217248427332278, "grad_norm": 0.4876992106437683, "learning_rate": 9.31784652694386e-06, "loss": 0.2488, "step": 28895 }, { "epoch": 0.5218151221661286, "grad_norm": 0.3923934996128082, "learning_rate": 9.315016918347318e-06, "loss": 0.2923, "step": 28900 }, { "epoch": 0.5219054015990293, "grad_norm": 0.452089786529541, "learning_rate": 9.312187364852683e-06, "loss": 0.2258, "step": 28905 }, { "epoch": 0.5219956810319301, "grad_norm": 0.619631826877594, "learning_rate": 9.309357866687578e-06, "loss": 0.3225, "step": 28910 }, { "epoch": 0.5220859604648307, "grad_norm": 0.3504837155342102, "learning_rate": 9.306528424079606e-06, "loss": 0.238, "step": 28915 }, { "epoch": 0.5221762398977314, "grad_norm": 0.3095729351043701, "learning_rate": 9.303699037256384e-06, "loss": 0.23, "step": 28920 }, { "epoch": 0.5222665193306322, "grad_norm": 0.48953840136528015, "learning_rate": 9.30086970644551e-06, "loss": 0.2503, "step": 28925 }, { "epoch": 0.5223567987635329, "grad_norm": 0.4810202717781067, "learning_rate": 9.298040431874584e-06, "loss": 0.3123, "step": 28930 }, { "epoch": 0.5224470781964337, "grad_norm": 0.3621257245540619, "learning_rate": 9.295211213771206e-06, "loss": 0.1933, "step": 28935 }, { "epoch": 0.5225373576293343, "grad_norm": 0.3242107331752777, "learning_rate": 9.292382052362955e-06, "loss": 0.2264, "step": 28940 }, { "epoch": 0.522627637062235, "grad_norm": 0.7714591026306152, "learning_rate": 9.289552947877428e-06, "loss": 0.2266, "step": 28945 }, { "epoch": 0.5227179164951358, "grad_norm": 0.3129047751426697, "learning_rate": 9.286723900542199e-06, "loss": 0.2461, "step": 28950 }, { "epoch": 0.5228081959280365, "grad_norm": 0.5419982075691223, "learning_rate": 9.283894910584848e-06, "loss": 0.1536, "step": 28955 }, { "epoch": 0.5228984753609371, "grad_norm": 0.39662978053092957, "learning_rate": 9.28106597823294e-06, "loss": 0.2931, "step": 28960 }, { "epoch": 0.5229887547938379, "grad_norm": 0.45678624510765076, "learning_rate": 9.278237103714053e-06, "loss": 0.2421, "step": 28965 }, { "epoch": 0.5230790342267386, "grad_norm": 0.5336384177207947, "learning_rate": 9.275408287255738e-06, "loss": 0.228, "step": 28970 }, { "epoch": 0.5231693136596394, "grad_norm": 0.5243151187896729, "learning_rate": 9.272579529085564e-06, "loss": 0.2832, "step": 28975 }, { "epoch": 0.52325959309254, "grad_norm": 0.4475562870502472, "learning_rate": 9.269750829431073e-06, "loss": 0.2219, "step": 28980 }, { "epoch": 0.5233498725254407, "grad_norm": 0.4101310670375824, "learning_rate": 9.26692218851982e-06, "loss": 0.2665, "step": 28985 }, { "epoch": 0.5234401519583415, "grad_norm": 0.5185317993164062, "learning_rate": 9.264093606579346e-06, "loss": 0.2303, "step": 28990 }, { "epoch": 0.5235304313912422, "grad_norm": 0.5011386871337891, "learning_rate": 9.261265083837192e-06, "loss": 0.1616, "step": 28995 }, { "epoch": 0.5236207108241429, "grad_norm": 0.2844066917896271, "learning_rate": 9.258436620520888e-06, "loss": 0.2353, "step": 29000 }, { "epoch": 0.5237109902570436, "grad_norm": 0.4489717483520508, "learning_rate": 9.255608216857972e-06, "loss": 0.3063, "step": 29005 }, { "epoch": 0.5238012696899443, "grad_norm": 0.433130145072937, "learning_rate": 9.25277987307596e-06, "loss": 0.2162, "step": 29010 }, { "epoch": 0.5238915491228451, "grad_norm": 0.4645278751850128, "learning_rate": 9.249951589402371e-06, "loss": 0.262, "step": 29015 }, { "epoch": 0.5239818285557457, "grad_norm": 0.46296849846839905, "learning_rate": 9.24712336606473e-06, "loss": 0.2047, "step": 29020 }, { "epoch": 0.5240721079886465, "grad_norm": 0.5372308492660522, "learning_rate": 9.244295203290533e-06, "loss": 0.1853, "step": 29025 }, { "epoch": 0.5241623874215472, "grad_norm": 0.4358402490615845, "learning_rate": 9.241467101307298e-06, "loss": 0.1978, "step": 29030 }, { "epoch": 0.5242526668544479, "grad_norm": 0.730522871017456, "learning_rate": 9.238639060342515e-06, "loss": 0.2102, "step": 29035 }, { "epoch": 0.5243429462873486, "grad_norm": 0.5406123995780945, "learning_rate": 9.235811080623688e-06, "loss": 0.3067, "step": 29040 }, { "epoch": 0.5244332257202493, "grad_norm": 0.4470652639865875, "learning_rate": 9.232983162378301e-06, "loss": 0.2268, "step": 29045 }, { "epoch": 0.52452350515315, "grad_norm": 0.4234684705734253, "learning_rate": 9.230155305833843e-06, "loss": 0.2263, "step": 29050 }, { "epoch": 0.5246137845860508, "grad_norm": 1.0069785118103027, "learning_rate": 9.227327511217792e-06, "loss": 0.2249, "step": 29055 }, { "epoch": 0.5247040640189514, "grad_norm": 0.7058039307594299, "learning_rate": 9.224499778757625e-06, "loss": 0.2124, "step": 29060 }, { "epoch": 0.5247943434518522, "grad_norm": 0.5813343524932861, "learning_rate": 9.221672108680812e-06, "loss": 0.2509, "step": 29065 }, { "epoch": 0.5248846228847529, "grad_norm": 0.24793928861618042, "learning_rate": 9.218844501214824e-06, "loss": 0.2581, "step": 29070 }, { "epoch": 0.5249749023176536, "grad_norm": 0.3823011815547943, "learning_rate": 9.21601695658711e-06, "loss": 0.2139, "step": 29075 }, { "epoch": 0.5250651817505543, "grad_norm": 0.506294846534729, "learning_rate": 9.213189475025133e-06, "loss": 0.2607, "step": 29080 }, { "epoch": 0.525155461183455, "grad_norm": 0.7374309301376343, "learning_rate": 9.210362056756342e-06, "loss": 0.1908, "step": 29085 }, { "epoch": 0.5252457406163558, "grad_norm": 0.4976447820663452, "learning_rate": 9.207534702008187e-06, "loss": 0.3187, "step": 29090 }, { "epoch": 0.5253360200492565, "grad_norm": 1.3869751691818237, "learning_rate": 9.204707411008099e-06, "loss": 0.2927, "step": 29095 }, { "epoch": 0.5254262994821571, "grad_norm": 0.5298295617103577, "learning_rate": 9.201880183983518e-06, "loss": 0.2162, "step": 29100 }, { "epoch": 0.5255165789150579, "grad_norm": 0.2758086025714874, "learning_rate": 9.199053021161876e-06, "loss": 0.2581, "step": 29105 }, { "epoch": 0.5256068583479586, "grad_norm": 0.5744746923446655, "learning_rate": 9.196225922770592e-06, "loss": 0.27, "step": 29110 }, { "epoch": 0.5256971377808594, "grad_norm": 0.5285934805870056, "learning_rate": 9.193398889037088e-06, "loss": 0.3273, "step": 29115 }, { "epoch": 0.52578741721376, "grad_norm": 0.38951054215431213, "learning_rate": 9.19057192018878e-06, "loss": 0.2136, "step": 29120 }, { "epoch": 0.5258776966466607, "grad_norm": 0.5702796578407288, "learning_rate": 9.187745016453074e-06, "loss": 0.2056, "step": 29125 }, { "epoch": 0.5259679760795615, "grad_norm": 0.5056290626525879, "learning_rate": 9.184918178057378e-06, "loss": 0.2869, "step": 29130 }, { "epoch": 0.5260582555124622, "grad_norm": 0.648164689540863, "learning_rate": 9.18209140522909e-06, "loss": 0.2259, "step": 29135 }, { "epoch": 0.5261485349453628, "grad_norm": 0.3163846731185913, "learning_rate": 9.179264698195598e-06, "loss": 0.2527, "step": 29140 }, { "epoch": 0.5262388143782636, "grad_norm": 0.35842669010162354, "learning_rate": 9.176438057184295e-06, "loss": 0.2556, "step": 29145 }, { "epoch": 0.5263290938111643, "grad_norm": 0.9657479524612427, "learning_rate": 9.173611482422562e-06, "loss": 0.3573, "step": 29150 }, { "epoch": 0.5264193732440651, "grad_norm": 0.5299378037452698, "learning_rate": 9.17078497413778e-06, "loss": 0.236, "step": 29155 }, { "epoch": 0.5265096526769657, "grad_norm": 0.2969253957271576, "learning_rate": 9.167958532557313e-06, "loss": 0.1682, "step": 29160 }, { "epoch": 0.5265999321098664, "grad_norm": 0.5945093035697937, "learning_rate": 9.165132157908536e-06, "loss": 0.2323, "step": 29165 }, { "epoch": 0.5266902115427672, "grad_norm": 0.5590220093727112, "learning_rate": 9.162305850418805e-06, "loss": 0.2607, "step": 29170 }, { "epoch": 0.5267804909756679, "grad_norm": 0.3752111494541168, "learning_rate": 9.15947961031548e-06, "loss": 0.1746, "step": 29175 }, { "epoch": 0.5268707704085686, "grad_norm": 0.5085766315460205, "learning_rate": 9.156653437825904e-06, "loss": 0.2402, "step": 29180 }, { "epoch": 0.5269610498414693, "grad_norm": 0.2286229431629181, "learning_rate": 9.153827333177434e-06, "loss": 0.2185, "step": 29185 }, { "epoch": 0.52705132927437, "grad_norm": 0.44430407881736755, "learning_rate": 9.151001296597398e-06, "loss": 0.2004, "step": 29190 }, { "epoch": 0.5271416087072708, "grad_norm": 0.35909855365753174, "learning_rate": 9.148175328313136e-06, "loss": 0.2301, "step": 29195 }, { "epoch": 0.5272318881401714, "grad_norm": 0.3297822177410126, "learning_rate": 9.145349428551979e-06, "loss": 0.2516, "step": 29200 }, { "epoch": 0.5273221675730722, "grad_norm": 0.51425701379776, "learning_rate": 9.142523597541241e-06, "loss": 0.2572, "step": 29205 }, { "epoch": 0.5274124470059729, "grad_norm": 0.804265022277832, "learning_rate": 9.13969783550825e-06, "loss": 0.2921, "step": 29210 }, { "epoch": 0.5275027264388736, "grad_norm": 0.3839067816734314, "learning_rate": 9.136872142680309e-06, "loss": 0.2301, "step": 29215 }, { "epoch": 0.5275930058717743, "grad_norm": 0.7506440281867981, "learning_rate": 9.134046519284733e-06, "loss": 0.1969, "step": 29220 }, { "epoch": 0.527683285304675, "grad_norm": 0.5303971767425537, "learning_rate": 9.131220965548814e-06, "loss": 0.1988, "step": 29225 }, { "epoch": 0.5277735647375758, "grad_norm": 0.47842785716056824, "learning_rate": 9.128395481699855e-06, "loss": 0.2993, "step": 29230 }, { "epoch": 0.5278638441704765, "grad_norm": 0.35133376717567444, "learning_rate": 9.125570067965138e-06, "loss": 0.2038, "step": 29235 }, { "epoch": 0.5279541236033771, "grad_norm": 0.704802393913269, "learning_rate": 9.122744724571956e-06, "loss": 0.205, "step": 29240 }, { "epoch": 0.5280444030362779, "grad_norm": 0.29799526929855347, "learning_rate": 9.119919451747578e-06, "loss": 0.2352, "step": 29245 }, { "epoch": 0.5281346824691786, "grad_norm": 0.538298487663269, "learning_rate": 9.117094249719287e-06, "loss": 0.1288, "step": 29250 }, { "epoch": 0.5282249619020793, "grad_norm": 0.60027015209198, "learning_rate": 9.11426911871434e-06, "loss": 0.2194, "step": 29255 }, { "epoch": 0.52831524133498, "grad_norm": 0.5876868367195129, "learning_rate": 9.111444058960004e-06, "loss": 0.1387, "step": 29260 }, { "epoch": 0.5284055207678807, "grad_norm": 0.32721805572509766, "learning_rate": 9.10861907068353e-06, "loss": 0.2288, "step": 29265 }, { "epoch": 0.5284958002007815, "grad_norm": 0.3952575922012329, "learning_rate": 9.105794154112177e-06, "loss": 0.2973, "step": 29270 }, { "epoch": 0.5285860796336822, "grad_norm": 0.41881096363067627, "learning_rate": 9.102969309473178e-06, "loss": 0.2401, "step": 29275 }, { "epoch": 0.5286763590665828, "grad_norm": 0.6066296696662903, "learning_rate": 9.100144536993777e-06, "loss": 0.2784, "step": 29280 }, { "epoch": 0.5287666384994836, "grad_norm": 0.39961621165275574, "learning_rate": 9.097319836901209e-06, "loss": 0.2199, "step": 29285 }, { "epoch": 0.5288569179323843, "grad_norm": 0.4351910650730133, "learning_rate": 9.094495209422693e-06, "loss": 0.2384, "step": 29290 }, { "epoch": 0.5289471973652851, "grad_norm": 0.47434571385383606, "learning_rate": 9.091670654785455e-06, "loss": 0.207, "step": 29295 }, { "epoch": 0.5290374767981857, "grad_norm": 0.37549877166748047, "learning_rate": 9.088846173216707e-06, "loss": 0.2085, "step": 29300 }, { "epoch": 0.5291277562310864, "grad_norm": 0.46450865268707275, "learning_rate": 9.086021764943664e-06, "loss": 0.227, "step": 29305 }, { "epoch": 0.5292180356639872, "grad_norm": 0.6716872453689575, "learning_rate": 9.083197430193517e-06, "loss": 0.2121, "step": 29310 }, { "epoch": 0.5293083150968879, "grad_norm": 0.34130343794822693, "learning_rate": 9.080373169193477e-06, "loss": 0.2925, "step": 29315 }, { "epoch": 0.5293985945297885, "grad_norm": 0.4936409294605255, "learning_rate": 9.077548982170727e-06, "loss": 0.2717, "step": 29320 }, { "epoch": 0.5294888739626893, "grad_norm": 0.7294759154319763, "learning_rate": 9.074724869352457e-06, "loss": 0.2836, "step": 29325 }, { "epoch": 0.52957915339559, "grad_norm": 0.6342700123786926, "learning_rate": 9.071900830965839e-06, "loss": 0.2111, "step": 29330 }, { "epoch": 0.5296694328284908, "grad_norm": 0.58757084608078, "learning_rate": 9.069076867238058e-06, "loss": 0.3456, "step": 29335 }, { "epoch": 0.5297597122613915, "grad_norm": 0.4558350145816803, "learning_rate": 9.06625297839627e-06, "loss": 0.2422, "step": 29340 }, { "epoch": 0.5298499916942921, "grad_norm": 0.4043460786342621, "learning_rate": 9.063429164667641e-06, "loss": 0.2341, "step": 29345 }, { "epoch": 0.5299402711271929, "grad_norm": 0.34301161766052246, "learning_rate": 9.060605426279327e-06, "loss": 0.3049, "step": 29350 }, { "epoch": 0.5300305505600936, "grad_norm": 0.5473177433013916, "learning_rate": 9.05778176345848e-06, "loss": 0.3061, "step": 29355 }, { "epoch": 0.5301208299929944, "grad_norm": 0.4395565390586853, "learning_rate": 9.054958176432232e-06, "loss": 0.2773, "step": 29360 }, { "epoch": 0.530211109425895, "grad_norm": 0.5470331311225891, "learning_rate": 9.052134665427734e-06, "loss": 0.3942, "step": 29365 }, { "epoch": 0.5303013888587957, "grad_norm": 0.4692026376724243, "learning_rate": 9.049311230672105e-06, "loss": 0.2163, "step": 29370 }, { "epoch": 0.5303916682916965, "grad_norm": 0.401054710149765, "learning_rate": 9.04648787239248e-06, "loss": 0.3039, "step": 29375 }, { "epoch": 0.5304819477245972, "grad_norm": 0.5699111223220825, "learning_rate": 9.043664590815973e-06, "loss": 0.2137, "step": 29380 }, { "epoch": 0.5305722271574979, "grad_norm": 0.4167243242263794, "learning_rate": 9.040841386169694e-06, "loss": 0.1648, "step": 29385 }, { "epoch": 0.5306625065903986, "grad_norm": 0.6566867828369141, "learning_rate": 9.038018258680753e-06, "loss": 0.3274, "step": 29390 }, { "epoch": 0.5307527860232993, "grad_norm": 0.46134153008461, "learning_rate": 9.035195208576247e-06, "loss": 0.2062, "step": 29395 }, { "epoch": 0.5308430654562001, "grad_norm": 0.35193225741386414, "learning_rate": 9.032372236083275e-06, "loss": 0.2503, "step": 29400 }, { "epoch": 0.5309333448891007, "grad_norm": 0.36347219347953796, "learning_rate": 9.029549341428918e-06, "loss": 0.243, "step": 29405 }, { "epoch": 0.5310236243220015, "grad_norm": 0.8077743649482727, "learning_rate": 9.026726524840261e-06, "loss": 0.2307, "step": 29410 }, { "epoch": 0.5311139037549022, "grad_norm": 0.5789394974708557, "learning_rate": 9.02390378654438e-06, "loss": 0.2531, "step": 29415 }, { "epoch": 0.5312041831878029, "grad_norm": 0.4585419297218323, "learning_rate": 9.021081126768344e-06, "loss": 0.1938, "step": 29420 }, { "epoch": 0.5312944626207036, "grad_norm": 0.39548465609550476, "learning_rate": 9.01825854573921e-06, "loss": 0.2606, "step": 29425 }, { "epoch": 0.5313847420536043, "grad_norm": 0.42935436964035034, "learning_rate": 9.01543604368404e-06, "loss": 0.2124, "step": 29430 }, { "epoch": 0.531475021486505, "grad_norm": 0.2676616311073303, "learning_rate": 9.012613620829881e-06, "loss": 0.167, "step": 29435 }, { "epoch": 0.5315653009194058, "grad_norm": 0.3309231698513031, "learning_rate": 9.009791277403782e-06, "loss": 0.2867, "step": 29440 }, { "epoch": 0.5316555803523064, "grad_norm": 0.2971544563770294, "learning_rate": 9.006969013632765e-06, "loss": 0.2161, "step": 29445 }, { "epoch": 0.5317458597852072, "grad_norm": 0.42876797914505005, "learning_rate": 9.00414682974388e-06, "loss": 0.2147, "step": 29450 }, { "epoch": 0.5318361392181079, "grad_norm": 0.47307348251342773, "learning_rate": 9.001324725964138e-06, "loss": 0.2491, "step": 29455 }, { "epoch": 0.5319264186510086, "grad_norm": 0.48468372225761414, "learning_rate": 8.998502702520562e-06, "loss": 0.2941, "step": 29460 }, { "epoch": 0.5320166980839093, "grad_norm": 0.6423856019973755, "learning_rate": 8.995680759640162e-06, "loss": 0.2658, "step": 29465 }, { "epoch": 0.53210697751681, "grad_norm": 0.3446263074874878, "learning_rate": 8.992858897549945e-06, "loss": 0.2299, "step": 29470 }, { "epoch": 0.5321972569497108, "grad_norm": 0.2555557191371918, "learning_rate": 8.990037116476905e-06, "loss": 0.2368, "step": 29475 }, { "epoch": 0.5322875363826115, "grad_norm": 0.36306333541870117, "learning_rate": 8.987215416648033e-06, "loss": 0.2372, "step": 29480 }, { "epoch": 0.5323778158155121, "grad_norm": 0.6085293889045715, "learning_rate": 8.984393798290322e-06, "loss": 0.2426, "step": 29485 }, { "epoch": 0.5324680952484129, "grad_norm": 0.32165977358818054, "learning_rate": 8.981572261630743e-06, "loss": 0.2472, "step": 29490 }, { "epoch": 0.5325583746813136, "grad_norm": 0.3249320983886719, "learning_rate": 8.97875080689627e-06, "loss": 0.3114, "step": 29495 }, { "epoch": 0.5326486541142144, "grad_norm": 0.2133033275604248, "learning_rate": 8.97592943431387e-06, "loss": 0.1943, "step": 29500 }, { "epoch": 0.532738933547115, "grad_norm": 0.600243866443634, "learning_rate": 8.973108144110504e-06, "loss": 0.2675, "step": 29505 }, { "epoch": 0.5328292129800157, "grad_norm": 0.5490859746932983, "learning_rate": 8.970286936513114e-06, "loss": 0.2145, "step": 29510 }, { "epoch": 0.5329194924129165, "grad_norm": 0.24173220992088318, "learning_rate": 8.967465811748663e-06, "loss": 0.1804, "step": 29515 }, { "epoch": 0.5330097718458172, "grad_norm": 0.49922430515289307, "learning_rate": 8.964644770044074e-06, "loss": 0.2265, "step": 29520 }, { "epoch": 0.5331000512787178, "grad_norm": 0.7988704442977905, "learning_rate": 8.961823811626288e-06, "loss": 0.2428, "step": 29525 }, { "epoch": 0.5331903307116186, "grad_norm": 0.539200484752655, "learning_rate": 8.959002936722226e-06, "loss": 0.3329, "step": 29530 }, { "epoch": 0.5332806101445193, "grad_norm": 0.47633564472198486, "learning_rate": 8.956182145558812e-06, "loss": 0.2948, "step": 29535 }, { "epoch": 0.5333708895774201, "grad_norm": 0.5710396766662598, "learning_rate": 8.953361438362953e-06, "loss": 0.2617, "step": 29540 }, { "epoch": 0.5334611690103207, "grad_norm": 0.3195212483406067, "learning_rate": 8.950540815361556e-06, "loss": 0.2382, "step": 29545 }, { "epoch": 0.5335514484432214, "grad_norm": 0.4052313268184662, "learning_rate": 8.947720276781521e-06, "loss": 0.2092, "step": 29550 }, { "epoch": 0.5336417278761222, "grad_norm": 0.5307109951972961, "learning_rate": 8.944899822849742e-06, "loss": 0.255, "step": 29555 }, { "epoch": 0.5337320073090229, "grad_norm": 0.24876217544078827, "learning_rate": 8.942079453793095e-06, "loss": 0.2055, "step": 29560 }, { "epoch": 0.5338222867419236, "grad_norm": 0.6838825941085815, "learning_rate": 8.939259169838468e-06, "loss": 0.3401, "step": 29565 }, { "epoch": 0.5339125661748243, "grad_norm": 0.21149404346942902, "learning_rate": 8.936438971212731e-06, "loss": 0.2823, "step": 29570 }, { "epoch": 0.534002845607725, "grad_norm": 0.6761206388473511, "learning_rate": 8.93361885814274e-06, "loss": 0.3155, "step": 29575 }, { "epoch": 0.5340931250406258, "grad_norm": 0.37084195017814636, "learning_rate": 8.930798830855362e-06, "loss": 0.211, "step": 29580 }, { "epoch": 0.5341834044735264, "grad_norm": 0.25710147619247437, "learning_rate": 8.927978889577442e-06, "loss": 0.299, "step": 29585 }, { "epoch": 0.5342736839064272, "grad_norm": 0.9091326594352722, "learning_rate": 8.925159034535827e-06, "loss": 0.1839, "step": 29590 }, { "epoch": 0.5343639633393279, "grad_norm": 0.28009116649627686, "learning_rate": 8.922339265957352e-06, "loss": 0.1992, "step": 29595 }, { "epoch": 0.5344542427722286, "grad_norm": 0.28533223271369934, "learning_rate": 8.919519584068852e-06, "loss": 0.3137, "step": 29600 }, { "epoch": 0.5345445222051293, "grad_norm": 0.7236104607582092, "learning_rate": 8.91669998909714e-06, "loss": 0.28, "step": 29605 }, { "epoch": 0.53463480163803, "grad_norm": 0.9480448961257935, "learning_rate": 8.91388048126904e-06, "loss": 0.2162, "step": 29610 }, { "epoch": 0.5347250810709308, "grad_norm": 0.6658946871757507, "learning_rate": 8.911061060811357e-06, "loss": 0.2499, "step": 29615 }, { "epoch": 0.5348153605038315, "grad_norm": 0.46006113290786743, "learning_rate": 8.908241727950899e-06, "loss": 0.1842, "step": 29620 }, { "epoch": 0.5349056399367321, "grad_norm": 0.21971310675144196, "learning_rate": 8.90542248291445e-06, "loss": 0.1928, "step": 29625 }, { "epoch": 0.5349959193696329, "grad_norm": 0.33377861976623535, "learning_rate": 8.902603325928807e-06, "loss": 0.2457, "step": 29630 }, { "epoch": 0.5350861988025336, "grad_norm": 0.34162864089012146, "learning_rate": 8.899784257220745e-06, "loss": 0.1545, "step": 29635 }, { "epoch": 0.5351764782354343, "grad_norm": 0.5479598045349121, "learning_rate": 8.896965277017044e-06, "loss": 0.2917, "step": 29640 }, { "epoch": 0.535266757668335, "grad_norm": 0.46999359130859375, "learning_rate": 8.894146385544462e-06, "loss": 0.2553, "step": 29645 }, { "epoch": 0.5353570371012357, "grad_norm": 0.3225659430027008, "learning_rate": 8.89132758302977e-06, "loss": 0.2486, "step": 29650 }, { "epoch": 0.5354473165341365, "grad_norm": 0.4857708811759949, "learning_rate": 8.88850886969971e-06, "loss": 0.1502, "step": 29655 }, { "epoch": 0.5355375959670372, "grad_norm": 0.6933872103691101, "learning_rate": 8.885690245781027e-06, "loss": 0.2487, "step": 29660 }, { "epoch": 0.5356278753999378, "grad_norm": 0.5495598316192627, "learning_rate": 8.882871711500466e-06, "loss": 0.1984, "step": 29665 }, { "epoch": 0.5357181548328386, "grad_norm": 0.4869239330291748, "learning_rate": 8.88005326708475e-06, "loss": 0.3247, "step": 29670 }, { "epoch": 0.5358084342657393, "grad_norm": 0.8459439873695374, "learning_rate": 8.877234912760608e-06, "loss": 0.2102, "step": 29675 }, { "epoch": 0.5358987136986401, "grad_norm": 0.5308342576026917, "learning_rate": 8.874416648754752e-06, "loss": 0.1865, "step": 29680 }, { "epoch": 0.5359889931315407, "grad_norm": 0.32921162247657776, "learning_rate": 8.871598475293896e-06, "loss": 0.2981, "step": 29685 }, { "epoch": 0.5360792725644414, "grad_norm": 0.6725593209266663, "learning_rate": 8.868780392604735e-06, "loss": 0.2554, "step": 29690 }, { "epoch": 0.5361695519973422, "grad_norm": 0.48778754472732544, "learning_rate": 8.86596240091397e-06, "loss": 0.2012, "step": 29695 }, { "epoch": 0.5362598314302429, "grad_norm": 0.4959346055984497, "learning_rate": 8.86314450044828e-06, "loss": 0.3329, "step": 29700 }, { "epoch": 0.5363501108631435, "grad_norm": 0.3672713339328766, "learning_rate": 8.860326691434353e-06, "loss": 0.2391, "step": 29705 }, { "epoch": 0.5364403902960443, "grad_norm": 0.4734179675579071, "learning_rate": 8.857508974098855e-06, "loss": 0.2225, "step": 29710 }, { "epoch": 0.536530669728945, "grad_norm": 0.39586877822875977, "learning_rate": 8.854691348668459e-06, "loss": 0.2175, "step": 29715 }, { "epoch": 0.5366209491618458, "grad_norm": 0.43276047706604004, "learning_rate": 8.851873815369809e-06, "loss": 0.1962, "step": 29720 }, { "epoch": 0.5367112285947465, "grad_norm": 0.5799977779388428, "learning_rate": 8.849056374429567e-06, "loss": 0.2642, "step": 29725 }, { "epoch": 0.5368015080276471, "grad_norm": 0.3819090723991394, "learning_rate": 8.84623902607437e-06, "loss": 0.2946, "step": 29730 }, { "epoch": 0.5368917874605479, "grad_norm": 0.5902660489082336, "learning_rate": 8.843421770530859e-06, "loss": 0.2203, "step": 29735 }, { "epoch": 0.5369820668934486, "grad_norm": 0.4830811321735382, "learning_rate": 8.840604608025651e-06, "loss": 0.2205, "step": 29740 }, { "epoch": 0.5370723463263494, "grad_norm": 0.3830168843269348, "learning_rate": 8.837787538785377e-06, "loss": 0.2467, "step": 29745 }, { "epoch": 0.53716262575925, "grad_norm": 0.691828727722168, "learning_rate": 8.834970563036646e-06, "loss": 0.2306, "step": 29750 }, { "epoch": 0.5372529051921507, "grad_norm": 0.6907765865325928, "learning_rate": 8.83215368100606e-06, "loss": 0.2701, "step": 29755 }, { "epoch": 0.5373431846250515, "grad_norm": 0.5170467495918274, "learning_rate": 8.829336892920222e-06, "loss": 0.2605, "step": 29760 }, { "epoch": 0.5374334640579522, "grad_norm": 1.5071393251419067, "learning_rate": 8.826520199005714e-06, "loss": 0.2684, "step": 29765 }, { "epoch": 0.5375237434908529, "grad_norm": 0.46036243438720703, "learning_rate": 8.823703599489128e-06, "loss": 0.2093, "step": 29770 }, { "epoch": 0.5376140229237536, "grad_norm": 0.4511943757534027, "learning_rate": 8.820887094597034e-06, "loss": 0.2095, "step": 29775 }, { "epoch": 0.5377043023566543, "grad_norm": 0.5065392851829529, "learning_rate": 8.818070684556005e-06, "loss": 0.3288, "step": 29780 }, { "epoch": 0.5377945817895551, "grad_norm": 0.5359420776367188, "learning_rate": 8.815254369592589e-06, "loss": 0.2837, "step": 29785 }, { "epoch": 0.5378848612224557, "grad_norm": 0.6591514348983765, "learning_rate": 8.812438149933347e-06, "loss": 0.1904, "step": 29790 }, { "epoch": 0.5379751406553565, "grad_norm": 0.37006279826164246, "learning_rate": 8.809622025804821e-06, "loss": 0.2878, "step": 29795 }, { "epoch": 0.5380654200882572, "grad_norm": 0.5443669557571411, "learning_rate": 8.806805997433551e-06, "loss": 0.2524, "step": 29800 }, { "epoch": 0.5381556995211579, "grad_norm": 0.41262099146842957, "learning_rate": 8.80399006504606e-06, "loss": 0.2925, "step": 29805 }, { "epoch": 0.5382459789540586, "grad_norm": 0.40744054317474365, "learning_rate": 8.801174228868875e-06, "loss": 0.2424, "step": 29810 }, { "epoch": 0.5383362583869593, "grad_norm": 0.4741535484790802, "learning_rate": 8.798358489128505e-06, "loss": 0.3022, "step": 29815 }, { "epoch": 0.53842653781986, "grad_norm": 0.30072957277297974, "learning_rate": 8.79554284605146e-06, "loss": 0.2032, "step": 29820 }, { "epoch": 0.5385168172527608, "grad_norm": 0.5489832162857056, "learning_rate": 8.792727299864233e-06, "loss": 0.2193, "step": 29825 }, { "epoch": 0.5386070966856614, "grad_norm": 0.5997200012207031, "learning_rate": 8.789911850793318e-06, "loss": 0.2775, "step": 29830 }, { "epoch": 0.5386973761185622, "grad_norm": 0.4494878351688385, "learning_rate": 8.787096499065196e-06, "loss": 0.3104, "step": 29835 }, { "epoch": 0.5387876555514629, "grad_norm": 0.2640324532985687, "learning_rate": 8.784281244906344e-06, "loss": 0.2495, "step": 29840 }, { "epoch": 0.5388779349843636, "grad_norm": 0.6722503900527954, "learning_rate": 8.781466088543228e-06, "loss": 0.2428, "step": 29845 }, { "epoch": 0.5389682144172643, "grad_norm": 0.36204028129577637, "learning_rate": 8.7786510302023e-06, "loss": 0.2151, "step": 29850 }, { "epoch": 0.539058493850165, "grad_norm": 0.4201105237007141, "learning_rate": 8.775836070110022e-06, "loss": 0.1772, "step": 29855 }, { "epoch": 0.5391487732830658, "grad_norm": 0.487784743309021, "learning_rate": 8.773021208492826e-06, "loss": 0.2394, "step": 29860 }, { "epoch": 0.5392390527159665, "grad_norm": 0.3053692877292633, "learning_rate": 8.77020644557716e-06, "loss": 0.2204, "step": 29865 }, { "epoch": 0.5393293321488671, "grad_norm": 0.5000454783439636, "learning_rate": 8.767391781589439e-06, "loss": 0.2511, "step": 29870 }, { "epoch": 0.5394196115817679, "grad_norm": 0.5024951696395874, "learning_rate": 8.764577216756088e-06, "loss": 0.2015, "step": 29875 }, { "epoch": 0.5395098910146686, "grad_norm": 0.5001381039619446, "learning_rate": 8.761762751303517e-06, "loss": 0.1156, "step": 29880 }, { "epoch": 0.5396001704475694, "grad_norm": 0.2666400969028473, "learning_rate": 8.758948385458133e-06, "loss": 0.1681, "step": 29885 }, { "epoch": 0.53969044988047, "grad_norm": 0.47668972611427307, "learning_rate": 8.756134119446323e-06, "loss": 0.3536, "step": 29890 }, { "epoch": 0.5397807293133707, "grad_norm": 0.332539439201355, "learning_rate": 8.753319953494484e-06, "loss": 0.2453, "step": 29895 }, { "epoch": 0.5398710087462715, "grad_norm": 0.5720506310462952, "learning_rate": 8.750505887828985e-06, "loss": 0.2368, "step": 29900 }, { "epoch": 0.5399612881791722, "grad_norm": 0.30869919061660767, "learning_rate": 8.747691922676208e-06, "loss": 0.2336, "step": 29905 }, { "epoch": 0.5400515676120728, "grad_norm": 0.5556881427764893, "learning_rate": 8.744878058262507e-06, "loss": 0.2798, "step": 29910 }, { "epoch": 0.5401418470449736, "grad_norm": 4.185696601867676, "learning_rate": 8.742064294814246e-06, "loss": 0.2371, "step": 29915 }, { "epoch": 0.5402321264778743, "grad_norm": 0.3491530120372772, "learning_rate": 8.73925063255776e-06, "loss": 0.2588, "step": 29920 }, { "epoch": 0.5403224059107751, "grad_norm": 0.5603716373443604, "learning_rate": 8.736437071719397e-06, "loss": 0.2899, "step": 29925 }, { "epoch": 0.5404126853436757, "grad_norm": 0.6623853445053101, "learning_rate": 8.733623612525488e-06, "loss": 0.2577, "step": 29930 }, { "epoch": 0.5405029647765764, "grad_norm": 0.43849846720695496, "learning_rate": 8.730810255202347e-06, "loss": 0.1973, "step": 29935 }, { "epoch": 0.5405932442094772, "grad_norm": 0.5376783609390259, "learning_rate": 8.727996999976295e-06, "loss": 0.2428, "step": 29940 }, { "epoch": 0.5406835236423779, "grad_norm": 0.4216878116130829, "learning_rate": 8.725183847073633e-06, "loss": 0.3253, "step": 29945 }, { "epoch": 0.5407738030752786, "grad_norm": 0.4465843439102173, "learning_rate": 8.722370796720667e-06, "loss": 0.2747, "step": 29950 }, { "epoch": 0.5408640825081793, "grad_norm": 0.5713403224945068, "learning_rate": 8.719557849143676e-06, "loss": 0.1876, "step": 29955 }, { "epoch": 0.54095436194108, "grad_norm": 0.5041961073875427, "learning_rate": 8.716745004568949e-06, "loss": 0.2545, "step": 29960 }, { "epoch": 0.5410446413739808, "grad_norm": 0.3148564398288727, "learning_rate": 8.713932263222755e-06, "loss": 0.1554, "step": 29965 }, { "epoch": 0.5411349208068814, "grad_norm": 0.5915099382400513, "learning_rate": 8.71111962533136e-06, "loss": 0.2664, "step": 29970 }, { "epoch": 0.5412252002397822, "grad_norm": 0.4450978934764862, "learning_rate": 8.70830709112102e-06, "loss": 0.2685, "step": 29975 }, { "epoch": 0.5413154796726829, "grad_norm": 0.4449928402900696, "learning_rate": 8.705494660817987e-06, "loss": 0.2886, "step": 29980 }, { "epoch": 0.5414057591055836, "grad_norm": 0.5779251456260681, "learning_rate": 8.702682334648492e-06, "loss": 0.1615, "step": 29985 }, { "epoch": 0.5414960385384843, "grad_norm": 0.4866010844707489, "learning_rate": 8.699870112838772e-06, "loss": 0.2066, "step": 29990 }, { "epoch": 0.541586317971385, "grad_norm": 0.651261031627655, "learning_rate": 8.697057995615046e-06, "loss": 0.2259, "step": 29995 }, { "epoch": 0.5416765974042858, "grad_norm": 0.5383086204528809, "learning_rate": 8.694245983203538e-06, "loss": 0.2452, "step": 30000 }, { "epoch": 0.5417668768371865, "grad_norm": 0.5351186990737915, "learning_rate": 8.69143407583044e-06, "loss": 0.2553, "step": 30005 }, { "epoch": 0.5418571562700871, "grad_norm": 0.5017138123512268, "learning_rate": 8.68862227372196e-06, "loss": 0.2409, "step": 30010 }, { "epoch": 0.5419474357029879, "grad_norm": 0.5619741082191467, "learning_rate": 8.68581057710428e-06, "loss": 0.3087, "step": 30015 }, { "epoch": 0.5420377151358886, "grad_norm": 0.3829174041748047, "learning_rate": 8.682998986203589e-06, "loss": 0.2271, "step": 30020 }, { "epoch": 0.5421279945687894, "grad_norm": 0.3206920623779297, "learning_rate": 8.680187501246051e-06, "loss": 0.2102, "step": 30025 }, { "epoch": 0.54221827400169, "grad_norm": 0.4522305428981781, "learning_rate": 8.677376122457832e-06, "loss": 0.2373, "step": 30030 }, { "epoch": 0.5423085534345907, "grad_norm": 0.6030825972557068, "learning_rate": 8.67456485006509e-06, "loss": 0.2398, "step": 30035 }, { "epoch": 0.5423988328674915, "grad_norm": 0.4817292392253876, "learning_rate": 8.671753684293964e-06, "loss": 0.249, "step": 30040 }, { "epoch": 0.5424891123003922, "grad_norm": 0.5090333223342896, "learning_rate": 8.668942625370604e-06, "loss": 0.2429, "step": 30045 }, { "epoch": 0.5425793917332928, "grad_norm": 0.4519498944282532, "learning_rate": 8.666131673521126e-06, "loss": 0.2883, "step": 30050 }, { "epoch": 0.5426696711661936, "grad_norm": 0.3915843069553375, "learning_rate": 8.663320828971659e-06, "loss": 0.2167, "step": 30055 }, { "epoch": 0.5427599505990943, "grad_norm": 0.7703441977500916, "learning_rate": 8.660510091948312e-06, "loss": 0.1479, "step": 30060 }, { "epoch": 0.5428502300319951, "grad_norm": 0.649986207485199, "learning_rate": 8.657699462677193e-06, "loss": 0.1445, "step": 30065 }, { "epoch": 0.5429405094648957, "grad_norm": 0.5072412490844727, "learning_rate": 8.654888941384387e-06, "loss": 0.2073, "step": 30070 }, { "epoch": 0.5430307888977964, "grad_norm": 0.32711225748062134, "learning_rate": 8.652078528295989e-06, "loss": 0.2987, "step": 30075 }, { "epoch": 0.5431210683306972, "grad_norm": 0.3813733160495758, "learning_rate": 8.649268223638072e-06, "loss": 0.2443, "step": 30080 }, { "epoch": 0.5432113477635979, "grad_norm": 0.3937578499317169, "learning_rate": 8.646458027636707e-06, "loss": 0.2543, "step": 30085 }, { "epoch": 0.5433016271964985, "grad_norm": 0.19544045627117157, "learning_rate": 8.64364794051795e-06, "loss": 0.1893, "step": 30090 }, { "epoch": 0.5433919066293993, "grad_norm": 0.382412850856781, "learning_rate": 8.640837962507858e-06, "loss": 0.2122, "step": 30095 }, { "epoch": 0.5434821860623, "grad_norm": 0.6230947375297546, "learning_rate": 8.638028093832464e-06, "loss": 0.2841, "step": 30100 }, { "epoch": 0.5435724654952008, "grad_norm": 0.42004403471946716, "learning_rate": 8.635218334717813e-06, "loss": 0.2005, "step": 30105 }, { "epoch": 0.5436627449281014, "grad_norm": 0.46466878056526184, "learning_rate": 8.63240868538992e-06, "loss": 0.1808, "step": 30110 }, { "epoch": 0.5437530243610021, "grad_norm": 0.4530866742134094, "learning_rate": 8.629599146074811e-06, "loss": 0.2173, "step": 30115 }, { "epoch": 0.5438433037939029, "grad_norm": 0.635175347328186, "learning_rate": 8.626789716998482e-06, "loss": 0.2622, "step": 30120 }, { "epoch": 0.5439335832268036, "grad_norm": 0.3577985465526581, "learning_rate": 8.623980398386937e-06, "loss": 0.3134, "step": 30125 }, { "epoch": 0.5440238626597044, "grad_norm": 0.6977483034133911, "learning_rate": 8.621171190466167e-06, "loss": 0.1519, "step": 30130 }, { "epoch": 0.544114142092605, "grad_norm": 0.4724690318107605, "learning_rate": 8.618362093462145e-06, "loss": 0.2143, "step": 30135 }, { "epoch": 0.5442044215255057, "grad_norm": 0.5454923510551453, "learning_rate": 8.615553107600851e-06, "loss": 0.2889, "step": 30140 }, { "epoch": 0.5442947009584065, "grad_norm": 0.5041444301605225, "learning_rate": 8.61274423310824e-06, "loss": 0.2603, "step": 30145 }, { "epoch": 0.5443849803913072, "grad_norm": 0.5674653053283691, "learning_rate": 8.609935470210274e-06, "loss": 0.2271, "step": 30150 }, { "epoch": 0.5444752598242079, "grad_norm": 0.5120981335639954, "learning_rate": 8.607126819132885e-06, "loss": 0.2082, "step": 30155 }, { "epoch": 0.5445655392571086, "grad_norm": 1.0488464832305908, "learning_rate": 8.604318280102024e-06, "loss": 0.28, "step": 30160 }, { "epoch": 0.5446558186900093, "grad_norm": 0.3890886902809143, "learning_rate": 8.601509853343605e-06, "loss": 0.2601, "step": 30165 }, { "epoch": 0.5447460981229101, "grad_norm": 0.5292909741401672, "learning_rate": 8.598701539083552e-06, "loss": 0.2329, "step": 30170 }, { "epoch": 0.5448363775558107, "grad_norm": 0.4403056800365448, "learning_rate": 8.59589333754777e-06, "loss": 0.2538, "step": 30175 }, { "epoch": 0.5449266569887115, "grad_norm": 0.313803493976593, "learning_rate": 8.593085248962165e-06, "loss": 0.2003, "step": 30180 }, { "epoch": 0.5450169364216122, "grad_norm": 0.3524719774723053, "learning_rate": 8.590277273552618e-06, "loss": 0.2941, "step": 30185 }, { "epoch": 0.5451072158545129, "grad_norm": 0.4764222502708435, "learning_rate": 8.587469411545017e-06, "loss": 0.2931, "step": 30190 }, { "epoch": 0.5451974952874136, "grad_norm": 0.8016039729118347, "learning_rate": 8.584661663165228e-06, "loss": 0.2096, "step": 30195 }, { "epoch": 0.5452877747203143, "grad_norm": 0.29778075218200684, "learning_rate": 8.581854028639123e-06, "loss": 0.2752, "step": 30200 }, { "epoch": 0.545378054153215, "grad_norm": 0.4600449204444885, "learning_rate": 8.579046508192546e-06, "loss": 0.2206, "step": 30205 }, { "epoch": 0.5454683335861158, "grad_norm": 0.46086183190345764, "learning_rate": 8.576239102051349e-06, "loss": 0.2971, "step": 30210 }, { "epoch": 0.5455586130190164, "grad_norm": 0.7456693649291992, "learning_rate": 8.573431810441366e-06, "loss": 0.3141, "step": 30215 }, { "epoch": 0.5456488924519172, "grad_norm": 0.4374391436576843, "learning_rate": 8.570624633588416e-06, "loss": 0.2165, "step": 30220 }, { "epoch": 0.5457391718848179, "grad_norm": 0.3902812600135803, "learning_rate": 8.567817571718328e-06, "loss": 0.2213, "step": 30225 }, { "epoch": 0.5458294513177187, "grad_norm": 0.49425461888313293, "learning_rate": 8.565010625056897e-06, "loss": 0.2629, "step": 30230 }, { "epoch": 0.5459197307506193, "grad_norm": 0.27138713002204895, "learning_rate": 8.562203793829933e-06, "loss": 0.2566, "step": 30235 }, { "epoch": 0.54601001018352, "grad_norm": 0.322450190782547, "learning_rate": 8.559397078263217e-06, "loss": 0.3462, "step": 30240 }, { "epoch": 0.5461002896164208, "grad_norm": 0.5785377025604248, "learning_rate": 8.556590478582534e-06, "loss": 0.2137, "step": 30245 }, { "epoch": 0.5461905690493215, "grad_norm": 0.5261700749397278, "learning_rate": 8.55378399501365e-06, "loss": 0.2801, "step": 30250 }, { "epoch": 0.5462808484822221, "grad_norm": 0.8050107955932617, "learning_rate": 8.550977627782332e-06, "loss": 0.1884, "step": 30255 }, { "epoch": 0.5463711279151229, "grad_norm": 0.2777807414531708, "learning_rate": 8.548171377114324e-06, "loss": 0.2244, "step": 30260 }, { "epoch": 0.5464614073480236, "grad_norm": 0.3774738311767578, "learning_rate": 8.545365243235378e-06, "loss": 0.2375, "step": 30265 }, { "epoch": 0.5465516867809244, "grad_norm": 0.5623134970664978, "learning_rate": 8.542559226371217e-06, "loss": 0.2624, "step": 30270 }, { "epoch": 0.546641966213825, "grad_norm": 0.28870970010757446, "learning_rate": 8.539753326747573e-06, "loss": 0.2838, "step": 30275 }, { "epoch": 0.5467322456467257, "grad_norm": 0.5625777244567871, "learning_rate": 8.536947544590153e-06, "loss": 0.2504, "step": 30280 }, { "epoch": 0.5468225250796265, "grad_norm": 0.4875805675983429, "learning_rate": 8.534141880124672e-06, "loss": 0.2305, "step": 30285 }, { "epoch": 0.5469128045125272, "grad_norm": 0.4243050217628479, "learning_rate": 8.53133633357681e-06, "loss": 0.2689, "step": 30290 }, { "epoch": 0.5470030839454278, "grad_norm": 0.5069988369941711, "learning_rate": 8.52853090517227e-06, "loss": 0.2417, "step": 30295 }, { "epoch": 0.5470933633783286, "grad_norm": 0.40005379915237427, "learning_rate": 8.525725595136718e-06, "loss": 0.2321, "step": 30300 }, { "epoch": 0.5471836428112293, "grad_norm": 0.5960733890533447, "learning_rate": 8.52292040369582e-06, "loss": 0.1913, "step": 30305 }, { "epoch": 0.5472739222441301, "grad_norm": 0.4210493266582489, "learning_rate": 8.520115331075241e-06, "loss": 0.1685, "step": 30310 }, { "epoch": 0.5473642016770307, "grad_norm": 0.44696950912475586, "learning_rate": 8.517310377500619e-06, "loss": 0.2598, "step": 30315 }, { "epoch": 0.5474544811099314, "grad_norm": 0.3658466935157776, "learning_rate": 8.514505543197599e-06, "loss": 0.2373, "step": 30320 }, { "epoch": 0.5475447605428322, "grad_norm": 0.5077930688858032, "learning_rate": 8.511700828391807e-06, "loss": 0.2969, "step": 30325 }, { "epoch": 0.5476350399757329, "grad_norm": 0.2905920147895813, "learning_rate": 8.508896233308866e-06, "loss": 0.2063, "step": 30330 }, { "epoch": 0.5477253194086336, "grad_norm": 0.42059463262557983, "learning_rate": 8.506091758174375e-06, "loss": 0.2191, "step": 30335 }, { "epoch": 0.5478155988415343, "grad_norm": 0.4557342529296875, "learning_rate": 8.503287403213944e-06, "loss": 0.2594, "step": 30340 }, { "epoch": 0.547905878274435, "grad_norm": 0.5572250485420227, "learning_rate": 8.500483168653158e-06, "loss": 0.3236, "step": 30345 }, { "epoch": 0.5479961577073358, "grad_norm": 0.5110666155815125, "learning_rate": 8.497679054717603e-06, "loss": 0.2828, "step": 30350 }, { "epoch": 0.5480864371402364, "grad_norm": 0.5500931739807129, "learning_rate": 8.494875061632837e-06, "loss": 0.1782, "step": 30355 }, { "epoch": 0.5481767165731372, "grad_norm": 0.6230002641677856, "learning_rate": 8.492071189624436e-06, "loss": 0.3495, "step": 30360 }, { "epoch": 0.5482669960060379, "grad_norm": 0.5025086998939514, "learning_rate": 8.48926743891794e-06, "loss": 0.1751, "step": 30365 }, { "epoch": 0.5483572754389386, "grad_norm": 0.26423999667167664, "learning_rate": 8.486463809738896e-06, "loss": 0.2421, "step": 30370 }, { "epoch": 0.5484475548718393, "grad_norm": 0.3416552245616913, "learning_rate": 8.483660302312832e-06, "loss": 0.1558, "step": 30375 }, { "epoch": 0.54853783430474, "grad_norm": 0.35250940918922424, "learning_rate": 8.480856916865276e-06, "loss": 0.232, "step": 30380 }, { "epoch": 0.5486281137376408, "grad_norm": 0.19040101766586304, "learning_rate": 8.478053653621733e-06, "loss": 0.2796, "step": 30385 }, { "epoch": 0.5487183931705415, "grad_norm": 0.31666895747184753, "learning_rate": 8.475250512807709e-06, "loss": 0.2196, "step": 30390 }, { "epoch": 0.5488086726034421, "grad_norm": 0.36421525478363037, "learning_rate": 8.472447494648699e-06, "loss": 0.2334, "step": 30395 }, { "epoch": 0.5488989520363429, "grad_norm": 0.36844000220298767, "learning_rate": 8.469644599370174e-06, "loss": 0.2541, "step": 30400 }, { "epoch": 0.5489892314692436, "grad_norm": 0.9060449600219727, "learning_rate": 8.46684182719762e-06, "loss": 0.2211, "step": 30405 }, { "epoch": 0.5490795109021444, "grad_norm": 0.3441360294818878, "learning_rate": 8.46403917835649e-06, "loss": 0.2631, "step": 30410 }, { "epoch": 0.549169790335045, "grad_norm": 0.4760313034057617, "learning_rate": 8.461236653072245e-06, "loss": 0.1989, "step": 30415 }, { "epoch": 0.5492600697679457, "grad_norm": 0.20249541103839874, "learning_rate": 8.458434251570315e-06, "loss": 0.2381, "step": 30420 }, { "epoch": 0.5493503492008465, "grad_norm": 0.4496363699436188, "learning_rate": 8.45563197407615e-06, "loss": 0.2252, "step": 30425 }, { "epoch": 0.5494406286337472, "grad_norm": 0.5233907103538513, "learning_rate": 8.452829820815159e-06, "loss": 0.2399, "step": 30430 }, { "epoch": 0.5495309080666478, "grad_norm": 0.5437569618225098, "learning_rate": 8.450027792012763e-06, "loss": 0.2259, "step": 30435 }, { "epoch": 0.5496211874995486, "grad_norm": 0.4562883973121643, "learning_rate": 8.447225887894358e-06, "loss": 0.1948, "step": 30440 }, { "epoch": 0.5497114669324493, "grad_norm": 0.47309333086013794, "learning_rate": 8.444424108685346e-06, "loss": 0.255, "step": 30445 }, { "epoch": 0.5498017463653501, "grad_norm": 0.4495987296104431, "learning_rate": 8.4416224546111e-06, "loss": 0.2512, "step": 30450 }, { "epoch": 0.5498920257982507, "grad_norm": 0.6639607548713684, "learning_rate": 8.438820925897e-06, "loss": 0.2563, "step": 30455 }, { "epoch": 0.5499823052311514, "grad_norm": 0.5853849649429321, "learning_rate": 8.436019522768402e-06, "loss": 0.2935, "step": 30460 }, { "epoch": 0.5500725846640522, "grad_norm": 0.570285975933075, "learning_rate": 8.433218245450666e-06, "loss": 0.2587, "step": 30465 }, { "epoch": 0.5501628640969529, "grad_norm": 0.42344382405281067, "learning_rate": 8.430417094169128e-06, "loss": 0.1531, "step": 30470 }, { "epoch": 0.5502531435298536, "grad_norm": 0.576492428779602, "learning_rate": 8.427616069149123e-06, "loss": 0.1968, "step": 30475 }, { "epoch": 0.5503434229627543, "grad_norm": 0.40256741642951965, "learning_rate": 8.424815170615972e-06, "loss": 0.2192, "step": 30480 }, { "epoch": 0.550433702395655, "grad_norm": 0.30532220005989075, "learning_rate": 8.42201439879499e-06, "loss": 0.1906, "step": 30485 }, { "epoch": 0.5505239818285558, "grad_norm": 0.29087671637535095, "learning_rate": 8.419213753911478e-06, "loss": 0.2323, "step": 30490 }, { "epoch": 0.5506142612614564, "grad_norm": 0.38298240303993225, "learning_rate": 8.416413236190723e-06, "loss": 0.2341, "step": 30495 }, { "epoch": 0.5507045406943571, "grad_norm": 0.33192840218544006, "learning_rate": 8.41361284585801e-06, "loss": 0.1706, "step": 30500 }, { "epoch": 0.5507948201272579, "grad_norm": 0.38980022072792053, "learning_rate": 8.410812583138608e-06, "loss": 0.2188, "step": 30505 }, { "epoch": 0.5508850995601586, "grad_norm": 0.6038150191307068, "learning_rate": 8.408012448257784e-06, "loss": 0.307, "step": 30510 }, { "epoch": 0.5509753789930594, "grad_norm": 0.39100486040115356, "learning_rate": 8.405212441440778e-06, "loss": 0.3175, "step": 30515 }, { "epoch": 0.55106565842596, "grad_norm": 0.2844029366970062, "learning_rate": 8.402412562912839e-06, "loss": 0.1746, "step": 30520 }, { "epoch": 0.5511559378588607, "grad_norm": 0.5369822382926941, "learning_rate": 8.399612812899191e-06, "loss": 0.2315, "step": 30525 }, { "epoch": 0.5512462172917615, "grad_norm": 0.5778539180755615, "learning_rate": 8.396813191625061e-06, "loss": 0.2859, "step": 30530 }, { "epoch": 0.5513364967246622, "grad_norm": 0.42409980297088623, "learning_rate": 8.39401369931565e-06, "loss": 0.2003, "step": 30535 }, { "epoch": 0.5514267761575629, "grad_norm": 0.32716935873031616, "learning_rate": 8.391214336196162e-06, "loss": 0.2488, "step": 30540 }, { "epoch": 0.5515170555904636, "grad_norm": 0.4676063656806946, "learning_rate": 8.38841510249178e-06, "loss": 0.2995, "step": 30545 }, { "epoch": 0.5516073350233643, "grad_norm": 0.8899534940719604, "learning_rate": 8.38561599842769e-06, "loss": 0.2397, "step": 30550 }, { "epoch": 0.5516976144562651, "grad_norm": 0.37093839049339294, "learning_rate": 8.38281702422905e-06, "loss": 0.2477, "step": 30555 }, { "epoch": 0.5517878938891657, "grad_norm": 0.45466116070747375, "learning_rate": 8.38001818012103e-06, "loss": 0.2345, "step": 30560 }, { "epoch": 0.5518781733220665, "grad_norm": 0.3915674388408661, "learning_rate": 8.377219466328763e-06, "loss": 0.1595, "step": 30565 }, { "epoch": 0.5519684527549672, "grad_norm": 0.4925644099712372, "learning_rate": 8.374420883077395e-06, "loss": 0.286, "step": 30570 }, { "epoch": 0.5520587321878679, "grad_norm": 3.358842134475708, "learning_rate": 8.371622430592048e-06, "loss": 0.22, "step": 30575 }, { "epoch": 0.5521490116207686, "grad_norm": 0.38430550694465637, "learning_rate": 8.368824109097833e-06, "loss": 0.2727, "step": 30580 }, { "epoch": 0.5522392910536693, "grad_norm": 0.36558714509010315, "learning_rate": 8.36602591881986e-06, "loss": 0.1963, "step": 30585 }, { "epoch": 0.55232957048657, "grad_norm": 0.6529437899589539, "learning_rate": 8.36322785998322e-06, "loss": 0.2819, "step": 30590 }, { "epoch": 0.5524198499194708, "grad_norm": 0.334749698638916, "learning_rate": 8.360429932813005e-06, "loss": 0.2205, "step": 30595 }, { "epoch": 0.5525101293523714, "grad_norm": 0.6322199702262878, "learning_rate": 8.357632137534275e-06, "loss": 0.1497, "step": 30600 }, { "epoch": 0.5526004087852722, "grad_norm": 0.41495248675346375, "learning_rate": 8.3548344743721e-06, "loss": 0.2921, "step": 30605 }, { "epoch": 0.5526906882181729, "grad_norm": 0.7760505676269531, "learning_rate": 8.352036943551527e-06, "loss": 0.4004, "step": 30610 }, { "epoch": 0.5527809676510737, "grad_norm": 0.4055958688259125, "learning_rate": 8.349239545297605e-06, "loss": 0.2173, "step": 30615 }, { "epoch": 0.5528712470839743, "grad_norm": 0.4020351469516754, "learning_rate": 8.346442279835356e-06, "loss": 0.1487, "step": 30620 }, { "epoch": 0.552961526516875, "grad_norm": 0.45829981565475464, "learning_rate": 8.343645147389807e-06, "loss": 0.3023, "step": 30625 }, { "epoch": 0.5530518059497758, "grad_norm": 0.6017299890518188, "learning_rate": 8.340848148185959e-06, "loss": 0.2992, "step": 30630 }, { "epoch": 0.5531420853826765, "grad_norm": 0.4230525493621826, "learning_rate": 8.338051282448816e-06, "loss": 0.2808, "step": 30635 }, { "epoch": 0.5532323648155771, "grad_norm": 0.5015904307365417, "learning_rate": 8.335254550403361e-06, "loss": 0.2763, "step": 30640 }, { "epoch": 0.5533226442484779, "grad_norm": 0.5678372979164124, "learning_rate": 8.332457952274578e-06, "loss": 0.202, "step": 30645 }, { "epoch": 0.5534129236813786, "grad_norm": 0.3022325038909912, "learning_rate": 8.329661488287425e-06, "loss": 0.1977, "step": 30650 }, { "epoch": 0.5535032031142794, "grad_norm": 0.541505753993988, "learning_rate": 8.326865158666863e-06, "loss": 0.2346, "step": 30655 }, { "epoch": 0.55359348254718, "grad_norm": 0.3254064917564392, "learning_rate": 8.324068963637832e-06, "loss": 0.1988, "step": 30660 }, { "epoch": 0.5536837619800807, "grad_norm": 0.553368091583252, "learning_rate": 8.321272903425273e-06, "loss": 0.2904, "step": 30665 }, { "epoch": 0.5537740414129815, "grad_norm": 0.5332421660423279, "learning_rate": 8.3184769782541e-06, "loss": 0.2829, "step": 30670 }, { "epoch": 0.5538643208458822, "grad_norm": 0.7650421261787415, "learning_rate": 8.315681188349225e-06, "loss": 0.3384, "step": 30675 }, { "epoch": 0.5539546002787828, "grad_norm": 0.4014601409435272, "learning_rate": 8.312885533935555e-06, "loss": 0.2257, "step": 30680 }, { "epoch": 0.5540448797116836, "grad_norm": 0.764105498790741, "learning_rate": 8.310090015237977e-06, "loss": 0.211, "step": 30685 }, { "epoch": 0.5541351591445843, "grad_norm": 0.3718617558479309, "learning_rate": 8.307294632481373e-06, "loss": 0.2753, "step": 30690 }, { "epoch": 0.5542254385774851, "grad_norm": 0.3817625045776367, "learning_rate": 8.304499385890605e-06, "loss": 0.1838, "step": 30695 }, { "epoch": 0.5543157180103857, "grad_norm": 0.3545447587966919, "learning_rate": 8.301704275690535e-06, "loss": 0.226, "step": 30700 }, { "epoch": 0.5544059974432864, "grad_norm": 0.49707674980163574, "learning_rate": 8.298909302106007e-06, "loss": 0.2114, "step": 30705 }, { "epoch": 0.5544962768761872, "grad_norm": 0.4153108298778534, "learning_rate": 8.296114465361861e-06, "loss": 0.2627, "step": 30710 }, { "epoch": 0.5545865563090879, "grad_norm": 0.6245543956756592, "learning_rate": 8.293319765682915e-06, "loss": 0.2159, "step": 30715 }, { "epoch": 0.5546768357419886, "grad_norm": 0.3766345977783203, "learning_rate": 8.290525203293987e-06, "loss": 0.2383, "step": 30720 }, { "epoch": 0.5547671151748893, "grad_norm": 0.4611589312553406, "learning_rate": 8.287730778419874e-06, "loss": 0.2811, "step": 30725 }, { "epoch": 0.55485739460779, "grad_norm": 0.4506969451904297, "learning_rate": 8.284936491285375e-06, "loss": 0.2702, "step": 30730 }, { "epoch": 0.5549476740406908, "grad_norm": 0.510202169418335, "learning_rate": 8.282142342115262e-06, "loss": 0.1874, "step": 30735 }, { "epoch": 0.5550379534735914, "grad_norm": 0.2654961347579956, "learning_rate": 8.279348331134309e-06, "loss": 0.3008, "step": 30740 }, { "epoch": 0.5551282329064922, "grad_norm": 0.4245106875896454, "learning_rate": 8.27655445856727e-06, "loss": 0.1683, "step": 30745 }, { "epoch": 0.5552185123393929, "grad_norm": 0.4185675382614136, "learning_rate": 8.273760724638898e-06, "loss": 0.2506, "step": 30750 }, { "epoch": 0.5553087917722936, "grad_norm": 0.4537559151649475, "learning_rate": 8.270967129573922e-06, "loss": 0.1892, "step": 30755 }, { "epoch": 0.5553990712051943, "grad_norm": 0.8804592490196228, "learning_rate": 8.268173673597072e-06, "loss": 0.2101, "step": 30760 }, { "epoch": 0.555489350638095, "grad_norm": 0.32881176471710205, "learning_rate": 8.265380356933058e-06, "loss": 0.1157, "step": 30765 }, { "epoch": 0.5555796300709958, "grad_norm": 0.529468834400177, "learning_rate": 8.262587179806579e-06, "loss": 0.2144, "step": 30770 }, { "epoch": 0.5556699095038965, "grad_norm": 0.3170679211616516, "learning_rate": 8.259794142442334e-06, "loss": 0.199, "step": 30775 }, { "epoch": 0.5557601889367971, "grad_norm": 0.4670516848564148, "learning_rate": 8.257001245064992e-06, "loss": 0.3112, "step": 30780 }, { "epoch": 0.5558504683696979, "grad_norm": 0.46530792117118835, "learning_rate": 8.254208487899231e-06, "loss": 0.2668, "step": 30785 }, { "epoch": 0.5559407478025986, "grad_norm": 0.47129592299461365, "learning_rate": 8.251415871169701e-06, "loss": 0.1948, "step": 30790 }, { "epoch": 0.5560310272354994, "grad_norm": 0.5364168882369995, "learning_rate": 8.248623395101056e-06, "loss": 0.2282, "step": 30795 }, { "epoch": 0.5561213066684, "grad_norm": 0.483106791973114, "learning_rate": 8.245831059917921e-06, "loss": 0.2011, "step": 30800 }, { "epoch": 0.5562115861013007, "grad_norm": 0.42646482586860657, "learning_rate": 8.243038865844923e-06, "loss": 0.3091, "step": 30805 }, { "epoch": 0.5563018655342015, "grad_norm": 0.49516451358795166, "learning_rate": 8.240246813106674e-06, "loss": 0.2004, "step": 30810 }, { "epoch": 0.5563921449671022, "grad_norm": 0.41743355989456177, "learning_rate": 8.237454901927775e-06, "loss": 0.252, "step": 30815 }, { "epoch": 0.5564824244000028, "grad_norm": 0.46710988879203796, "learning_rate": 8.234663132532812e-06, "loss": 0.2055, "step": 30820 }, { "epoch": 0.5565727038329036, "grad_norm": 0.48925188183784485, "learning_rate": 8.231871505146371e-06, "loss": 0.2268, "step": 30825 }, { "epoch": 0.5566629832658043, "grad_norm": 0.48817548155784607, "learning_rate": 8.229080019993007e-06, "loss": 0.2631, "step": 30830 }, { "epoch": 0.5567532626987051, "grad_norm": 0.40382134914398193, "learning_rate": 8.22628867729728e-06, "loss": 0.1754, "step": 30835 }, { "epoch": 0.5568435421316057, "grad_norm": 0.6790839433670044, "learning_rate": 8.223497477283732e-06, "loss": 0.2099, "step": 30840 }, { "epoch": 0.5569338215645064, "grad_norm": 0.5202447772026062, "learning_rate": 8.2207064201769e-06, "loss": 0.1921, "step": 30845 }, { "epoch": 0.5570241009974072, "grad_norm": 0.364751398563385, "learning_rate": 8.217915506201295e-06, "loss": 0.2326, "step": 30850 }, { "epoch": 0.5571143804303079, "grad_norm": 0.48742225766181946, "learning_rate": 8.215124735581432e-06, "loss": 0.3181, "step": 30855 }, { "epoch": 0.5572046598632086, "grad_norm": 0.5118826031684875, "learning_rate": 8.21233410854181e-06, "loss": 0.2389, "step": 30860 }, { "epoch": 0.5572949392961093, "grad_norm": 0.7876133322715759, "learning_rate": 8.209543625306906e-06, "loss": 0.2736, "step": 30865 }, { "epoch": 0.55738521872901, "grad_norm": 0.533372700214386, "learning_rate": 8.206753286101203e-06, "loss": 0.1628, "step": 30870 }, { "epoch": 0.5574754981619108, "grad_norm": 0.39007723331451416, "learning_rate": 8.203963091149156e-06, "loss": 0.2262, "step": 30875 }, { "epoch": 0.5575657775948114, "grad_norm": 0.14329008758068085, "learning_rate": 8.201173040675223e-06, "loss": 0.2332, "step": 30880 }, { "epoch": 0.5576560570277121, "grad_norm": 0.5678766369819641, "learning_rate": 8.198383134903839e-06, "loss": 0.2012, "step": 30885 }, { "epoch": 0.5577463364606129, "grad_norm": 0.4618380069732666, "learning_rate": 8.195593374059435e-06, "loss": 0.3058, "step": 30890 }, { "epoch": 0.5578366158935136, "grad_norm": 0.48835116624832153, "learning_rate": 8.192803758366421e-06, "loss": 0.2008, "step": 30895 }, { "epoch": 0.5579268953264143, "grad_norm": 0.4542429745197296, "learning_rate": 8.190014288049208e-06, "loss": 0.2599, "step": 30900 }, { "epoch": 0.558017174759315, "grad_norm": 0.5513837933540344, "learning_rate": 8.187224963332184e-06, "loss": 0.2203, "step": 30905 }, { "epoch": 0.5581074541922157, "grad_norm": 0.3412122130393982, "learning_rate": 8.184435784439735e-06, "loss": 0.2659, "step": 30910 }, { "epoch": 0.5581977336251165, "grad_norm": 0.3571067750453949, "learning_rate": 8.181646751596223e-06, "loss": 0.2321, "step": 30915 }, { "epoch": 0.5582880130580172, "grad_norm": 0.7699437737464905, "learning_rate": 8.17885786502601e-06, "loss": 0.2148, "step": 30920 }, { "epoch": 0.5583782924909179, "grad_norm": 0.4913608729839325, "learning_rate": 8.17606912495344e-06, "loss": 0.2209, "step": 30925 }, { "epoch": 0.5584685719238186, "grad_norm": 0.5387104153633118, "learning_rate": 8.173280531602852e-06, "loss": 0.3064, "step": 30930 }, { "epoch": 0.5585588513567193, "grad_norm": 0.4020043909549713, "learning_rate": 8.170492085198555e-06, "loss": 0.2623, "step": 30935 }, { "epoch": 0.5586491307896201, "grad_norm": 0.4475884735584259, "learning_rate": 8.167703785964877e-06, "loss": 0.2165, "step": 30940 }, { "epoch": 0.5587394102225207, "grad_norm": 0.26421064138412476, "learning_rate": 8.164915634126103e-06, "loss": 0.187, "step": 30945 }, { "epoch": 0.5588296896554215, "grad_norm": 0.43203189969062805, "learning_rate": 8.162127629906524e-06, "loss": 0.2383, "step": 30950 }, { "epoch": 0.5589199690883222, "grad_norm": 0.37303704023361206, "learning_rate": 8.159339773530418e-06, "loss": 0.2566, "step": 30955 }, { "epoch": 0.5590102485212229, "grad_norm": 0.3759598135948181, "learning_rate": 8.15655206522204e-06, "loss": 0.2628, "step": 30960 }, { "epoch": 0.5591005279541236, "grad_norm": 0.2845647633075714, "learning_rate": 8.153764505205647e-06, "loss": 0.2133, "step": 30965 }, { "epoch": 0.5591908073870243, "grad_norm": 0.4346495270729065, "learning_rate": 8.150977093705474e-06, "loss": 0.1625, "step": 30970 }, { "epoch": 0.559281086819925, "grad_norm": 0.5033888816833496, "learning_rate": 8.148189830945755e-06, "loss": 0.2136, "step": 30975 }, { "epoch": 0.5593713662528258, "grad_norm": 0.34858712553977966, "learning_rate": 8.145402717150694e-06, "loss": 0.2012, "step": 30980 }, { "epoch": 0.5594616456857264, "grad_norm": 0.4071477949619293, "learning_rate": 8.142615752544504e-06, "loss": 0.2675, "step": 30985 }, { "epoch": 0.5595519251186272, "grad_norm": 0.15057164430618286, "learning_rate": 8.13982893735137e-06, "loss": 0.2205, "step": 30990 }, { "epoch": 0.5596422045515279, "grad_norm": 0.5766903758049011, "learning_rate": 8.137042271795478e-06, "loss": 0.2327, "step": 30995 }, { "epoch": 0.5597324839844287, "grad_norm": 0.3336317837238312, "learning_rate": 8.134255756100983e-06, "loss": 0.207, "step": 31000 }, { "epoch": 0.5598227634173293, "grad_norm": 0.42386963963508606, "learning_rate": 8.131469390492053e-06, "loss": 0.2551, "step": 31005 }, { "epoch": 0.55991304285023, "grad_norm": 0.5276187062263489, "learning_rate": 8.128683175192821e-06, "loss": 0.2054, "step": 31010 }, { "epoch": 0.5600033222831308, "grad_norm": 0.8299673795700073, "learning_rate": 8.125897110427426e-06, "loss": 0.2904, "step": 31015 }, { "epoch": 0.5600936017160315, "grad_norm": 0.49706336855888367, "learning_rate": 8.123111196419976e-06, "loss": 0.2738, "step": 31020 }, { "epoch": 0.5601838811489321, "grad_norm": 0.8560537695884705, "learning_rate": 8.120325433394593e-06, "loss": 0.2854, "step": 31025 }, { "epoch": 0.5602741605818329, "grad_norm": 0.3300696611404419, "learning_rate": 8.117539821575355e-06, "loss": 0.3097, "step": 31030 }, { "epoch": 0.5603644400147336, "grad_norm": 0.31206315755844116, "learning_rate": 8.114754361186355e-06, "loss": 0.2221, "step": 31035 }, { "epoch": 0.5604547194476344, "grad_norm": 0.5207038521766663, "learning_rate": 8.111969052451661e-06, "loss": 0.1785, "step": 31040 }, { "epoch": 0.560544998880535, "grad_norm": 0.3452952802181244, "learning_rate": 8.109183895595325e-06, "loss": 0.2017, "step": 31045 }, { "epoch": 0.5606352783134357, "grad_norm": 0.5093872547149658, "learning_rate": 8.1063988908414e-06, "loss": 0.2114, "step": 31050 }, { "epoch": 0.5607255577463365, "grad_norm": 0.4056823253631592, "learning_rate": 8.103614038413915e-06, "loss": 0.2386, "step": 31055 }, { "epoch": 0.5608158371792372, "grad_norm": 0.4714640974998474, "learning_rate": 8.100829338536895e-06, "loss": 0.1914, "step": 31060 }, { "epoch": 0.5609061166121379, "grad_norm": 0.435139000415802, "learning_rate": 8.098044791434341e-06, "loss": 0.1605, "step": 31065 }, { "epoch": 0.5609963960450386, "grad_norm": 0.9441424012184143, "learning_rate": 8.09526039733026e-06, "loss": 0.1847, "step": 31070 }, { "epoch": 0.5610866754779393, "grad_norm": 0.369119256734848, "learning_rate": 8.092476156448629e-06, "loss": 0.1681, "step": 31075 }, { "epoch": 0.5611769549108401, "grad_norm": 0.44150403141975403, "learning_rate": 8.089692069013424e-06, "loss": 0.2804, "step": 31080 }, { "epoch": 0.5612672343437407, "grad_norm": 0.5297713279724121, "learning_rate": 8.086908135248599e-06, "loss": 0.2043, "step": 31085 }, { "epoch": 0.5613575137766414, "grad_norm": 0.4562094509601593, "learning_rate": 8.08412435537811e-06, "loss": 0.237, "step": 31090 }, { "epoch": 0.5614477932095422, "grad_norm": 0.5448012351989746, "learning_rate": 8.081340729625883e-06, "loss": 0.2525, "step": 31095 }, { "epoch": 0.5615380726424429, "grad_norm": 0.528515636920929, "learning_rate": 8.078557258215847e-06, "loss": 0.3582, "step": 31100 }, { "epoch": 0.5616283520753436, "grad_norm": 0.4745613932609558, "learning_rate": 8.075773941371907e-06, "loss": 0.2392, "step": 31105 }, { "epoch": 0.5617186315082443, "grad_norm": 0.2186175137758255, "learning_rate": 8.072990779317965e-06, "loss": 0.1902, "step": 31110 }, { "epoch": 0.561808910941145, "grad_norm": 0.5137507319450378, "learning_rate": 8.070207772277902e-06, "loss": 0.1964, "step": 31115 }, { "epoch": 0.5618991903740458, "grad_norm": 0.5128169059753418, "learning_rate": 8.067424920475595e-06, "loss": 0.2262, "step": 31120 }, { "epoch": 0.5619894698069464, "grad_norm": 0.33977770805358887, "learning_rate": 8.064642224134898e-06, "loss": 0.278, "step": 31125 }, { "epoch": 0.5620797492398472, "grad_norm": 0.3779820501804352, "learning_rate": 8.061859683479667e-06, "loss": 0.1944, "step": 31130 }, { "epoch": 0.5621700286727479, "grad_norm": 0.5303663611412048, "learning_rate": 8.059077298733734e-06, "loss": 0.2224, "step": 31135 }, { "epoch": 0.5622603081056486, "grad_norm": 0.5823243260383606, "learning_rate": 8.056295070120917e-06, "loss": 0.2282, "step": 31140 }, { "epoch": 0.5623505875385493, "grad_norm": 0.6812359690666199, "learning_rate": 8.053512997865029e-06, "loss": 0.2798, "step": 31145 }, { "epoch": 0.56244086697145, "grad_norm": 0.3564703166484833, "learning_rate": 8.050731082189869e-06, "loss": 0.1607, "step": 31150 }, { "epoch": 0.5625311464043508, "grad_norm": 0.9905957579612732, "learning_rate": 8.047949323319224e-06, "loss": 0.2861, "step": 31155 }, { "epoch": 0.5626214258372515, "grad_norm": 0.4461768567562103, "learning_rate": 8.045167721476859e-06, "loss": 0.349, "step": 31160 }, { "epoch": 0.5627117052701521, "grad_norm": 0.29345831274986267, "learning_rate": 8.042386276886539e-06, "loss": 0.3028, "step": 31165 }, { "epoch": 0.5628019847030529, "grad_norm": 0.283610999584198, "learning_rate": 8.039604989772008e-06, "loss": 0.2405, "step": 31170 }, { "epoch": 0.5628922641359536, "grad_norm": 0.648016631603241, "learning_rate": 8.036823860357006e-06, "loss": 0.1439, "step": 31175 }, { "epoch": 0.5629825435688544, "grad_norm": 0.426203191280365, "learning_rate": 8.034042888865248e-06, "loss": 0.2108, "step": 31180 }, { "epoch": 0.563072823001755, "grad_norm": 0.3136540949344635, "learning_rate": 8.031262075520448e-06, "loss": 0.2479, "step": 31185 }, { "epoch": 0.5631631024346557, "grad_norm": 0.44544175267219543, "learning_rate": 8.028481420546296e-06, "loss": 0.3021, "step": 31190 }, { "epoch": 0.5632533818675565, "grad_norm": 0.4027963876724243, "learning_rate": 8.025700924166485e-06, "loss": 0.183, "step": 31195 }, { "epoch": 0.5633436613004572, "grad_norm": 0.5416713356971741, "learning_rate": 8.022920586604672e-06, "loss": 0.2933, "step": 31200 }, { "epoch": 0.5634339407333578, "grad_norm": 0.74769526720047, "learning_rate": 8.020140408084532e-06, "loss": 0.1967, "step": 31205 }, { "epoch": 0.5635242201662586, "grad_norm": 0.43441691994667053, "learning_rate": 8.017360388829695e-06, "loss": 0.1863, "step": 31210 }, { "epoch": 0.5636144995991593, "grad_norm": 0.38378357887268066, "learning_rate": 8.014580529063802e-06, "loss": 0.2372, "step": 31215 }, { "epoch": 0.5637047790320601, "grad_norm": 0.321071594953537, "learning_rate": 8.011800829010471e-06, "loss": 0.2101, "step": 31220 }, { "epoch": 0.5637950584649607, "grad_norm": 0.2700953483581543, "learning_rate": 8.009021288893306e-06, "loss": 0.2174, "step": 31225 }, { "epoch": 0.5638853378978614, "grad_norm": 0.5070574879646301, "learning_rate": 8.006241908935904e-06, "loss": 0.2694, "step": 31230 }, { "epoch": 0.5639756173307622, "grad_norm": 0.2650270462036133, "learning_rate": 8.003462689361842e-06, "loss": 0.2226, "step": 31235 }, { "epoch": 0.5640658967636629, "grad_norm": 0.4303354322910309, "learning_rate": 8.000683630394694e-06, "loss": 0.2199, "step": 31240 }, { "epoch": 0.5641561761965636, "grad_norm": 0.4189873933792114, "learning_rate": 7.997904732258007e-06, "loss": 0.1785, "step": 31245 }, { "epoch": 0.5642464556294643, "grad_norm": 0.3254397511482239, "learning_rate": 7.995125995175331e-06, "loss": 0.3304, "step": 31250 }, { "epoch": 0.564336735062365, "grad_norm": 0.48033246397972107, "learning_rate": 7.99234741937019e-06, "loss": 0.2742, "step": 31255 }, { "epoch": 0.5644270144952658, "grad_norm": 0.5633978843688965, "learning_rate": 7.989569005066105e-06, "loss": 0.1938, "step": 31260 }, { "epoch": 0.5645172939281664, "grad_norm": 0.4215300381183624, "learning_rate": 7.986790752486571e-06, "loss": 0.2811, "step": 31265 }, { "epoch": 0.5646075733610671, "grad_norm": 0.49446821212768555, "learning_rate": 7.984012661855088e-06, "loss": 0.2229, "step": 31270 }, { "epoch": 0.5646978527939679, "grad_norm": 0.5683329701423645, "learning_rate": 7.981234733395126e-06, "loss": 0.3294, "step": 31275 }, { "epoch": 0.5647881322268686, "grad_norm": 0.3849588930606842, "learning_rate": 7.978456967330153e-06, "loss": 0.2557, "step": 31280 }, { "epoch": 0.5648784116597693, "grad_norm": 0.6493882536888123, "learning_rate": 7.975679363883617e-06, "loss": 0.2778, "step": 31285 }, { "epoch": 0.56496869109267, "grad_norm": 0.372500479221344, "learning_rate": 7.972901923278964e-06, "loss": 0.1883, "step": 31290 }, { "epoch": 0.5650589705255707, "grad_norm": 0.6053361296653748, "learning_rate": 7.970124645739607e-06, "loss": 0.2206, "step": 31295 }, { "epoch": 0.5651492499584715, "grad_norm": 0.533922016620636, "learning_rate": 7.967347531488967e-06, "loss": 0.3048, "step": 31300 }, { "epoch": 0.5652395293913721, "grad_norm": 0.7627965211868286, "learning_rate": 7.964570580750435e-06, "loss": 0.2308, "step": 31305 }, { "epoch": 0.5653298088242729, "grad_norm": 0.40491706132888794, "learning_rate": 7.961793793747407e-06, "loss": 0.2021, "step": 31310 }, { "epoch": 0.5654200882571736, "grad_norm": 0.258077472448349, "learning_rate": 7.959017170703247e-06, "loss": 0.2252, "step": 31315 }, { "epoch": 0.5655103676900743, "grad_norm": 0.5910612940788269, "learning_rate": 7.956240711841311e-06, "loss": 0.3261, "step": 31320 }, { "epoch": 0.5656006471229751, "grad_norm": 0.22161012887954712, "learning_rate": 7.953464417384959e-06, "loss": 0.2188, "step": 31325 }, { "epoch": 0.5656909265558757, "grad_norm": 0.20220966637134552, "learning_rate": 7.950688287557506e-06, "loss": 0.2747, "step": 31330 }, { "epoch": 0.5657812059887765, "grad_norm": 0.40949514508247375, "learning_rate": 7.947912322582288e-06, "loss": 0.1938, "step": 31335 }, { "epoch": 0.5658714854216772, "grad_norm": 0.718718945980072, "learning_rate": 7.945136522682596e-06, "loss": 0.1716, "step": 31340 }, { "epoch": 0.5659617648545779, "grad_norm": 0.5266116857528687, "learning_rate": 7.942360888081735e-06, "loss": 0.3613, "step": 31345 }, { "epoch": 0.5660520442874786, "grad_norm": 0.4842294454574585, "learning_rate": 7.939585419002979e-06, "loss": 0.2993, "step": 31350 }, { "epoch": 0.5661423237203793, "grad_norm": 0.571361780166626, "learning_rate": 7.936810115669597e-06, "loss": 0.2093, "step": 31355 }, { "epoch": 0.5662326031532801, "grad_norm": 0.5041170120239258, "learning_rate": 7.934034978304838e-06, "loss": 0.2402, "step": 31360 }, { "epoch": 0.5663228825861808, "grad_norm": 0.4833357036113739, "learning_rate": 7.931260007131947e-06, "loss": 0.2389, "step": 31365 }, { "epoch": 0.5664131620190814, "grad_norm": 0.34955894947052, "learning_rate": 7.928485202374143e-06, "loss": 0.2534, "step": 31370 }, { "epoch": 0.5665034414519822, "grad_norm": 0.4219762682914734, "learning_rate": 7.925710564254649e-06, "loss": 0.3005, "step": 31375 }, { "epoch": 0.5665937208848829, "grad_norm": 0.6094689965248108, "learning_rate": 7.922936092996652e-06, "loss": 0.2565, "step": 31380 }, { "epoch": 0.5666840003177837, "grad_norm": 0.30353668332099915, "learning_rate": 7.920161788823348e-06, "loss": 0.2227, "step": 31385 }, { "epoch": 0.5667742797506843, "grad_norm": 0.4556208550930023, "learning_rate": 7.917387651957904e-06, "loss": 0.1577, "step": 31390 }, { "epoch": 0.566864559183585, "grad_norm": 0.29068723320961, "learning_rate": 7.914613682623484e-06, "loss": 0.2559, "step": 31395 }, { "epoch": 0.5669548386164858, "grad_norm": 0.4912876784801483, "learning_rate": 7.911839881043229e-06, "loss": 0.1858, "step": 31400 }, { "epoch": 0.5670451180493865, "grad_norm": 2.5178728103637695, "learning_rate": 7.909066247440276e-06, "loss": 0.2582, "step": 31405 }, { "epoch": 0.5671353974822871, "grad_norm": 0.3810131549835205, "learning_rate": 7.906292782037742e-06, "loss": 0.1843, "step": 31410 }, { "epoch": 0.5672256769151879, "grad_norm": 0.30515459179878235, "learning_rate": 7.903519485058725e-06, "loss": 0.299, "step": 31415 }, { "epoch": 0.5673159563480886, "grad_norm": 0.5835896730422974, "learning_rate": 7.90074635672633e-06, "loss": 0.2202, "step": 31420 }, { "epoch": 0.5674062357809894, "grad_norm": 0.6488636136054993, "learning_rate": 7.897973397263622e-06, "loss": 0.307, "step": 31425 }, { "epoch": 0.56749651521389, "grad_norm": 0.6058459877967834, "learning_rate": 7.89520060689367e-06, "loss": 0.2541, "step": 31430 }, { "epoch": 0.5675867946467907, "grad_norm": 0.4164639711380005, "learning_rate": 7.892427985839528e-06, "loss": 0.228, "step": 31435 }, { "epoch": 0.5676770740796915, "grad_norm": 0.4838501214981079, "learning_rate": 7.889655534324233e-06, "loss": 0.203, "step": 31440 }, { "epoch": 0.5677673535125922, "grad_norm": 0.6691818237304688, "learning_rate": 7.886883252570802e-06, "loss": 0.253, "step": 31445 }, { "epoch": 0.5678576329454929, "grad_norm": 0.18681277334690094, "learning_rate": 7.884111140802253e-06, "loss": 0.1815, "step": 31450 }, { "epoch": 0.5679479123783936, "grad_norm": 0.2960595488548279, "learning_rate": 7.881339199241577e-06, "loss": 0.1957, "step": 31455 }, { "epoch": 0.5680381918112943, "grad_norm": 0.18710044026374817, "learning_rate": 7.87856742811176e-06, "loss": 0.1687, "step": 31460 }, { "epoch": 0.5681284712441951, "grad_norm": 0.5640111565589905, "learning_rate": 7.875795827635765e-06, "loss": 0.2155, "step": 31465 }, { "epoch": 0.5682187506770957, "grad_norm": 0.31908056139945984, "learning_rate": 7.873024398036558e-06, "loss": 0.2998, "step": 31470 }, { "epoch": 0.5683090301099964, "grad_norm": 0.5567411780357361, "learning_rate": 7.870253139537068e-06, "loss": 0.2355, "step": 31475 }, { "epoch": 0.5683993095428972, "grad_norm": 0.582668662071228, "learning_rate": 7.86748205236023e-06, "loss": 0.2492, "step": 31480 }, { "epoch": 0.5684895889757979, "grad_norm": 0.3465553820133209, "learning_rate": 7.864711136728956e-06, "loss": 0.2323, "step": 31485 }, { "epoch": 0.5685798684086986, "grad_norm": 0.5908859372138977, "learning_rate": 7.861940392866149e-06, "loss": 0.2537, "step": 31490 }, { "epoch": 0.5686701478415993, "grad_norm": 0.2922111749649048, "learning_rate": 7.859169820994688e-06, "loss": 0.2066, "step": 31495 }, { "epoch": 0.5687604272745, "grad_norm": 0.44060826301574707, "learning_rate": 7.856399421337451e-06, "loss": 0.3348, "step": 31500 }, { "epoch": 0.5688507067074008, "grad_norm": 0.2694040536880493, "learning_rate": 7.853629194117297e-06, "loss": 0.167, "step": 31505 }, { "epoch": 0.5689409861403014, "grad_norm": 0.3127096891403198, "learning_rate": 7.850859139557066e-06, "loss": 0.1711, "step": 31510 }, { "epoch": 0.5690312655732022, "grad_norm": 0.3559912443161011, "learning_rate": 7.848089257879593e-06, "loss": 0.1844, "step": 31515 }, { "epoch": 0.5691215450061029, "grad_norm": 0.34979164600372314, "learning_rate": 7.84531954930769e-06, "loss": 0.2305, "step": 31520 }, { "epoch": 0.5692118244390036, "grad_norm": 0.8576284050941467, "learning_rate": 7.842550014064166e-06, "loss": 0.2346, "step": 31525 }, { "epoch": 0.5693021038719043, "grad_norm": 0.34732529520988464, "learning_rate": 7.839780652371804e-06, "loss": 0.1926, "step": 31530 }, { "epoch": 0.569392383304805, "grad_norm": 0.3402683734893799, "learning_rate": 7.837011464453388e-06, "loss": 0.1806, "step": 31535 }, { "epoch": 0.5694826627377058, "grad_norm": 0.4833114445209503, "learning_rate": 7.83424245053167e-06, "loss": 0.329, "step": 31540 }, { "epoch": 0.5695729421706065, "grad_norm": 0.35603630542755127, "learning_rate": 7.8314736108294e-06, "loss": 0.1452, "step": 31545 }, { "epoch": 0.5696632216035071, "grad_norm": 0.5631104707717896, "learning_rate": 7.82870494556931e-06, "loss": 0.198, "step": 31550 }, { "epoch": 0.5697535010364079, "grad_norm": 0.36028844118118286, "learning_rate": 7.825936454974126e-06, "loss": 0.3105, "step": 31555 }, { "epoch": 0.5698437804693086, "grad_norm": 0.4279186725616455, "learning_rate": 7.823168139266542e-06, "loss": 0.1424, "step": 31560 }, { "epoch": 0.5699340599022094, "grad_norm": 0.3231222331523895, "learning_rate": 7.820399998669254e-06, "loss": 0.3305, "step": 31565 }, { "epoch": 0.57002433933511, "grad_norm": 0.4611665904521942, "learning_rate": 7.81763203340494e-06, "loss": 0.1877, "step": 31570 }, { "epoch": 0.5701146187680107, "grad_norm": 0.2889973223209381, "learning_rate": 7.814864243696265e-06, "loss": 0.2433, "step": 31575 }, { "epoch": 0.5702048982009115, "grad_norm": 0.38997986912727356, "learning_rate": 7.812096629765869e-06, "loss": 0.2144, "step": 31580 }, { "epoch": 0.5702951776338122, "grad_norm": 0.4267489016056061, "learning_rate": 7.809329191836394e-06, "loss": 0.2195, "step": 31585 }, { "epoch": 0.5703854570667128, "grad_norm": 0.123191699385643, "learning_rate": 7.806561930130458e-06, "loss": 0.2302, "step": 31590 }, { "epoch": 0.5704757364996136, "grad_norm": 0.5885843634605408, "learning_rate": 7.803794844870665e-06, "loss": 0.2811, "step": 31595 }, { "epoch": 0.5705660159325143, "grad_norm": 0.4821052849292755, "learning_rate": 7.80102793627961e-06, "loss": 0.3085, "step": 31600 }, { "epoch": 0.5706562953654151, "grad_norm": 0.4820989668369293, "learning_rate": 7.79826120457987e-06, "loss": 0.2794, "step": 31605 }, { "epoch": 0.5707465747983157, "grad_norm": 0.38792115449905396, "learning_rate": 7.795494649994007e-06, "loss": 0.1767, "step": 31610 }, { "epoch": 0.5708368542312164, "grad_norm": 0.3137624263763428, "learning_rate": 7.79272827274457e-06, "loss": 0.2751, "step": 31615 }, { "epoch": 0.5709271336641172, "grad_norm": 0.3731110692024231, "learning_rate": 7.7899620730541e-06, "loss": 0.2167, "step": 31620 }, { "epoch": 0.5710174130970179, "grad_norm": 0.33237844705581665, "learning_rate": 7.787196051145108e-06, "loss": 0.1972, "step": 31625 }, { "epoch": 0.5711076925299186, "grad_norm": 0.49587783217430115, "learning_rate": 7.784430207240105e-06, "loss": 0.293, "step": 31630 }, { "epoch": 0.5711979719628193, "grad_norm": 0.5755000710487366, "learning_rate": 7.781664541561584e-06, "loss": 0.2391, "step": 31635 }, { "epoch": 0.57128825139572, "grad_norm": 0.3513915538787842, "learning_rate": 7.778899054332024e-06, "loss": 0.2511, "step": 31640 }, { "epoch": 0.5713785308286208, "grad_norm": 0.39008522033691406, "learning_rate": 7.776133745773882e-06, "loss": 0.2076, "step": 31645 }, { "epoch": 0.5714688102615214, "grad_norm": 0.45953187346458435, "learning_rate": 7.773368616109616e-06, "loss": 0.2583, "step": 31650 }, { "epoch": 0.5715590896944222, "grad_norm": 0.24481430649757385, "learning_rate": 7.77060366556165e-06, "loss": 0.2592, "step": 31655 }, { "epoch": 0.5716493691273229, "grad_norm": 0.13567975163459778, "learning_rate": 7.767838894352413e-06, "loss": 0.2453, "step": 31660 }, { "epoch": 0.5717396485602236, "grad_norm": 0.5409976840019226, "learning_rate": 7.765074302704307e-06, "loss": 0.2279, "step": 31665 }, { "epoch": 0.5718299279931243, "grad_norm": 0.4621885120868683, "learning_rate": 7.762309890839726e-06, "loss": 0.2845, "step": 31670 }, { "epoch": 0.571920207426025, "grad_norm": 0.3942146897315979, "learning_rate": 7.75954565898104e-06, "loss": 0.2943, "step": 31675 }, { "epoch": 0.5720104868589257, "grad_norm": 0.6451921463012695, "learning_rate": 7.75678160735062e-06, "loss": 0.2337, "step": 31680 }, { "epoch": 0.5721007662918265, "grad_norm": 0.422530859708786, "learning_rate": 7.754017736170812e-06, "loss": 0.2573, "step": 31685 }, { "epoch": 0.5721910457247271, "grad_norm": 0.36159053444862366, "learning_rate": 7.751254045663943e-06, "loss": 0.2493, "step": 31690 }, { "epoch": 0.5722813251576279, "grad_norm": 0.4430783987045288, "learning_rate": 7.748490536052337e-06, "loss": 0.3274, "step": 31695 }, { "epoch": 0.5723716045905286, "grad_norm": 0.8427174091339111, "learning_rate": 7.745727207558297e-06, "loss": 0.2285, "step": 31700 }, { "epoch": 0.5724618840234293, "grad_norm": 0.6411793231964111, "learning_rate": 7.742964060404117e-06, "loss": 0.2674, "step": 31705 }, { "epoch": 0.5725521634563301, "grad_norm": 0.5644553303718567, "learning_rate": 7.740201094812062e-06, "loss": 0.2603, "step": 31710 }, { "epoch": 0.5726424428892307, "grad_norm": 0.3624364137649536, "learning_rate": 7.737438311004402e-06, "loss": 0.2745, "step": 31715 }, { "epoch": 0.5727327223221315, "grad_norm": 0.46105074882507324, "learning_rate": 7.734675709203377e-06, "loss": 0.3357, "step": 31720 }, { "epoch": 0.5728230017550322, "grad_norm": 0.43723955750465393, "learning_rate": 7.731913289631224e-06, "loss": 0.2978, "step": 31725 }, { "epoch": 0.5729132811879329, "grad_norm": 0.5800166726112366, "learning_rate": 7.729151052510152e-06, "loss": 0.1662, "step": 31730 }, { "epoch": 0.5730035606208336, "grad_norm": 0.2920645773410797, "learning_rate": 7.726388998062372e-06, "loss": 0.2246, "step": 31735 }, { "epoch": 0.5730938400537343, "grad_norm": 0.6474556922912598, "learning_rate": 7.723627126510063e-06, "loss": 0.2873, "step": 31740 }, { "epoch": 0.5731841194866351, "grad_norm": 0.3297922909259796, "learning_rate": 7.720865438075401e-06, "loss": 0.1754, "step": 31745 }, { "epoch": 0.5732743989195358, "grad_norm": 0.5210285782814026, "learning_rate": 7.718103932980545e-06, "loss": 0.2546, "step": 31750 }, { "epoch": 0.5733646783524364, "grad_norm": 0.47570544481277466, "learning_rate": 7.715342611447638e-06, "loss": 0.2637, "step": 31755 }, { "epoch": 0.5734549577853372, "grad_norm": 0.5185160040855408, "learning_rate": 7.712581473698804e-06, "loss": 0.2799, "step": 31760 }, { "epoch": 0.5735452372182379, "grad_norm": 0.340856671333313, "learning_rate": 7.709820519956162e-06, "loss": 0.2269, "step": 31765 }, { "epoch": 0.5736355166511387, "grad_norm": 0.3276570737361908, "learning_rate": 7.707059750441803e-06, "loss": 0.2887, "step": 31770 }, { "epoch": 0.5737257960840393, "grad_norm": 0.26869645714759827, "learning_rate": 7.704299165377822e-06, "loss": 0.264, "step": 31775 }, { "epoch": 0.57381607551694, "grad_norm": 0.4268071949481964, "learning_rate": 7.701538764986281e-06, "loss": 0.242, "step": 31780 }, { "epoch": 0.5739063549498408, "grad_norm": 0.722257137298584, "learning_rate": 7.69877854948923e-06, "loss": 0.2949, "step": 31785 }, { "epoch": 0.5739966343827415, "grad_norm": 0.4261210858821869, "learning_rate": 7.696018519108717e-06, "loss": 0.2736, "step": 31790 }, { "epoch": 0.5740869138156421, "grad_norm": 0.4993356168270111, "learning_rate": 7.69325867406676e-06, "loss": 0.3129, "step": 31795 }, { "epoch": 0.5741771932485429, "grad_norm": 0.5304279923439026, "learning_rate": 7.690499014585375e-06, "loss": 0.2423, "step": 31800 }, { "epoch": 0.5742674726814436, "grad_norm": 0.48582887649536133, "learning_rate": 7.687739540886549e-06, "loss": 0.2367, "step": 31805 }, { "epoch": 0.5743577521143444, "grad_norm": 0.4940268397331238, "learning_rate": 7.684980253192267e-06, "loss": 0.3923, "step": 31810 }, { "epoch": 0.574448031547245, "grad_norm": 0.32182779908180237, "learning_rate": 7.682221151724489e-06, "loss": 0.1695, "step": 31815 }, { "epoch": 0.5745383109801457, "grad_norm": 0.3178272247314453, "learning_rate": 7.679462236705172e-06, "loss": 0.3104, "step": 31820 }, { "epoch": 0.5746285904130465, "grad_norm": 0.24268971383571625, "learning_rate": 7.676703508356243e-06, "loss": 0.2052, "step": 31825 }, { "epoch": 0.5747188698459472, "grad_norm": 0.4058937430381775, "learning_rate": 7.673944966899625e-06, "loss": 0.1739, "step": 31830 }, { "epoch": 0.5748091492788479, "grad_norm": 0.5511011481285095, "learning_rate": 7.671186612557223e-06, "loss": 0.2352, "step": 31835 }, { "epoch": 0.5748994287117486, "grad_norm": 0.37227874994277954, "learning_rate": 7.668428445550929e-06, "loss": 0.2458, "step": 31840 }, { "epoch": 0.5749897081446493, "grad_norm": 0.36259961128234863, "learning_rate": 7.665670466102607e-06, "loss": 0.2716, "step": 31845 }, { "epoch": 0.5750799875775501, "grad_norm": 0.4528560936450958, "learning_rate": 7.66291267443413e-06, "loss": 0.2813, "step": 31850 }, { "epoch": 0.5751702670104507, "grad_norm": 0.5381774306297302, "learning_rate": 7.660155070767333e-06, "loss": 0.1767, "step": 31855 }, { "epoch": 0.5752605464433515, "grad_norm": 0.382550448179245, "learning_rate": 7.657397655324049e-06, "loss": 0.2337, "step": 31860 }, { "epoch": 0.5753508258762522, "grad_norm": 0.32369717955589294, "learning_rate": 7.654640428326095e-06, "loss": 0.2262, "step": 31865 }, { "epoch": 0.5754411053091529, "grad_norm": 0.3207179307937622, "learning_rate": 7.65188338999526e-06, "loss": 0.2847, "step": 31870 }, { "epoch": 0.5755313847420536, "grad_norm": 0.43713873624801636, "learning_rate": 7.649126540553337e-06, "loss": 0.2279, "step": 31875 }, { "epoch": 0.5756216641749543, "grad_norm": 0.9669202566146851, "learning_rate": 7.64636988022209e-06, "loss": 0.2079, "step": 31880 }, { "epoch": 0.575711943607855, "grad_norm": 0.38378670811653137, "learning_rate": 7.643613409223277e-06, "loss": 0.2033, "step": 31885 }, { "epoch": 0.5758022230407558, "grad_norm": 0.5948004126548767, "learning_rate": 7.640857127778627e-06, "loss": 0.2914, "step": 31890 }, { "epoch": 0.5758925024736564, "grad_norm": 0.4891936779022217, "learning_rate": 7.638101036109873e-06, "loss": 0.2132, "step": 31895 }, { "epoch": 0.5759827819065572, "grad_norm": 0.2774111330509186, "learning_rate": 7.635345134438716e-06, "loss": 0.2747, "step": 31900 }, { "epoch": 0.5760730613394579, "grad_norm": 0.4318869411945343, "learning_rate": 7.632589422986853e-06, "loss": 0.226, "step": 31905 }, { "epoch": 0.5761633407723586, "grad_norm": 0.3877994120121002, "learning_rate": 7.62983390197595e-06, "loss": 0.2415, "step": 31910 }, { "epoch": 0.5762536202052593, "grad_norm": 0.6382290124893188, "learning_rate": 7.6270785716276875e-06, "loss": 0.2094, "step": 31915 }, { "epoch": 0.57634389963816, "grad_norm": 0.6943665146827698, "learning_rate": 7.624323432163694e-06, "loss": 0.2478, "step": 31920 }, { "epoch": 0.5764341790710608, "grad_norm": 0.3250424265861511, "learning_rate": 7.621568483805611e-06, "loss": 0.2835, "step": 31925 }, { "epoch": 0.5765244585039615, "grad_norm": 0.36512619256973267, "learning_rate": 7.618813726775048e-06, "loss": 0.2425, "step": 31930 }, { "epoch": 0.5766147379368621, "grad_norm": 0.454352468252182, "learning_rate": 7.6160591612936115e-06, "loss": 0.2637, "step": 31935 }, { "epoch": 0.5767050173697629, "grad_norm": 0.30730774998664856, "learning_rate": 7.613304787582879e-06, "loss": 0.3078, "step": 31940 }, { "epoch": 0.5767952968026636, "grad_norm": 0.3757621943950653, "learning_rate": 7.610550605864425e-06, "loss": 0.2881, "step": 31945 }, { "epoch": 0.5768855762355644, "grad_norm": 0.29341045022010803, "learning_rate": 7.607796616359799e-06, "loss": 0.2679, "step": 31950 }, { "epoch": 0.576975855668465, "grad_norm": 0.3899611830711365, "learning_rate": 7.605042819290549e-06, "loss": 0.2254, "step": 31955 }, { "epoch": 0.5770661351013657, "grad_norm": 0.6977159976959229, "learning_rate": 7.602289214878187e-06, "loss": 0.3099, "step": 31960 }, { "epoch": 0.5771564145342665, "grad_norm": 0.43100249767303467, "learning_rate": 7.5995358033442225e-06, "loss": 0.2899, "step": 31965 }, { "epoch": 0.5772466939671672, "grad_norm": 0.35019803047180176, "learning_rate": 7.596782584910154e-06, "loss": 0.1998, "step": 31970 }, { "epoch": 0.5773369734000678, "grad_norm": 0.3060927987098694, "learning_rate": 7.5940295597974474e-06, "loss": 0.2194, "step": 31975 }, { "epoch": 0.5774272528329686, "grad_norm": 0.3989834487438202, "learning_rate": 7.591276728227576e-06, "loss": 0.2037, "step": 31980 }, { "epoch": 0.5775175322658693, "grad_norm": 0.3755948543548584, "learning_rate": 7.588524090421973e-06, "loss": 0.2055, "step": 31985 }, { "epoch": 0.5776078116987701, "grad_norm": 0.34243911504745483, "learning_rate": 7.585771646602078e-06, "loss": 0.2308, "step": 31990 }, { "epoch": 0.5776980911316707, "grad_norm": 0.3287220895290375, "learning_rate": 7.583019396989299e-06, "loss": 0.2884, "step": 31995 }, { "epoch": 0.5777883705645714, "grad_norm": 0.7058871388435364, "learning_rate": 7.580267341805039e-06, "loss": 0.2626, "step": 32000 }, { "epoch": 0.5778786499974722, "grad_norm": 0.41973498463630676, "learning_rate": 7.577515481270675e-06, "loss": 0.2699, "step": 32005 }, { "epoch": 0.5779689294303729, "grad_norm": 0.3011660873889923, "learning_rate": 7.574763815607579e-06, "loss": 0.1988, "step": 32010 }, { "epoch": 0.5780592088632736, "grad_norm": 0.3949172794818878, "learning_rate": 7.572012345037098e-06, "loss": 0.2731, "step": 32015 }, { "epoch": 0.5781494882961743, "grad_norm": 0.3967495858669281, "learning_rate": 7.569261069780577e-06, "loss": 0.23, "step": 32020 }, { "epoch": 0.578239767729075, "grad_norm": 0.390185683965683, "learning_rate": 7.566509990059323e-06, "loss": 0.197, "step": 32025 }, { "epoch": 0.5783300471619758, "grad_norm": 0.5422402620315552, "learning_rate": 7.563759106094652e-06, "loss": 0.1938, "step": 32030 }, { "epoch": 0.5784203265948764, "grad_norm": 0.44239312410354614, "learning_rate": 7.561008418107843e-06, "loss": 0.2468, "step": 32035 }, { "epoch": 0.5785106060277772, "grad_norm": 0.6479213833808899, "learning_rate": 7.558257926320177e-06, "loss": 0.1806, "step": 32040 }, { "epoch": 0.5786008854606779, "grad_norm": 0.3620591163635254, "learning_rate": 7.555507630952905e-06, "loss": 0.2389, "step": 32045 }, { "epoch": 0.5786911648935786, "grad_norm": 0.3467635214328766, "learning_rate": 7.552757532227276e-06, "loss": 0.2576, "step": 32050 }, { "epoch": 0.5787814443264793, "grad_norm": 0.7262997031211853, "learning_rate": 7.5500076303645076e-06, "loss": 0.3137, "step": 32055 }, { "epoch": 0.57887172375938, "grad_norm": 0.4510249197483063, "learning_rate": 7.54725792558581e-06, "loss": 0.1948, "step": 32060 }, { "epoch": 0.5789620031922807, "grad_norm": 0.4553157687187195, "learning_rate": 7.544508418112384e-06, "loss": 0.2515, "step": 32065 }, { "epoch": 0.5790522826251815, "grad_norm": 0.38665473461151123, "learning_rate": 7.541759108165397e-06, "loss": 0.2272, "step": 32070 }, { "epoch": 0.5791425620580821, "grad_norm": 0.4248069226741791, "learning_rate": 7.53900999596602e-06, "loss": 0.2566, "step": 32075 }, { "epoch": 0.5792328414909829, "grad_norm": 0.4079587757587433, "learning_rate": 7.536261081735393e-06, "loss": 0.2314, "step": 32080 }, { "epoch": 0.5793231209238836, "grad_norm": 0.5409365892410278, "learning_rate": 7.533512365694654e-06, "loss": 0.2204, "step": 32085 }, { "epoch": 0.5794134003567843, "grad_norm": 0.3574941158294678, "learning_rate": 7.530763848064907e-06, "loss": 0.1702, "step": 32090 }, { "epoch": 0.579503679789685, "grad_norm": 0.32814323902130127, "learning_rate": 7.5280155290672575e-06, "loss": 0.183, "step": 32095 }, { "epoch": 0.5795939592225857, "grad_norm": 0.5189509391784668, "learning_rate": 7.525267408922784e-06, "loss": 0.1933, "step": 32100 }, { "epoch": 0.5796842386554865, "grad_norm": 0.5349361300468445, "learning_rate": 7.522519487852558e-06, "loss": 0.2453, "step": 32105 }, { "epoch": 0.5797745180883872, "grad_norm": 0.6736632585525513, "learning_rate": 7.519771766077619e-06, "loss": 0.1977, "step": 32110 }, { "epoch": 0.579864797521288, "grad_norm": 0.2429421991109848, "learning_rate": 7.517024243819017e-06, "loss": 0.1993, "step": 32115 }, { "epoch": 0.5799550769541886, "grad_norm": 0.3097635805606842, "learning_rate": 7.5142769212977565e-06, "loss": 0.1585, "step": 32120 }, { "epoch": 0.5800453563870893, "grad_norm": 0.30012813210487366, "learning_rate": 7.511529798734847e-06, "loss": 0.2413, "step": 32125 }, { "epoch": 0.5801356358199901, "grad_norm": 0.4978336691856384, "learning_rate": 7.508782876351268e-06, "loss": 0.2074, "step": 32130 }, { "epoch": 0.5802259152528908, "grad_norm": 0.287640243768692, "learning_rate": 7.5060361543680005e-06, "loss": 0.2102, "step": 32135 }, { "epoch": 0.5803161946857914, "grad_norm": 0.33799034357070923, "learning_rate": 7.503289633005988e-06, "loss": 0.2385, "step": 32140 }, { "epoch": 0.5804064741186922, "grad_norm": 0.4288344979286194, "learning_rate": 7.500543312486171e-06, "loss": 0.3285, "step": 32145 }, { "epoch": 0.5804967535515929, "grad_norm": 0.4685920178890228, "learning_rate": 7.497797193029475e-06, "loss": 0.2195, "step": 32150 }, { "epoch": 0.5805870329844937, "grad_norm": 0.3669572174549103, "learning_rate": 7.495051274856797e-06, "loss": 0.2371, "step": 32155 }, { "epoch": 0.5806773124173943, "grad_norm": 0.28180867433547974, "learning_rate": 7.492305558189034e-06, "loss": 0.1351, "step": 32160 }, { "epoch": 0.580767591850295, "grad_norm": 0.33606743812561035, "learning_rate": 7.489560043247055e-06, "loss": 0.2492, "step": 32165 }, { "epoch": 0.5808578712831958, "grad_norm": 0.2527913451194763, "learning_rate": 7.486814730251721e-06, "loss": 0.236, "step": 32170 }, { "epoch": 0.5809481507160965, "grad_norm": 0.6620507836341858, "learning_rate": 7.484069619423864e-06, "loss": 0.2081, "step": 32175 }, { "epoch": 0.5810384301489971, "grad_norm": 0.3179631233215332, "learning_rate": 7.481324710984319e-06, "loss": 0.1368, "step": 32180 }, { "epoch": 0.5811287095818979, "grad_norm": 0.49275219440460205, "learning_rate": 7.478580005153884e-06, "loss": 0.2072, "step": 32185 }, { "epoch": 0.5812189890147986, "grad_norm": 0.6345303654670715, "learning_rate": 7.475835502153358e-06, "loss": 0.2794, "step": 32190 }, { "epoch": 0.5813092684476994, "grad_norm": 0.32289382815361023, "learning_rate": 7.47309120220351e-06, "loss": 0.2749, "step": 32195 }, { "epoch": 0.5813995478806, "grad_norm": 0.47582992911338806, "learning_rate": 7.470347105525108e-06, "loss": 0.1932, "step": 32200 }, { "epoch": 0.5814898273135007, "grad_norm": 0.6040963530540466, "learning_rate": 7.467603212338884e-06, "loss": 0.2711, "step": 32205 }, { "epoch": 0.5815801067464015, "grad_norm": 0.3335484564304352, "learning_rate": 7.464859522865572e-06, "loss": 0.2152, "step": 32210 }, { "epoch": 0.5816703861793022, "grad_norm": 0.6475858688354492, "learning_rate": 7.462116037325877e-06, "loss": 0.2821, "step": 32215 }, { "epoch": 0.5817606656122029, "grad_norm": 0.325191855430603, "learning_rate": 7.4593727559404984e-06, "loss": 0.2612, "step": 32220 }, { "epoch": 0.5818509450451036, "grad_norm": 0.5029043555259705, "learning_rate": 7.456629678930105e-06, "loss": 0.2514, "step": 32225 }, { "epoch": 0.5819412244780043, "grad_norm": 0.5356692671775818, "learning_rate": 7.453886806515365e-06, "loss": 0.2775, "step": 32230 }, { "epoch": 0.5820315039109051, "grad_norm": 0.41439715027809143, "learning_rate": 7.45114413891692e-06, "loss": 0.2357, "step": 32235 }, { "epoch": 0.5821217833438057, "grad_norm": 0.6161235570907593, "learning_rate": 7.4484016763553924e-06, "loss": 0.2713, "step": 32240 }, { "epoch": 0.5822120627767065, "grad_norm": 0.5415605902671814, "learning_rate": 7.445659419051403e-06, "loss": 0.1144, "step": 32245 }, { "epoch": 0.5823023422096072, "grad_norm": 0.45095542073249817, "learning_rate": 7.442917367225537e-06, "loss": 0.2546, "step": 32250 }, { "epoch": 0.5823926216425079, "grad_norm": 0.4577171504497528, "learning_rate": 7.44017552109838e-06, "loss": 0.2644, "step": 32255 }, { "epoch": 0.5824829010754086, "grad_norm": 0.5528016686439514, "learning_rate": 7.437433880890488e-06, "loss": 0.2314, "step": 32260 }, { "epoch": 0.5825731805083093, "grad_norm": 0.3215571343898773, "learning_rate": 7.434692446822413e-06, "loss": 0.195, "step": 32265 }, { "epoch": 0.58266345994121, "grad_norm": 0.2885240614414215, "learning_rate": 7.431951219114676e-06, "loss": 0.2912, "step": 32270 }, { "epoch": 0.5827537393741108, "grad_norm": 0.41558414697647095, "learning_rate": 7.4292101979877924e-06, "loss": 0.2877, "step": 32275 }, { "epoch": 0.5828440188070114, "grad_norm": 0.6126312613487244, "learning_rate": 7.426469383662256e-06, "loss": 0.2178, "step": 32280 }, { "epoch": 0.5829342982399122, "grad_norm": 0.39613085985183716, "learning_rate": 7.42372877635855e-06, "loss": 0.2333, "step": 32285 }, { "epoch": 0.5830245776728129, "grad_norm": 0.2641087472438812, "learning_rate": 7.420988376297128e-06, "loss": 0.2064, "step": 32290 }, { "epoch": 0.5831148571057136, "grad_norm": 0.2797715365886688, "learning_rate": 7.418248183698442e-06, "loss": 0.3491, "step": 32295 }, { "epoch": 0.5832051365386143, "grad_norm": 0.3410707712173462, "learning_rate": 7.415508198782917e-06, "loss": 0.1643, "step": 32300 }, { "epoch": 0.583295415971515, "grad_norm": 0.6921510696411133, "learning_rate": 7.412768421770969e-06, "loss": 0.2448, "step": 32305 }, { "epoch": 0.5833856954044158, "grad_norm": 0.3319537341594696, "learning_rate": 7.410028852882986e-06, "loss": 0.2072, "step": 32310 }, { "epoch": 0.5834759748373165, "grad_norm": 0.46647992730140686, "learning_rate": 7.407289492339358e-06, "loss": 0.2774, "step": 32315 }, { "epoch": 0.5835662542702171, "grad_norm": 0.36552879214286804, "learning_rate": 7.404550340360434e-06, "loss": 0.3033, "step": 32320 }, { "epoch": 0.5836565337031179, "grad_norm": 0.6458025574684143, "learning_rate": 7.401811397166566e-06, "loss": 0.2489, "step": 32325 }, { "epoch": 0.5837468131360186, "grad_norm": 0.42477449774742126, "learning_rate": 7.399072662978083e-06, "loss": 0.2144, "step": 32330 }, { "epoch": 0.5838370925689194, "grad_norm": 0.3511560559272766, "learning_rate": 7.3963341380152896e-06, "loss": 0.1876, "step": 32335 }, { "epoch": 0.58392737200182, "grad_norm": 0.27430459856987, "learning_rate": 7.393595822498486e-06, "loss": 0.2967, "step": 32340 }, { "epoch": 0.5840176514347207, "grad_norm": 0.39398086071014404, "learning_rate": 7.3908577166479465e-06, "loss": 0.1741, "step": 32345 }, { "epoch": 0.5841079308676215, "grad_norm": 0.3841555714607239, "learning_rate": 7.3881198206839375e-06, "loss": 0.189, "step": 32350 }, { "epoch": 0.5841982103005222, "grad_norm": 2.1301867961883545, "learning_rate": 7.385382134826695e-06, "loss": 0.2152, "step": 32355 }, { "epoch": 0.5842884897334228, "grad_norm": 0.3354480564594269, "learning_rate": 7.382644659296451e-06, "loss": 0.3059, "step": 32360 }, { "epoch": 0.5843787691663236, "grad_norm": 0.5217795372009277, "learning_rate": 7.379907394313412e-06, "loss": 0.1943, "step": 32365 }, { "epoch": 0.5844690485992243, "grad_norm": 0.4222526252269745, "learning_rate": 7.3771703400977745e-06, "loss": 0.2983, "step": 32370 }, { "epoch": 0.5845593280321251, "grad_norm": 0.5341085195541382, "learning_rate": 7.374433496869713e-06, "loss": 0.2763, "step": 32375 }, { "epoch": 0.5846496074650257, "grad_norm": 2.0725042819976807, "learning_rate": 7.37169686484939e-06, "loss": 0.3016, "step": 32380 }, { "epoch": 0.5847398868979264, "grad_norm": 0.2955710291862488, "learning_rate": 7.368960444256941e-06, "loss": 0.2444, "step": 32385 }, { "epoch": 0.5848301663308272, "grad_norm": 0.47486481070518494, "learning_rate": 7.3662242353124965e-06, "loss": 0.2634, "step": 32390 }, { "epoch": 0.5849204457637279, "grad_norm": 0.4509022831916809, "learning_rate": 7.36348823823616e-06, "loss": 0.2499, "step": 32395 }, { "epoch": 0.5850107251966286, "grad_norm": 0.3450268805027008, "learning_rate": 7.3607524532480325e-06, "loss": 0.2775, "step": 32400 }, { "epoch": 0.5851010046295293, "grad_norm": 0.35095328092575073, "learning_rate": 7.358016880568174e-06, "loss": 0.2789, "step": 32405 }, { "epoch": 0.58519128406243, "grad_norm": 0.37022092938423157, "learning_rate": 7.355281520416652e-06, "loss": 0.2526, "step": 32410 }, { "epoch": 0.5852815634953308, "grad_norm": 0.6604461669921875, "learning_rate": 7.352546373013499e-06, "loss": 0.2138, "step": 32415 }, { "epoch": 0.5853718429282314, "grad_norm": 0.41028112173080444, "learning_rate": 7.349811438578747e-06, "loss": 0.1188, "step": 32420 }, { "epoch": 0.5854621223611322, "grad_norm": 2.1903560161590576, "learning_rate": 7.347076717332393e-06, "loss": 0.154, "step": 32425 }, { "epoch": 0.5855524017940329, "grad_norm": 0.5380918383598328, "learning_rate": 7.344342209494427e-06, "loss": 0.2395, "step": 32430 }, { "epoch": 0.5856426812269336, "grad_norm": 0.6822723150253296, "learning_rate": 7.341607915284824e-06, "loss": 0.2498, "step": 32435 }, { "epoch": 0.5857329606598343, "grad_norm": 0.6931403875350952, "learning_rate": 7.3388738349235335e-06, "loss": 0.2981, "step": 32440 }, { "epoch": 0.585823240092735, "grad_norm": 0.5006046891212463, "learning_rate": 7.336139968630499e-06, "loss": 0.2418, "step": 32445 }, { "epoch": 0.5859135195256358, "grad_norm": 0.38321810960769653, "learning_rate": 7.333406316625631e-06, "loss": 0.1812, "step": 32450 }, { "epoch": 0.5860037989585365, "grad_norm": 0.28258997201919556, "learning_rate": 7.330672879128839e-06, "loss": 0.2136, "step": 32455 }, { "epoch": 0.5860940783914371, "grad_norm": 0.4144040048122406, "learning_rate": 7.3279396563600056e-06, "loss": 0.2228, "step": 32460 }, { "epoch": 0.5861843578243379, "grad_norm": 0.45015108585357666, "learning_rate": 7.325206648539003e-06, "loss": 0.2909, "step": 32465 }, { "epoch": 0.5862746372572386, "grad_norm": 0.3811364769935608, "learning_rate": 7.322473855885672e-06, "loss": 0.247, "step": 32470 }, { "epoch": 0.5863649166901393, "grad_norm": 0.2765819728374481, "learning_rate": 7.319741278619854e-06, "loss": 0.2313, "step": 32475 }, { "epoch": 0.58645519612304, "grad_norm": 0.7468791007995605, "learning_rate": 7.317008916961361e-06, "loss": 0.2351, "step": 32480 }, { "epoch": 0.5865454755559407, "grad_norm": 0.6331852078437805, "learning_rate": 7.3142767711299985e-06, "loss": 0.1694, "step": 32485 }, { "epoch": 0.5866357549888415, "grad_norm": 0.18905432522296906, "learning_rate": 7.311544841345538e-06, "loss": 0.2926, "step": 32490 }, { "epoch": 0.5867260344217422, "grad_norm": 0.6289650797843933, "learning_rate": 7.3088131278277495e-06, "loss": 0.2271, "step": 32495 }, { "epoch": 0.586816313854643, "grad_norm": 0.5063753724098206, "learning_rate": 7.306081630796378e-06, "loss": 0.2422, "step": 32500 }, { "epoch": 0.5869065932875436, "grad_norm": 0.48694613575935364, "learning_rate": 7.303350350471153e-06, "loss": 0.171, "step": 32505 }, { "epoch": 0.5869968727204443, "grad_norm": 0.39464613795280457, "learning_rate": 7.300619287071787e-06, "loss": 0.2704, "step": 32510 }, { "epoch": 0.5870871521533451, "grad_norm": 0.7511200904846191, "learning_rate": 7.297888440817971e-06, "loss": 0.2126, "step": 32515 }, { "epoch": 0.5871774315862458, "grad_norm": 0.36095550656318665, "learning_rate": 7.2951578119293845e-06, "loss": 0.2151, "step": 32520 }, { "epoch": 0.5872677110191464, "grad_norm": 0.692754328250885, "learning_rate": 7.292427400625683e-06, "loss": 0.3606, "step": 32525 }, { "epoch": 0.5873579904520472, "grad_norm": 0.35889407992362976, "learning_rate": 7.2896972071265175e-06, "loss": 0.2457, "step": 32530 }, { "epoch": 0.5874482698849479, "grad_norm": 0.5592451691627502, "learning_rate": 7.2869672316515006e-06, "loss": 0.2534, "step": 32535 }, { "epoch": 0.5875385493178487, "grad_norm": 0.5456913113594055, "learning_rate": 7.284237474420246e-06, "loss": 0.2476, "step": 32540 }, { "epoch": 0.5876288287507493, "grad_norm": 0.44270724058151245, "learning_rate": 7.281507935652339e-06, "loss": 0.2677, "step": 32545 }, { "epoch": 0.58771910818365, "grad_norm": 0.3586839735507965, "learning_rate": 7.278778615567357e-06, "loss": 0.355, "step": 32550 }, { "epoch": 0.5878093876165508, "grad_norm": 0.9107780456542969, "learning_rate": 7.276049514384846e-06, "loss": 0.1568, "step": 32555 }, { "epoch": 0.5878996670494515, "grad_norm": 0.588352382183075, "learning_rate": 7.27332063232435e-06, "loss": 0.2128, "step": 32560 }, { "epoch": 0.5879899464823521, "grad_norm": 0.3175543248653412, "learning_rate": 7.2705919696053815e-06, "loss": 0.1631, "step": 32565 }, { "epoch": 0.5880802259152529, "grad_norm": 0.5141485333442688, "learning_rate": 7.267863526447447e-06, "loss": 0.3225, "step": 32570 }, { "epoch": 0.5881705053481536, "grad_norm": 0.38632819056510925, "learning_rate": 7.265135303070024e-06, "loss": 0.2094, "step": 32575 }, { "epoch": 0.5882607847810544, "grad_norm": 0.459495484828949, "learning_rate": 7.262407299692586e-06, "loss": 0.2506, "step": 32580 }, { "epoch": 0.588351064213955, "grad_norm": 0.9379053711891174, "learning_rate": 7.259679516534573e-06, "loss": 0.2083, "step": 32585 }, { "epoch": 0.5884413436468557, "grad_norm": 0.494588702917099, "learning_rate": 7.256951953815421e-06, "loss": 0.2465, "step": 32590 }, { "epoch": 0.5885316230797565, "grad_norm": 0.6023609638214111, "learning_rate": 7.2542246117545365e-06, "loss": 0.2024, "step": 32595 }, { "epoch": 0.5886219025126572, "grad_norm": 0.2940469980239868, "learning_rate": 7.2514974905713245e-06, "loss": 0.2355, "step": 32600 }, { "epoch": 0.5887121819455579, "grad_norm": 0.33728957176208496, "learning_rate": 7.248770590485153e-06, "loss": 0.2037, "step": 32605 }, { "epoch": 0.5888024613784586, "grad_norm": 0.3679303824901581, "learning_rate": 7.246043911715382e-06, "loss": 0.1134, "step": 32610 }, { "epoch": 0.5888927408113593, "grad_norm": 0.5711658596992493, "learning_rate": 7.24331745448136e-06, "loss": 0.3191, "step": 32615 }, { "epoch": 0.5889830202442601, "grad_norm": 0.6049374341964722, "learning_rate": 7.2405912190024e-06, "loss": 0.1782, "step": 32620 }, { "epoch": 0.5890732996771607, "grad_norm": 0.5340908765792847, "learning_rate": 7.237865205497817e-06, "loss": 0.3328, "step": 32625 }, { "epoch": 0.5891635791100615, "grad_norm": 0.3141300082206726, "learning_rate": 7.235139414186893e-06, "loss": 0.3038, "step": 32630 }, { "epoch": 0.5892538585429622, "grad_norm": 0.36305537819862366, "learning_rate": 7.232413845288904e-06, "loss": 0.1934, "step": 32635 }, { "epoch": 0.5893441379758629, "grad_norm": 0.4140510559082031, "learning_rate": 7.229688499023094e-06, "loss": 0.2361, "step": 32640 }, { "epoch": 0.5894344174087636, "grad_norm": 0.4857751429080963, "learning_rate": 7.226963375608709e-06, "loss": 0.2085, "step": 32645 }, { "epoch": 0.5895246968416643, "grad_norm": 0.5394529700279236, "learning_rate": 7.224238475264953e-06, "loss": 0.2648, "step": 32650 }, { "epoch": 0.589614976274565, "grad_norm": 0.7949308753013611, "learning_rate": 7.221513798211032e-06, "loss": 0.2172, "step": 32655 }, { "epoch": 0.5897052557074658, "grad_norm": 0.6003508567810059, "learning_rate": 7.218789344666122e-06, "loss": 0.3284, "step": 32660 }, { "epoch": 0.5897955351403664, "grad_norm": 0.377872109413147, "learning_rate": 7.216065114849393e-06, "loss": 0.2713, "step": 32665 }, { "epoch": 0.5898858145732672, "grad_norm": 0.4554804265499115, "learning_rate": 7.2133411089799795e-06, "loss": 0.2544, "step": 32670 }, { "epoch": 0.5899760940061679, "grad_norm": 0.4866463541984558, "learning_rate": 7.210617327277015e-06, "loss": 0.2311, "step": 32675 }, { "epoch": 0.5900663734390686, "grad_norm": 0.522548496723175, "learning_rate": 7.207893769959604e-06, "loss": 0.2194, "step": 32680 }, { "epoch": 0.5901566528719693, "grad_norm": 0.4356546103954315, "learning_rate": 7.205170437246843e-06, "loss": 0.2707, "step": 32685 }, { "epoch": 0.59024693230487, "grad_norm": 0.24004743993282318, "learning_rate": 7.202447329357793e-06, "loss": 0.2574, "step": 32690 }, { "epoch": 0.5903372117377708, "grad_norm": 0.28580644726753235, "learning_rate": 7.199724446511522e-06, "loss": 0.1791, "step": 32695 }, { "epoch": 0.5904274911706715, "grad_norm": 0.5693247318267822, "learning_rate": 7.197001788927056e-06, "loss": 0.2817, "step": 32700 }, { "epoch": 0.5905177706035721, "grad_norm": 0.46006160974502563, "learning_rate": 7.194279356823416e-06, "loss": 0.3109, "step": 32705 }, { "epoch": 0.5906080500364729, "grad_norm": 0.38426798582077026, "learning_rate": 7.191557150419605e-06, "loss": 0.2108, "step": 32710 }, { "epoch": 0.5906983294693736, "grad_norm": 1.0084168910980225, "learning_rate": 7.188835169934598e-06, "loss": 0.2506, "step": 32715 }, { "epoch": 0.5907886089022744, "grad_norm": 0.4460688829421997, "learning_rate": 7.1861134155873654e-06, "loss": 0.265, "step": 32720 }, { "epoch": 0.590878888335175, "grad_norm": 1.0033804178237915, "learning_rate": 7.183391887596847e-06, "loss": 0.1897, "step": 32725 }, { "epoch": 0.5909691677680757, "grad_norm": 0.34205472469329834, "learning_rate": 7.180670586181977e-06, "loss": 0.2416, "step": 32730 }, { "epoch": 0.5910594472009765, "grad_norm": 0.7768039107322693, "learning_rate": 7.177949511561655e-06, "loss": 0.2599, "step": 32735 }, { "epoch": 0.5911497266338772, "grad_norm": 0.49444419145584106, "learning_rate": 7.175228663954779e-06, "loss": 0.2712, "step": 32740 }, { "epoch": 0.5912400060667778, "grad_norm": 0.1657266914844513, "learning_rate": 7.172508043580217e-06, "loss": 0.2728, "step": 32745 }, { "epoch": 0.5913302854996786, "grad_norm": 0.5309212803840637, "learning_rate": 7.169787650656828e-06, "loss": 0.3384, "step": 32750 }, { "epoch": 0.5914205649325793, "grad_norm": 0.7677445411682129, "learning_rate": 7.16706748540344e-06, "loss": 0.1973, "step": 32755 }, { "epoch": 0.5915108443654801, "grad_norm": 0.4121074080467224, "learning_rate": 7.164347548038882e-06, "loss": 0.2144, "step": 32760 }, { "epoch": 0.5916011237983807, "grad_norm": 0.5791410207748413, "learning_rate": 7.161627838781941e-06, "loss": 0.2754, "step": 32765 }, { "epoch": 0.5916914032312814, "grad_norm": 0.5827525854110718, "learning_rate": 7.158908357851406e-06, "loss": 0.251, "step": 32770 }, { "epoch": 0.5917816826641822, "grad_norm": 0.7230418920516968, "learning_rate": 7.156189105466034e-06, "loss": 0.2533, "step": 32775 }, { "epoch": 0.5918719620970829, "grad_norm": 0.40231138467788696, "learning_rate": 7.153470081844578e-06, "loss": 0.1542, "step": 32780 }, { "epoch": 0.5919622415299836, "grad_norm": 0.771411657333374, "learning_rate": 7.150751287205754e-06, "loss": 0.2126, "step": 32785 }, { "epoch": 0.5920525209628843, "grad_norm": 0.5192276835441589, "learning_rate": 7.148032721768271e-06, "loss": 0.2392, "step": 32790 }, { "epoch": 0.592142800395785, "grad_norm": 0.5683626532554626, "learning_rate": 7.145314385750824e-06, "loss": 0.3019, "step": 32795 }, { "epoch": 0.5922330798286858, "grad_norm": 0.3925086557865143, "learning_rate": 7.142596279372076e-06, "loss": 0.278, "step": 32800 }, { "epoch": 0.5923233592615864, "grad_norm": 0.2804316580295563, "learning_rate": 7.139878402850683e-06, "loss": 0.156, "step": 32805 }, { "epoch": 0.5924136386944872, "grad_norm": 0.4466037452220917, "learning_rate": 7.137160756405274e-06, "loss": 0.2184, "step": 32810 }, { "epoch": 0.5925039181273879, "grad_norm": 0.38808631896972656, "learning_rate": 7.134443340254474e-06, "loss": 0.1733, "step": 32815 }, { "epoch": 0.5925941975602886, "grad_norm": 0.344827264547348, "learning_rate": 7.131726154616866e-06, "loss": 0.2711, "step": 32820 }, { "epoch": 0.5926844769931893, "grad_norm": 0.4249587953090668, "learning_rate": 7.12900919971104e-06, "loss": 0.2162, "step": 32825 }, { "epoch": 0.59277475642609, "grad_norm": 0.4515315592288971, "learning_rate": 7.1262924757555455e-06, "loss": 0.2211, "step": 32830 }, { "epoch": 0.5928650358589908, "grad_norm": 0.5152904391288757, "learning_rate": 7.12357598296893e-06, "loss": 0.2673, "step": 32835 }, { "epoch": 0.5929553152918915, "grad_norm": 0.4642157554626465, "learning_rate": 7.120859721569711e-06, "loss": 0.2428, "step": 32840 }, { "epoch": 0.5930455947247921, "grad_norm": 0.24216291308403015, "learning_rate": 7.118143691776398e-06, "loss": 0.2074, "step": 32845 }, { "epoch": 0.5931358741576929, "grad_norm": 0.5426397323608398, "learning_rate": 7.115427893807467e-06, "loss": 0.2045, "step": 32850 }, { "epoch": 0.5932261535905936, "grad_norm": 0.6953004598617554, "learning_rate": 7.112712327881393e-06, "loss": 0.2555, "step": 32855 }, { "epoch": 0.5933164330234943, "grad_norm": 0.5227711200714111, "learning_rate": 7.1099969942166156e-06, "loss": 0.2336, "step": 32860 }, { "epoch": 0.593406712456395, "grad_norm": 0.38676440715789795, "learning_rate": 7.107281893031572e-06, "loss": 0.1827, "step": 32865 }, { "epoch": 0.5934969918892957, "grad_norm": 0.4496510922908783, "learning_rate": 7.104567024544665e-06, "loss": 0.1892, "step": 32870 }, { "epoch": 0.5935872713221965, "grad_norm": 0.4076612889766693, "learning_rate": 7.1018523889742906e-06, "loss": 0.1925, "step": 32875 }, { "epoch": 0.5936775507550972, "grad_norm": 0.44218653440475464, "learning_rate": 7.099137986538822e-06, "loss": 0.2463, "step": 32880 }, { "epoch": 0.5937678301879978, "grad_norm": 0.32306867837905884, "learning_rate": 7.096423817456605e-06, "loss": 0.191, "step": 32885 }, { "epoch": 0.5938581096208986, "grad_norm": 0.3924742043018341, "learning_rate": 7.093709881945987e-06, "loss": 0.2466, "step": 32890 }, { "epoch": 0.5939483890537993, "grad_norm": 0.2994252145290375, "learning_rate": 7.090996180225274e-06, "loss": 0.1696, "step": 32895 }, { "epoch": 0.5940386684867001, "grad_norm": 0.3526621162891388, "learning_rate": 7.0882827125127695e-06, "loss": 0.2948, "step": 32900 }, { "epoch": 0.5941289479196008, "grad_norm": 0.3149809241294861, "learning_rate": 7.085569479026748e-06, "loss": 0.2851, "step": 32905 }, { "epoch": 0.5942192273525014, "grad_norm": 0.5721583366394043, "learning_rate": 7.0828564799854786e-06, "loss": 0.2253, "step": 32910 }, { "epoch": 0.5943095067854022, "grad_norm": 0.45385658740997314, "learning_rate": 7.0801437156071884e-06, "loss": 0.1538, "step": 32915 }, { "epoch": 0.5943997862183029, "grad_norm": 0.22235175967216492, "learning_rate": 7.0774311861101105e-06, "loss": 0.3371, "step": 32920 }, { "epoch": 0.5944900656512037, "grad_norm": 0.44551515579223633, "learning_rate": 7.074718891712442e-06, "loss": 0.3237, "step": 32925 }, { "epoch": 0.5945803450841043, "grad_norm": 0.42134904861450195, "learning_rate": 7.072006832632376e-06, "loss": 0.3219, "step": 32930 }, { "epoch": 0.594670624517005, "grad_norm": 0.327347993850708, "learning_rate": 7.069295009088065e-06, "loss": 0.2123, "step": 32935 }, { "epoch": 0.5947609039499058, "grad_norm": 0.300333708524704, "learning_rate": 7.066583421297666e-06, "loss": 0.2104, "step": 32940 }, { "epoch": 0.5948511833828065, "grad_norm": 0.6798262000083923, "learning_rate": 7.063872069479299e-06, "loss": 0.2733, "step": 32945 }, { "epoch": 0.5949414628157071, "grad_norm": 0.3732709586620331, "learning_rate": 7.0611609538510805e-06, "loss": 0.2049, "step": 32950 }, { "epoch": 0.5950317422486079, "grad_norm": 0.3809985816478729, "learning_rate": 7.0584500746310914e-06, "loss": 0.2661, "step": 32955 }, { "epoch": 0.5951220216815086, "grad_norm": 0.35851752758026123, "learning_rate": 7.055739432037412e-06, "loss": 0.2184, "step": 32960 }, { "epoch": 0.5952123011144094, "grad_norm": 0.5948862433433533, "learning_rate": 7.053029026288085e-06, "loss": 0.1534, "step": 32965 }, { "epoch": 0.59530258054731, "grad_norm": 0.4455398917198181, "learning_rate": 7.050318857601147e-06, "loss": 0.1618, "step": 32970 }, { "epoch": 0.5953928599802107, "grad_norm": 0.3599335551261902, "learning_rate": 7.047608926194613e-06, "loss": 0.26, "step": 32975 }, { "epoch": 0.5954831394131115, "grad_norm": 0.37924250960350037, "learning_rate": 7.0448992322864715e-06, "loss": 0.2788, "step": 32980 }, { "epoch": 0.5955734188460122, "grad_norm": 0.3209358751773834, "learning_rate": 7.042189776094704e-06, "loss": 0.1891, "step": 32985 }, { "epoch": 0.5956636982789129, "grad_norm": 0.5446608662605286, "learning_rate": 7.03948055783726e-06, "loss": 0.1934, "step": 32990 }, { "epoch": 0.5957539777118136, "grad_norm": 0.28908249735832214, "learning_rate": 7.0367715777320845e-06, "loss": 0.2733, "step": 32995 }, { "epoch": 0.5958442571447143, "grad_norm": 0.38413670659065247, "learning_rate": 7.034062835997087e-06, "loss": 0.1384, "step": 33000 }, { "epoch": 0.5959345365776151, "grad_norm": 0.5856424570083618, "learning_rate": 7.031354332850171e-06, "loss": 0.1553, "step": 33005 }, { "epoch": 0.5960248160105157, "grad_norm": 0.3369595408439636, "learning_rate": 7.028646068509214e-06, "loss": 0.2167, "step": 33010 }, { "epoch": 0.5961150954434165, "grad_norm": 0.7805834412574768, "learning_rate": 7.02593804319208e-06, "loss": 0.221, "step": 33015 }, { "epoch": 0.5962053748763172, "grad_norm": 0.6036295294761658, "learning_rate": 7.0232302571166025e-06, "loss": 0.1743, "step": 33020 }, { "epoch": 0.5962956543092179, "grad_norm": 0.26456376910209656, "learning_rate": 7.020522710500612e-06, "loss": 0.1735, "step": 33025 }, { "epoch": 0.5963859337421186, "grad_norm": 0.3909691572189331, "learning_rate": 7.017815403561903e-06, "loss": 0.176, "step": 33030 }, { "epoch": 0.5964762131750193, "grad_norm": 0.450097918510437, "learning_rate": 7.015108336518264e-06, "loss": 0.2082, "step": 33035 }, { "epoch": 0.59656649260792, "grad_norm": 0.3938777446746826, "learning_rate": 7.012401509587456e-06, "loss": 0.2654, "step": 33040 }, { "epoch": 0.5966567720408208, "grad_norm": 0.6687896847724915, "learning_rate": 7.009694922987227e-06, "loss": 0.2151, "step": 33045 }, { "epoch": 0.5967470514737214, "grad_norm": 0.6720011234283447, "learning_rate": 7.006988576935296e-06, "loss": 0.2315, "step": 33050 }, { "epoch": 0.5968373309066222, "grad_norm": 0.45662808418273926, "learning_rate": 7.0042824716493774e-06, "loss": 0.2633, "step": 33055 }, { "epoch": 0.5969276103395229, "grad_norm": 0.39432138204574585, "learning_rate": 7.001576607347148e-06, "loss": 0.2786, "step": 33060 }, { "epoch": 0.5970178897724236, "grad_norm": 0.2883530557155609, "learning_rate": 6.998870984246286e-06, "loss": 0.1384, "step": 33065 }, { "epoch": 0.5971081692053243, "grad_norm": 0.6928462982177734, "learning_rate": 6.9961656025644284e-06, "loss": 0.1864, "step": 33070 }, { "epoch": 0.597198448638225, "grad_norm": 0.4874042272567749, "learning_rate": 6.993460462519208e-06, "loss": 0.2796, "step": 33075 }, { "epoch": 0.5972887280711258, "grad_norm": 0.4482225477695465, "learning_rate": 6.990755564328237e-06, "loss": 0.2195, "step": 33080 }, { "epoch": 0.5973790075040265, "grad_norm": 0.43891894817352295, "learning_rate": 6.988050908209096e-06, "loss": 0.1704, "step": 33085 }, { "epoch": 0.5974692869369271, "grad_norm": 0.39821308851242065, "learning_rate": 6.985346494379367e-06, "loss": 0.1956, "step": 33090 }, { "epoch": 0.5975595663698279, "grad_norm": 0.8562854528427124, "learning_rate": 6.98264232305659e-06, "loss": 0.2367, "step": 33095 }, { "epoch": 0.5976498458027286, "grad_norm": 0.511570394039154, "learning_rate": 6.9799383944583e-06, "loss": 0.2898, "step": 33100 }, { "epoch": 0.5977401252356294, "grad_norm": 0.21128128468990326, "learning_rate": 6.977234708802007e-06, "loss": 0.2213, "step": 33105 }, { "epoch": 0.59783040466853, "grad_norm": 0.5206707715988159, "learning_rate": 6.974531266305209e-06, "loss": 0.2196, "step": 33110 }, { "epoch": 0.5979206841014307, "grad_norm": 0.6969338655471802, "learning_rate": 6.971828067185369e-06, "loss": 0.2228, "step": 33115 }, { "epoch": 0.5980109635343315, "grad_norm": 5.248167037963867, "learning_rate": 6.969125111659945e-06, "loss": 0.2213, "step": 33120 }, { "epoch": 0.5981012429672322, "grad_norm": 0.26893338561058044, "learning_rate": 6.966422399946367e-06, "loss": 0.2328, "step": 33125 }, { "epoch": 0.5981915224001328, "grad_norm": 0.29222434759140015, "learning_rate": 6.9637199322620565e-06, "loss": 0.2759, "step": 33130 }, { "epoch": 0.5982818018330336, "grad_norm": 0.2278512418270111, "learning_rate": 6.961017708824395e-06, "loss": 0.2972, "step": 33135 }, { "epoch": 0.5983720812659343, "grad_norm": 0.3829987049102783, "learning_rate": 6.958315729850769e-06, "loss": 0.2152, "step": 33140 }, { "epoch": 0.5984623606988351, "grad_norm": 0.4814859628677368, "learning_rate": 6.9556139955585225e-06, "loss": 0.2729, "step": 33145 }, { "epoch": 0.5985526401317357, "grad_norm": 0.5911121368408203, "learning_rate": 6.952912506164997e-06, "loss": 0.1972, "step": 33150 }, { "epoch": 0.5986429195646364, "grad_norm": 0.5242965817451477, "learning_rate": 6.950211261887509e-06, "loss": 0.2519, "step": 33155 }, { "epoch": 0.5987331989975372, "grad_norm": 0.3182903528213501, "learning_rate": 6.947510262943347e-06, "loss": 0.1171, "step": 33160 }, { "epoch": 0.5988234784304379, "grad_norm": 0.6158583164215088, "learning_rate": 6.944809509549793e-06, "loss": 0.2047, "step": 33165 }, { "epoch": 0.5989137578633386, "grad_norm": 0.6978740692138672, "learning_rate": 6.942109001924098e-06, "loss": 0.1853, "step": 33170 }, { "epoch": 0.5990040372962393, "grad_norm": 0.46398329734802246, "learning_rate": 6.939408740283504e-06, "loss": 0.1128, "step": 33175 }, { "epoch": 0.59909431672914, "grad_norm": 0.4759972393512726, "learning_rate": 6.936708724845221e-06, "loss": 0.2341, "step": 33180 }, { "epoch": 0.5991845961620408, "grad_norm": 0.7083093523979187, "learning_rate": 6.9340089558264505e-06, "loss": 0.2312, "step": 33185 }, { "epoch": 0.5992748755949414, "grad_norm": 0.5469648838043213, "learning_rate": 6.931309433444367e-06, "loss": 0.288, "step": 33190 }, { "epoch": 0.5993651550278422, "grad_norm": 0.8999037146568298, "learning_rate": 6.92861015791613e-06, "loss": 0.2401, "step": 33195 }, { "epoch": 0.5994554344607429, "grad_norm": 1.071241021156311, "learning_rate": 6.925911129458871e-06, "loss": 0.2853, "step": 33200 }, { "epoch": 0.5995457138936436, "grad_norm": 0.4732885956764221, "learning_rate": 6.923212348289714e-06, "loss": 0.2811, "step": 33205 }, { "epoch": 0.5996359933265443, "grad_norm": 0.31907421350479126, "learning_rate": 6.92051381462575e-06, "loss": 0.1849, "step": 33210 }, { "epoch": 0.599726272759445, "grad_norm": 0.7735522389411926, "learning_rate": 6.917815528684062e-06, "loss": 0.2664, "step": 33215 }, { "epoch": 0.5998165521923458, "grad_norm": 0.41051408648490906, "learning_rate": 6.915117490681703e-06, "loss": 0.2254, "step": 33220 }, { "epoch": 0.5999068316252465, "grad_norm": 0.8531498312950134, "learning_rate": 6.912419700835717e-06, "loss": 0.3404, "step": 33225 }, { "epoch": 0.5999971110581471, "grad_norm": 0.40225955843925476, "learning_rate": 6.9097221593631126e-06, "loss": 0.2003, "step": 33230 }, { "epoch": 0.6000873904910479, "grad_norm": 0.3249049782752991, "learning_rate": 6.907024866480895e-06, "loss": 0.2132, "step": 33235 }, { "epoch": 0.6001776699239486, "grad_norm": 0.6028874516487122, "learning_rate": 6.904327822406036e-06, "loss": 0.2482, "step": 33240 }, { "epoch": 0.6002679493568494, "grad_norm": 0.3450670540332794, "learning_rate": 6.9016310273555e-06, "loss": 0.1889, "step": 33245 }, { "epoch": 0.60035822878975, "grad_norm": 0.5652427077293396, "learning_rate": 6.898934481546219e-06, "loss": 0.2245, "step": 33250 }, { "epoch": 0.6004485082226507, "grad_norm": 0.7112207412719727, "learning_rate": 6.89623818519511e-06, "loss": 0.2746, "step": 33255 }, { "epoch": 0.6005387876555515, "grad_norm": 0.5124552249908447, "learning_rate": 6.893542138519076e-06, "loss": 0.2693, "step": 33260 }, { "epoch": 0.6006290670884522, "grad_norm": 0.4290370047092438, "learning_rate": 6.890846341734987e-06, "loss": 0.1819, "step": 33265 }, { "epoch": 0.6007193465213528, "grad_norm": 0.45447850227355957, "learning_rate": 6.888150795059705e-06, "loss": 0.3399, "step": 33270 }, { "epoch": 0.6008096259542536, "grad_norm": 0.28496402502059937, "learning_rate": 6.885455498710065e-06, "loss": 0.2014, "step": 33275 }, { "epoch": 0.6008999053871543, "grad_norm": 0.3123587369918823, "learning_rate": 6.882760452902888e-06, "loss": 0.2066, "step": 33280 }, { "epoch": 0.6009901848200551, "grad_norm": 0.5603709816932678, "learning_rate": 6.880065657854965e-06, "loss": 0.2138, "step": 33285 }, { "epoch": 0.6010804642529558, "grad_norm": 0.3382869362831116, "learning_rate": 6.87737111378308e-06, "loss": 0.2526, "step": 33290 }, { "epoch": 0.6011707436858564, "grad_norm": 0.500745415687561, "learning_rate": 6.874676820903983e-06, "loss": 0.3137, "step": 33295 }, { "epoch": 0.6012610231187572, "grad_norm": 0.3618326187133789, "learning_rate": 6.871982779434412e-06, "loss": 0.2506, "step": 33300 }, { "epoch": 0.6013513025516579, "grad_norm": 0.481738418340683, "learning_rate": 6.869288989591082e-06, "loss": 0.2275, "step": 33305 }, { "epoch": 0.6014415819845587, "grad_norm": 0.4610210657119751, "learning_rate": 6.8665954515906955e-06, "loss": 0.2111, "step": 33310 }, { "epoch": 0.6015318614174593, "grad_norm": 0.583780825138092, "learning_rate": 6.863902165649917e-06, "loss": 0.2995, "step": 33315 }, { "epoch": 0.60162214085036, "grad_norm": 0.4351842701435089, "learning_rate": 6.861209131985412e-06, "loss": 0.206, "step": 33320 }, { "epoch": 0.6017124202832608, "grad_norm": 0.6149066090583801, "learning_rate": 6.858516350813806e-06, "loss": 0.3505, "step": 33325 }, { "epoch": 0.6018026997161615, "grad_norm": 0.3864111304283142, "learning_rate": 6.855823822351726e-06, "loss": 0.3704, "step": 33330 }, { "epoch": 0.6018929791490621, "grad_norm": 0.1760164052248001, "learning_rate": 6.853131546815751e-06, "loss": 0.1318, "step": 33335 }, { "epoch": 0.6019832585819629, "grad_norm": 0.18229033052921295, "learning_rate": 6.85043952442247e-06, "loss": 0.2728, "step": 33340 }, { "epoch": 0.6020735380148636, "grad_norm": 0.3698432445526123, "learning_rate": 6.8477477553884275e-06, "loss": 0.2804, "step": 33345 }, { "epoch": 0.6021638174477644, "grad_norm": 0.6074759364128113, "learning_rate": 6.845056239930157e-06, "loss": 0.231, "step": 33350 }, { "epoch": 0.602254096880665, "grad_norm": 0.4252072274684906, "learning_rate": 6.8423649782641774e-06, "loss": 0.2622, "step": 33355 }, { "epoch": 0.6023443763135657, "grad_norm": 0.35315191745758057, "learning_rate": 6.839673970606973e-06, "loss": 0.256, "step": 33360 }, { "epoch": 0.6024346557464665, "grad_norm": 0.49989739060401917, "learning_rate": 6.836983217175022e-06, "loss": 0.3722, "step": 33365 }, { "epoch": 0.6025249351793672, "grad_norm": 0.5511475205421448, "learning_rate": 6.834292718184772e-06, "loss": 0.2877, "step": 33370 }, { "epoch": 0.6026152146122679, "grad_norm": 0.341680645942688, "learning_rate": 6.831602473852659e-06, "loss": 0.2114, "step": 33375 }, { "epoch": 0.6027054940451686, "grad_norm": 0.4739898145198822, "learning_rate": 6.8289124843950874e-06, "loss": 0.2597, "step": 33380 }, { "epoch": 0.6027957734780693, "grad_norm": 0.3317372798919678, "learning_rate": 6.826222750028453e-06, "loss": 0.195, "step": 33385 }, { "epoch": 0.6028860529109701, "grad_norm": 0.5364915728569031, "learning_rate": 6.823533270969119e-06, "loss": 0.2198, "step": 33390 }, { "epoch": 0.6029763323438707, "grad_norm": 0.280256986618042, "learning_rate": 6.820844047433444e-06, "loss": 0.2108, "step": 33395 }, { "epoch": 0.6030666117767715, "grad_norm": 0.4123166501522064, "learning_rate": 6.818155079637745e-06, "loss": 0.2774, "step": 33400 }, { "epoch": 0.6031568912096722, "grad_norm": 0.2371968924999237, "learning_rate": 6.815466367798338e-06, "loss": 0.2645, "step": 33405 }, { "epoch": 0.6032471706425729, "grad_norm": 0.3424372375011444, "learning_rate": 6.8127779121315075e-06, "loss": 0.2064, "step": 33410 }, { "epoch": 0.6033374500754736, "grad_norm": 0.47352439165115356, "learning_rate": 6.8100897128535206e-06, "loss": 0.2816, "step": 33415 }, { "epoch": 0.6034277295083743, "grad_norm": 0.5806667804718018, "learning_rate": 6.807401770180622e-06, "loss": 0.2438, "step": 33420 }, { "epoch": 0.603518008941275, "grad_norm": 0.4509647488594055, "learning_rate": 6.804714084329043e-06, "loss": 0.2502, "step": 33425 }, { "epoch": 0.6036082883741758, "grad_norm": 0.6704498529434204, "learning_rate": 6.802026655514982e-06, "loss": 0.2453, "step": 33430 }, { "epoch": 0.6036985678070764, "grad_norm": 0.517916202545166, "learning_rate": 6.7993394839546235e-06, "loss": 0.3107, "step": 33435 }, { "epoch": 0.6037888472399772, "grad_norm": 0.42207249999046326, "learning_rate": 6.796652569864135e-06, "loss": 0.2529, "step": 33440 }, { "epoch": 0.6038791266728779, "grad_norm": 0.4871932864189148, "learning_rate": 6.7939659134596534e-06, "loss": 0.1876, "step": 33445 }, { "epoch": 0.6039694061057787, "grad_norm": 0.42853614687919617, "learning_rate": 6.791279514957306e-06, "loss": 0.1931, "step": 33450 }, { "epoch": 0.6040596855386793, "grad_norm": 0.36403781175613403, "learning_rate": 6.78859337457319e-06, "loss": 0.2714, "step": 33455 }, { "epoch": 0.60414996497158, "grad_norm": 0.643302857875824, "learning_rate": 6.785907492523391e-06, "loss": 0.2594, "step": 33460 }, { "epoch": 0.6042402444044808, "grad_norm": 0.5370124578475952, "learning_rate": 6.7832218690239636e-06, "loss": 0.3383, "step": 33465 }, { "epoch": 0.6043305238373815, "grad_norm": 0.5137472152709961, "learning_rate": 6.780536504290949e-06, "loss": 0.2418, "step": 33470 }, { "epoch": 0.6044208032702821, "grad_norm": 0.48361706733703613, "learning_rate": 6.777851398540363e-06, "loss": 0.2246, "step": 33475 }, { "epoch": 0.6045110827031829, "grad_norm": 0.2475363165140152, "learning_rate": 6.775166551988207e-06, "loss": 0.1396, "step": 33480 }, { "epoch": 0.6046013621360836, "grad_norm": 0.5070680975914001, "learning_rate": 6.772481964850453e-06, "loss": 0.3403, "step": 33485 }, { "epoch": 0.6046916415689844, "grad_norm": 0.5311540365219116, "learning_rate": 6.769797637343063e-06, "loss": 0.2733, "step": 33490 }, { "epoch": 0.604781921001885, "grad_norm": 0.44159597158432007, "learning_rate": 6.767113569681963e-06, "loss": 0.1231, "step": 33495 }, { "epoch": 0.6048722004347857, "grad_norm": 0.3537275791168213, "learning_rate": 6.764429762083071e-06, "loss": 0.2573, "step": 33500 }, { "epoch": 0.6049624798676865, "grad_norm": 0.6137586236000061, "learning_rate": 6.761746214762281e-06, "loss": 0.2545, "step": 33505 }, { "epoch": 0.6050527593005872, "grad_norm": 0.5679592490196228, "learning_rate": 6.759062927935466e-06, "loss": 0.2441, "step": 33510 }, { "epoch": 0.6051430387334878, "grad_norm": 0.43349897861480713, "learning_rate": 6.75637990181847e-06, "loss": 0.2459, "step": 33515 }, { "epoch": 0.6052333181663886, "grad_norm": 0.5576727390289307, "learning_rate": 6.7536971366271305e-06, "loss": 0.2069, "step": 33520 }, { "epoch": 0.6053235975992893, "grad_norm": 0.3747663199901581, "learning_rate": 6.7510146325772555e-06, "loss": 0.2893, "step": 33525 }, { "epoch": 0.6054138770321901, "grad_norm": 0.7237586379051208, "learning_rate": 6.748332389884627e-06, "loss": 0.2664, "step": 33530 }, { "epoch": 0.6055041564650907, "grad_norm": 0.44492822885513306, "learning_rate": 6.745650408765017e-06, "loss": 0.2212, "step": 33535 }, { "epoch": 0.6055944358979914, "grad_norm": 0.31927186250686646, "learning_rate": 6.742968689434169e-06, "loss": 0.1558, "step": 33540 }, { "epoch": 0.6056847153308922, "grad_norm": 0.6788272857666016, "learning_rate": 6.7402872321078094e-06, "loss": 0.2176, "step": 33545 }, { "epoch": 0.6057749947637929, "grad_norm": 0.5264540910720825, "learning_rate": 6.737606037001642e-06, "loss": 0.1845, "step": 33550 }, { "epoch": 0.6058652741966936, "grad_norm": 0.5646959543228149, "learning_rate": 6.734925104331351e-06, "loss": 0.2625, "step": 33555 }, { "epoch": 0.6059555536295943, "grad_norm": 0.4939166009426117, "learning_rate": 6.732244434312594e-06, "loss": 0.319, "step": 33560 }, { "epoch": 0.606045833062495, "grad_norm": 0.25671443343162537, "learning_rate": 6.729564027161013e-06, "loss": 0.2953, "step": 33565 }, { "epoch": 0.6061361124953958, "grad_norm": 0.5742185711860657, "learning_rate": 6.7268838830922275e-06, "loss": 0.1592, "step": 33570 }, { "epoch": 0.6062263919282964, "grad_norm": 0.24970318377017975, "learning_rate": 6.724204002321839e-06, "loss": 0.231, "step": 33575 }, { "epoch": 0.6063166713611972, "grad_norm": 0.27477550506591797, "learning_rate": 6.721524385065417e-06, "loss": 0.186, "step": 33580 }, { "epoch": 0.6064069507940979, "grad_norm": 0.4840497374534607, "learning_rate": 6.7188450315385236e-06, "loss": 0.1251, "step": 33585 }, { "epoch": 0.6064972302269986, "grad_norm": 0.38732531666755676, "learning_rate": 6.716165941956689e-06, "loss": 0.1924, "step": 33590 }, { "epoch": 0.6065875096598993, "grad_norm": 0.2982494831085205, "learning_rate": 6.713487116535433e-06, "loss": 0.219, "step": 33595 }, { "epoch": 0.6066777890928, "grad_norm": 0.3711354434490204, "learning_rate": 6.710808555490237e-06, "loss": 0.2206, "step": 33600 }, { "epoch": 0.6067680685257008, "grad_norm": 0.3120940923690796, "learning_rate": 6.708130259036585e-06, "loss": 0.2555, "step": 33605 }, { "epoch": 0.6068583479586015, "grad_norm": 0.3166637420654297, "learning_rate": 6.705452227389914e-06, "loss": 0.2232, "step": 33610 }, { "epoch": 0.6069486273915021, "grad_norm": 0.787291944026947, "learning_rate": 6.70277446076566e-06, "loss": 0.2896, "step": 33615 }, { "epoch": 0.6070389068244029, "grad_norm": 0.4427154064178467, "learning_rate": 6.7000969593792295e-06, "loss": 0.3057, "step": 33620 }, { "epoch": 0.6071291862573036, "grad_norm": 0.3680169880390167, "learning_rate": 6.697419723446001e-06, "loss": 0.2339, "step": 33625 }, { "epoch": 0.6072194656902044, "grad_norm": 0.5025822520256042, "learning_rate": 6.694742753181347e-06, "loss": 0.2132, "step": 33630 }, { "epoch": 0.607309745123105, "grad_norm": 0.5601755976676941, "learning_rate": 6.692066048800605e-06, "loss": 0.1726, "step": 33635 }, { "epoch": 0.6074000245560057, "grad_norm": 0.3275012969970703, "learning_rate": 6.689389610519102e-06, "loss": 0.1813, "step": 33640 }, { "epoch": 0.6074903039889065, "grad_norm": 0.3714092969894409, "learning_rate": 6.6867134385521305e-06, "loss": 0.1963, "step": 33645 }, { "epoch": 0.6075805834218072, "grad_norm": 0.7068204283714294, "learning_rate": 6.684037533114976e-06, "loss": 0.1556, "step": 33650 }, { "epoch": 0.6076708628547078, "grad_norm": 0.4423868656158447, "learning_rate": 6.681361894422889e-06, "loss": 0.2443, "step": 33655 }, { "epoch": 0.6077611422876086, "grad_norm": 1.2866357564926147, "learning_rate": 6.678686522691112e-06, "loss": 0.2191, "step": 33660 }, { "epoch": 0.6078514217205093, "grad_norm": 0.2865354120731354, "learning_rate": 6.676011418134851e-06, "loss": 0.2938, "step": 33665 }, { "epoch": 0.6079417011534101, "grad_norm": 0.48758894205093384, "learning_rate": 6.673336580969311e-06, "loss": 0.16, "step": 33670 }, { "epoch": 0.6080319805863107, "grad_norm": 0.7908018827438354, "learning_rate": 6.6706620114096496e-06, "loss": 0.2236, "step": 33675 }, { "epoch": 0.6081222600192114, "grad_norm": 0.6689500212669373, "learning_rate": 6.667987709671027e-06, "loss": 0.1365, "step": 33680 }, { "epoch": 0.6082125394521122, "grad_norm": 0.6612555384635925, "learning_rate": 6.665313675968564e-06, "loss": 0.3081, "step": 33685 }, { "epoch": 0.6083028188850129, "grad_norm": 0.601243793964386, "learning_rate": 6.662639910517374e-06, "loss": 0.2605, "step": 33690 }, { "epoch": 0.6083930983179137, "grad_norm": 0.5894544124603271, "learning_rate": 6.6599664135325345e-06, "loss": 0.2103, "step": 33695 }, { "epoch": 0.6084833777508143, "grad_norm": 0.5290404558181763, "learning_rate": 6.657293185229115e-06, "loss": 0.2908, "step": 33700 }, { "epoch": 0.608573657183715, "grad_norm": 0.4308032691478729, "learning_rate": 6.654620225822156e-06, "loss": 0.2222, "step": 33705 }, { "epoch": 0.6086639366166158, "grad_norm": 0.30124881863594055, "learning_rate": 6.651947535526672e-06, "loss": 0.202, "step": 33710 }, { "epoch": 0.6087542160495165, "grad_norm": 0.3160736560821533, "learning_rate": 6.64927511455767e-06, "loss": 0.2642, "step": 33715 }, { "epoch": 0.6088444954824171, "grad_norm": 0.3296411335468292, "learning_rate": 6.64660296313012e-06, "loss": 0.2427, "step": 33720 }, { "epoch": 0.6089347749153179, "grad_norm": 0.6400465369224548, "learning_rate": 6.6439310814589856e-06, "loss": 0.1672, "step": 33725 }, { "epoch": 0.6090250543482186, "grad_norm": 0.7744160890579224, "learning_rate": 6.641259469759186e-06, "loss": 0.179, "step": 33730 }, { "epoch": 0.6091153337811194, "grad_norm": 0.37067508697509766, "learning_rate": 6.6385881282456484e-06, "loss": 0.2073, "step": 33735 }, { "epoch": 0.60920561321402, "grad_norm": 0.4492238163948059, "learning_rate": 6.635917057133253e-06, "loss": 0.1919, "step": 33740 }, { "epoch": 0.6092958926469207, "grad_norm": 0.4266092777252197, "learning_rate": 6.633246256636873e-06, "loss": 0.2887, "step": 33745 }, { "epoch": 0.6093861720798215, "grad_norm": 0.5106934905052185, "learning_rate": 6.630575726971351e-06, "loss": 0.304, "step": 33750 }, { "epoch": 0.6094764515127222, "grad_norm": 0.26896047592163086, "learning_rate": 6.627905468351517e-06, "loss": 0.2235, "step": 33755 }, { "epoch": 0.6095667309456229, "grad_norm": 0.3424336016178131, "learning_rate": 6.6252354809921675e-06, "loss": 0.1777, "step": 33760 }, { "epoch": 0.6096570103785236, "grad_norm": 0.37666502594947815, "learning_rate": 6.622565765108087e-06, "loss": 0.2396, "step": 33765 }, { "epoch": 0.6097472898114243, "grad_norm": 0.6772646307945251, "learning_rate": 6.619896320914034e-06, "loss": 0.2772, "step": 33770 }, { "epoch": 0.6098375692443251, "grad_norm": 0.4182206988334656, "learning_rate": 6.61722714862475e-06, "loss": 0.2773, "step": 33775 }, { "epoch": 0.6099278486772257, "grad_norm": 0.5708869099617004, "learning_rate": 6.614558248454942e-06, "loss": 0.1938, "step": 33780 }, { "epoch": 0.6100181281101265, "grad_norm": 0.39538654685020447, "learning_rate": 6.611889620619311e-06, "loss": 0.2765, "step": 33785 }, { "epoch": 0.6101084075430272, "grad_norm": 0.6491618156433105, "learning_rate": 6.609221265332524e-06, "loss": 0.2431, "step": 33790 }, { "epoch": 0.6101986869759279, "grad_norm": 1.528210163116455, "learning_rate": 6.606553182809234e-06, "loss": 0.2736, "step": 33795 }, { "epoch": 0.6102889664088286, "grad_norm": 0.26595795154571533, "learning_rate": 6.603885373264072e-06, "loss": 0.2506, "step": 33800 }, { "epoch": 0.6103792458417293, "grad_norm": 0.43251991271972656, "learning_rate": 6.601217836911633e-06, "loss": 0.2007, "step": 33805 }, { "epoch": 0.61046952527463, "grad_norm": 0.33078595995903015, "learning_rate": 6.598550573966511e-06, "loss": 0.2649, "step": 33810 }, { "epoch": 0.6105598047075308, "grad_norm": 0.6174448728561401, "learning_rate": 6.595883584643261e-06, "loss": 0.2894, "step": 33815 }, { "epoch": 0.6106500841404314, "grad_norm": 0.5917649269104004, "learning_rate": 6.593216869156432e-06, "loss": 0.2367, "step": 33820 }, { "epoch": 0.6107403635733322, "grad_norm": 0.40432947874069214, "learning_rate": 6.590550427720531e-06, "loss": 0.2344, "step": 33825 }, { "epoch": 0.6108306430062329, "grad_norm": 0.26861852407455444, "learning_rate": 6.587884260550062e-06, "loss": 0.1964, "step": 33830 }, { "epoch": 0.6109209224391337, "grad_norm": 0.36993351578712463, "learning_rate": 6.585218367859493e-06, "loss": 0.2807, "step": 33835 }, { "epoch": 0.6110112018720343, "grad_norm": 0.30036458373069763, "learning_rate": 6.582552749863282e-06, "loss": 0.2971, "step": 33840 }, { "epoch": 0.611101481304935, "grad_norm": 0.3073712885379791, "learning_rate": 6.579887406775851e-06, "loss": 0.2099, "step": 33845 }, { "epoch": 0.6111917607378358, "grad_norm": 0.42312800884246826, "learning_rate": 6.577222338811615e-06, "loss": 0.2325, "step": 33850 }, { "epoch": 0.6112820401707365, "grad_norm": 0.3106360137462616, "learning_rate": 6.5745575461849535e-06, "loss": 0.3377, "step": 33855 }, { "epoch": 0.6113723196036371, "grad_norm": 0.40115922689437866, "learning_rate": 6.571893029110234e-06, "loss": 0.2981, "step": 33860 }, { "epoch": 0.6114625990365379, "grad_norm": 0.3473933935165405, "learning_rate": 6.569228787801791e-06, "loss": 0.217, "step": 33865 }, { "epoch": 0.6115528784694386, "grad_norm": 0.3707481026649475, "learning_rate": 6.566564822473955e-06, "loss": 0.2357, "step": 33870 }, { "epoch": 0.6116431579023394, "grad_norm": 0.618390679359436, "learning_rate": 6.563901133341011e-06, "loss": 0.1936, "step": 33875 }, { "epoch": 0.61173343733524, "grad_norm": 0.4362730383872986, "learning_rate": 6.561237720617239e-06, "loss": 0.2656, "step": 33880 }, { "epoch": 0.6118237167681407, "grad_norm": 0.4240482747554779, "learning_rate": 6.558574584516888e-06, "loss": 0.264, "step": 33885 }, { "epoch": 0.6119139962010415, "grad_norm": 0.3510347008705139, "learning_rate": 6.555911725254196e-06, "loss": 0.2546, "step": 33890 }, { "epoch": 0.6120042756339422, "grad_norm": 0.2657509744167328, "learning_rate": 6.553249143043361e-06, "loss": 0.205, "step": 33895 }, { "epoch": 0.6120945550668428, "grad_norm": 0.28988775610923767, "learning_rate": 6.55058683809857e-06, "loss": 0.2574, "step": 33900 }, { "epoch": 0.6121848344997436, "grad_norm": 0.28201043605804443, "learning_rate": 6.5479248106339944e-06, "loss": 0.1787, "step": 33905 }, { "epoch": 0.6122751139326443, "grad_norm": 0.5050878524780273, "learning_rate": 6.545263060863763e-06, "loss": 0.2254, "step": 33910 }, { "epoch": 0.6123653933655451, "grad_norm": 0.5506510734558105, "learning_rate": 6.542601589002002e-06, "loss": 0.2477, "step": 33915 }, { "epoch": 0.6124556727984457, "grad_norm": 0.4561326503753662, "learning_rate": 6.539940395262803e-06, "loss": 0.2256, "step": 33920 }, { "epoch": 0.6125459522313464, "grad_norm": 0.6151211261749268, "learning_rate": 6.537279479860248e-06, "loss": 0.1705, "step": 33925 }, { "epoch": 0.6126362316642472, "grad_norm": 0.33129143714904785, "learning_rate": 6.534618843008374e-06, "loss": 0.1842, "step": 33930 }, { "epoch": 0.6127265110971479, "grad_norm": 0.715975821018219, "learning_rate": 6.5319584849212255e-06, "loss": 0.2435, "step": 33935 }, { "epoch": 0.6128167905300486, "grad_norm": 0.452910840511322, "learning_rate": 6.529298405812799e-06, "loss": 0.2526, "step": 33940 }, { "epoch": 0.6129070699629493, "grad_norm": 0.4404039680957794, "learning_rate": 6.526638605897082e-06, "loss": 0.1775, "step": 33945 }, { "epoch": 0.61299734939585, "grad_norm": 0.2817777097225189, "learning_rate": 6.5239790853880325e-06, "loss": 0.2172, "step": 33950 }, { "epoch": 0.6130876288287508, "grad_norm": 0.4639744758605957, "learning_rate": 6.521319844499599e-06, "loss": 0.215, "step": 33955 }, { "epoch": 0.6131779082616514, "grad_norm": 0.7217711806297302, "learning_rate": 6.518660883445685e-06, "loss": 0.3002, "step": 33960 }, { "epoch": 0.6132681876945522, "grad_norm": 0.5533402562141418, "learning_rate": 6.516002202440195e-06, "loss": 0.2314, "step": 33965 }, { "epoch": 0.6133584671274529, "grad_norm": 0.3430977761745453, "learning_rate": 6.513343801696993e-06, "loss": 0.2209, "step": 33970 }, { "epoch": 0.6134487465603536, "grad_norm": 0.4223589301109314, "learning_rate": 6.510685681429937e-06, "loss": 0.2129, "step": 33975 }, { "epoch": 0.6135390259932543, "grad_norm": 0.3885827660560608, "learning_rate": 6.508027841852844e-06, "loss": 0.2364, "step": 33980 }, { "epoch": 0.613629305426155, "grad_norm": 0.26624947786331177, "learning_rate": 6.505370283179524e-06, "loss": 0.1584, "step": 33985 }, { "epoch": 0.6137195848590558, "grad_norm": 0.5280428528785706, "learning_rate": 6.502713005623758e-06, "loss": 0.2098, "step": 33990 }, { "epoch": 0.6138098642919565, "grad_norm": 0.42223653197288513, "learning_rate": 6.500056009399296e-06, "loss": 0.291, "step": 33995 }, { "epoch": 0.6139001437248571, "grad_norm": 0.4914424419403076, "learning_rate": 6.497399294719888e-06, "loss": 0.2807, "step": 34000 }, { "epoch": 0.6139904231577579, "grad_norm": 0.36946821212768555, "learning_rate": 6.494742861799237e-06, "loss": 0.28, "step": 34005 }, { "epoch": 0.6140807025906586, "grad_norm": 0.6688858866691589, "learning_rate": 6.492086710851039e-06, "loss": 0.327, "step": 34010 }, { "epoch": 0.6141709820235594, "grad_norm": 0.35073772072792053, "learning_rate": 6.489430842088957e-06, "loss": 0.3021, "step": 34015 }, { "epoch": 0.61426126145646, "grad_norm": 0.5166581273078918, "learning_rate": 6.486775255726645e-06, "loss": 0.1812, "step": 34020 }, { "epoch": 0.6143515408893607, "grad_norm": 0.37278056144714355, "learning_rate": 6.484119951977714e-06, "loss": 0.267, "step": 34025 }, { "epoch": 0.6144418203222615, "grad_norm": 0.44880130887031555, "learning_rate": 6.481464931055774e-06, "loss": 0.2423, "step": 34030 }, { "epoch": 0.6145320997551622, "grad_norm": 0.35509809851646423, "learning_rate": 6.478810193174395e-06, "loss": 0.2572, "step": 34035 }, { "epoch": 0.6146223791880628, "grad_norm": 0.5073139667510986, "learning_rate": 6.4761557385471385e-06, "loss": 0.2865, "step": 34040 }, { "epoch": 0.6147126586209636, "grad_norm": 0.4112986624240875, "learning_rate": 6.473501567387528e-06, "loss": 0.1997, "step": 34045 }, { "epoch": 0.6148029380538643, "grad_norm": 0.4614567160606384, "learning_rate": 6.470847679909078e-06, "loss": 0.3304, "step": 34050 }, { "epoch": 0.6148932174867651, "grad_norm": 0.45724689960479736, "learning_rate": 6.468194076325271e-06, "loss": 0.2825, "step": 34055 }, { "epoch": 0.6149834969196657, "grad_norm": 0.29275423288345337, "learning_rate": 6.465540756849575e-06, "loss": 0.2266, "step": 34060 }, { "epoch": 0.6150737763525664, "grad_norm": 0.4016673266887665, "learning_rate": 6.462887721695424e-06, "loss": 0.203, "step": 34065 }, { "epoch": 0.6151640557854672, "grad_norm": 0.3553887903690338, "learning_rate": 6.460234971076243e-06, "loss": 0.2053, "step": 34070 }, { "epoch": 0.6152543352183679, "grad_norm": 0.6590433716773987, "learning_rate": 6.45758250520542e-06, "loss": 0.2523, "step": 34075 }, { "epoch": 0.6153446146512687, "grad_norm": 0.38849779963493347, "learning_rate": 6.454930324296326e-06, "loss": 0.1843, "step": 34080 }, { "epoch": 0.6154348940841693, "grad_norm": 0.812056303024292, "learning_rate": 6.4522784285623165e-06, "loss": 0.1499, "step": 34085 }, { "epoch": 0.61552517351707, "grad_norm": 0.6710259318351746, "learning_rate": 6.449626818216709e-06, "loss": 0.2728, "step": 34090 }, { "epoch": 0.6156154529499708, "grad_norm": 0.46642282605171204, "learning_rate": 6.4469754934728115e-06, "loss": 0.1461, "step": 34095 }, { "epoch": 0.6157057323828715, "grad_norm": 0.583949089050293, "learning_rate": 6.444324454543902e-06, "loss": 0.1903, "step": 34100 }, { "epoch": 0.6157960118157721, "grad_norm": 0.3867447078227997, "learning_rate": 6.441673701643241e-06, "loss": 0.2485, "step": 34105 }, { "epoch": 0.6158862912486729, "grad_norm": 0.44788867235183716, "learning_rate": 6.439023234984054e-06, "loss": 0.1767, "step": 34110 }, { "epoch": 0.6159765706815736, "grad_norm": 0.35031911730766296, "learning_rate": 6.4363730547795615e-06, "loss": 0.2218, "step": 34115 }, { "epoch": 0.6160668501144744, "grad_norm": 0.524443507194519, "learning_rate": 6.433723161242943e-06, "loss": 0.2522, "step": 34120 }, { "epoch": 0.616157129547375, "grad_norm": 1.5180704593658447, "learning_rate": 6.43107355458737e-06, "loss": 0.296, "step": 34125 }, { "epoch": 0.6162474089802757, "grad_norm": 0.5914297699928284, "learning_rate": 6.428424235025979e-06, "loss": 0.2384, "step": 34130 }, { "epoch": 0.6163376884131765, "grad_norm": 0.3615359961986542, "learning_rate": 6.425775202771893e-06, "loss": 0.3157, "step": 34135 }, { "epoch": 0.6164279678460772, "grad_norm": 0.49133241176605225, "learning_rate": 6.4231264580382e-06, "loss": 0.2748, "step": 34140 }, { "epoch": 0.6165182472789779, "grad_norm": 0.3857567310333252, "learning_rate": 6.420478001037982e-06, "loss": 0.2871, "step": 34145 }, { "epoch": 0.6166085267118786, "grad_norm": 0.14246675372123718, "learning_rate": 6.4178298319842795e-06, "loss": 0.2136, "step": 34150 }, { "epoch": 0.6166988061447793, "grad_norm": 0.35812297463417053, "learning_rate": 6.415181951090126e-06, "loss": 0.2314, "step": 34155 }, { "epoch": 0.6167890855776801, "grad_norm": 0.4039628505706787, "learning_rate": 6.412534358568516e-06, "loss": 0.3472, "step": 34160 }, { "epoch": 0.6168793650105807, "grad_norm": 0.3499111533164978, "learning_rate": 6.409887054632436e-06, "loss": 0.2001, "step": 34165 }, { "epoch": 0.6169696444434815, "grad_norm": 0.4685569405555725, "learning_rate": 6.407240039494843e-06, "loss": 0.2612, "step": 34170 }, { "epoch": 0.6170599238763822, "grad_norm": 0.4339657425880432, "learning_rate": 6.404593313368661e-06, "loss": 0.2849, "step": 34175 }, { "epoch": 0.6171502033092829, "grad_norm": 0.20176739990711212, "learning_rate": 6.401946876466809e-06, "loss": 0.2056, "step": 34180 }, { "epoch": 0.6172404827421836, "grad_norm": 0.6576533317565918, "learning_rate": 6.3993007290021695e-06, "loss": 0.3022, "step": 34185 }, { "epoch": 0.6173307621750843, "grad_norm": 0.43607407808303833, "learning_rate": 6.396654871187606e-06, "loss": 0.2136, "step": 34190 }, { "epoch": 0.617421041607985, "grad_norm": 0.7124909162521362, "learning_rate": 6.394009303235959e-06, "loss": 0.227, "step": 34195 }, { "epoch": 0.6175113210408858, "grad_norm": 0.4305925965309143, "learning_rate": 6.39136402536005e-06, "loss": 0.2486, "step": 34200 }, { "epoch": 0.6176016004737864, "grad_norm": 0.7978116869926453, "learning_rate": 6.388719037772662e-06, "loss": 0.3101, "step": 34205 }, { "epoch": 0.6176918799066872, "grad_norm": 0.5606377720832825, "learning_rate": 6.386074340686575e-06, "loss": 0.2684, "step": 34210 }, { "epoch": 0.6177821593395879, "grad_norm": 0.578524112701416, "learning_rate": 6.383429934314526e-06, "loss": 0.1665, "step": 34215 }, { "epoch": 0.6178724387724887, "grad_norm": 0.3661254346370697, "learning_rate": 6.380785818869251e-06, "loss": 0.262, "step": 34220 }, { "epoch": 0.6179627182053893, "grad_norm": 0.30994969606399536, "learning_rate": 6.378141994563438e-06, "loss": 0.2091, "step": 34225 }, { "epoch": 0.61805299763829, "grad_norm": 0.5623824000358582, "learning_rate": 6.37549846160977e-06, "loss": 0.2085, "step": 34230 }, { "epoch": 0.6181432770711908, "grad_norm": 0.6264132261276245, "learning_rate": 6.3728552202208945e-06, "loss": 0.3464, "step": 34235 }, { "epoch": 0.6182335565040915, "grad_norm": 0.47519147396087646, "learning_rate": 6.3702122706094515e-06, "loss": 0.2653, "step": 34240 }, { "epoch": 0.6183238359369921, "grad_norm": 0.36290884017944336, "learning_rate": 6.367569612988033e-06, "loss": 0.2852, "step": 34245 }, { "epoch": 0.6184141153698929, "grad_norm": 0.5039083957672119, "learning_rate": 6.364927247569235e-06, "loss": 0.2122, "step": 34250 }, { "epoch": 0.6185043948027936, "grad_norm": 0.456661581993103, "learning_rate": 6.362285174565605e-06, "loss": 0.1835, "step": 34255 }, { "epoch": 0.6185946742356944, "grad_norm": 0.27523332834243774, "learning_rate": 6.359643394189688e-06, "loss": 0.2324, "step": 34260 }, { "epoch": 0.618684953668595, "grad_norm": 0.6493564248085022, "learning_rate": 6.357001906653993e-06, "loss": 0.1441, "step": 34265 }, { "epoch": 0.6187752331014957, "grad_norm": 0.4140636622905731, "learning_rate": 6.354360712171003e-06, "loss": 0.2299, "step": 34270 }, { "epoch": 0.6188655125343965, "grad_norm": 0.3528628647327423, "learning_rate": 6.35171981095319e-06, "loss": 0.2098, "step": 34275 }, { "epoch": 0.6189557919672972, "grad_norm": 0.40171143412590027, "learning_rate": 6.349079203212989e-06, "loss": 0.2456, "step": 34280 }, { "epoch": 0.6190460714001979, "grad_norm": 0.45002445578575134, "learning_rate": 6.346438889162827e-06, "loss": 0.1707, "step": 34285 }, { "epoch": 0.6191363508330986, "grad_norm": 0.6447219848632812, "learning_rate": 6.343798869015088e-06, "loss": 0.2391, "step": 34290 }, { "epoch": 0.6192266302659993, "grad_norm": 0.49332907795906067, "learning_rate": 6.34115914298215e-06, "loss": 0.1497, "step": 34295 }, { "epoch": 0.6193169096989001, "grad_norm": 0.32314741611480713, "learning_rate": 6.338519711276353e-06, "loss": 0.2177, "step": 34300 }, { "epoch": 0.6194071891318007, "grad_norm": 0.3138463497161865, "learning_rate": 6.3358805741100285e-06, "loss": 0.2568, "step": 34305 }, { "epoch": 0.6194974685647014, "grad_norm": 0.3489329516887665, "learning_rate": 6.333241731695466e-06, "loss": 0.2336, "step": 34310 }, { "epoch": 0.6195877479976022, "grad_norm": 0.290552020072937, "learning_rate": 6.33060318424495e-06, "loss": 0.1592, "step": 34315 }, { "epoch": 0.6196780274305029, "grad_norm": 0.37173569202423096, "learning_rate": 6.3279649319707245e-06, "loss": 0.1762, "step": 34320 }, { "epoch": 0.6197683068634036, "grad_norm": 0.49615028500556946, "learning_rate": 6.325326975085026e-06, "loss": 0.2648, "step": 34325 }, { "epoch": 0.6198585862963043, "grad_norm": 0.5045347213745117, "learning_rate": 6.322689313800052e-06, "loss": 0.3263, "step": 34330 }, { "epoch": 0.619948865729205, "grad_norm": 0.41068151593208313, "learning_rate": 6.320051948327991e-06, "loss": 0.159, "step": 34335 }, { "epoch": 0.6200391451621058, "grad_norm": 0.8643396496772766, "learning_rate": 6.3174148788809895e-06, "loss": 0.2559, "step": 34340 }, { "epoch": 0.6201294245950064, "grad_norm": 0.32451334595680237, "learning_rate": 6.314778105671189e-06, "loss": 0.344, "step": 34345 }, { "epoch": 0.6202197040279072, "grad_norm": 0.6597735285758972, "learning_rate": 6.3121416289106975e-06, "loss": 0.2211, "step": 34350 }, { "epoch": 0.6203099834608079, "grad_norm": 0.42996546626091003, "learning_rate": 6.3095054488115945e-06, "loss": 0.2387, "step": 34355 }, { "epoch": 0.6204002628937086, "grad_norm": 0.4882093369960785, "learning_rate": 6.306869565585949e-06, "loss": 0.242, "step": 34360 }, { "epoch": 0.6204905423266093, "grad_norm": 0.3846128582954407, "learning_rate": 6.304233979445791e-06, "loss": 0.1743, "step": 34365 }, { "epoch": 0.62058082175951, "grad_norm": 0.7323339581489563, "learning_rate": 6.301598690603143e-06, "loss": 0.2092, "step": 34370 }, { "epoch": 0.6206711011924108, "grad_norm": 0.21192646026611328, "learning_rate": 6.298963699269989e-06, "loss": 0.2194, "step": 34375 }, { "epoch": 0.6207613806253115, "grad_norm": 0.49501052498817444, "learning_rate": 6.296329005658295e-06, "loss": 0.1443, "step": 34380 }, { "epoch": 0.6208516600582121, "grad_norm": 0.3888206481933594, "learning_rate": 6.293694609980003e-06, "loss": 0.143, "step": 34385 }, { "epoch": 0.6209419394911129, "grad_norm": 0.5048769116401672, "learning_rate": 6.291060512447036e-06, "loss": 0.2263, "step": 34390 }, { "epoch": 0.6210322189240136, "grad_norm": 0.4002924859523773, "learning_rate": 6.2884267132712805e-06, "loss": 0.2666, "step": 34395 }, { "epoch": 0.6211224983569144, "grad_norm": 0.415455162525177, "learning_rate": 6.285793212664615e-06, "loss": 0.2045, "step": 34400 }, { "epoch": 0.621212777789815, "grad_norm": 0.28391233086586, "learning_rate": 6.283160010838876e-06, "loss": 0.1603, "step": 34405 }, { "epoch": 0.6213030572227157, "grad_norm": 0.6596155762672424, "learning_rate": 6.280527108005892e-06, "loss": 0.2308, "step": 34410 }, { "epoch": 0.6213933366556165, "grad_norm": 0.4737531244754791, "learning_rate": 6.2778945043774566e-06, "loss": 0.2599, "step": 34415 }, { "epoch": 0.6214836160885172, "grad_norm": 0.44785115122795105, "learning_rate": 6.275262200165351e-06, "loss": 0.3414, "step": 34420 }, { "epoch": 0.6215738955214178, "grad_norm": 0.23669376969337463, "learning_rate": 6.272630195581315e-06, "loss": 0.2331, "step": 34425 }, { "epoch": 0.6216641749543186, "grad_norm": 0.2137349247932434, "learning_rate": 6.2699984908370815e-06, "loss": 0.2572, "step": 34430 }, { "epoch": 0.6217544543872193, "grad_norm": 0.5028064846992493, "learning_rate": 6.267367086144348e-06, "loss": 0.2618, "step": 34435 }, { "epoch": 0.6218447338201201, "grad_norm": 0.5181789398193359, "learning_rate": 6.264735981714798e-06, "loss": 0.1782, "step": 34440 }, { "epoch": 0.6219350132530207, "grad_norm": 0.8552795052528381, "learning_rate": 6.262105177760078e-06, "loss": 0.2167, "step": 34445 }, { "epoch": 0.6220252926859214, "grad_norm": 0.4309433102607727, "learning_rate": 6.259474674491816e-06, "loss": 0.189, "step": 34450 }, { "epoch": 0.6221155721188222, "grad_norm": 0.4273078143596649, "learning_rate": 6.256844472121624e-06, "loss": 0.2871, "step": 34455 }, { "epoch": 0.6222058515517229, "grad_norm": 0.40720057487487793, "learning_rate": 6.254214570861075e-06, "loss": 0.2457, "step": 34460 }, { "epoch": 0.6222961309846236, "grad_norm": 0.2771539092063904, "learning_rate": 6.251584970921736e-06, "loss": 0.1588, "step": 34465 }, { "epoch": 0.6223864104175243, "grad_norm": 0.5335532426834106, "learning_rate": 6.2489556725151256e-06, "loss": 0.2535, "step": 34470 }, { "epoch": 0.622476689850425, "grad_norm": 0.37770241498947144, "learning_rate": 6.246326675852763e-06, "loss": 0.1764, "step": 34475 }, { "epoch": 0.6225669692833258, "grad_norm": 0.39951586723327637, "learning_rate": 6.2436979811461244e-06, "loss": 0.2941, "step": 34480 }, { "epoch": 0.6226572487162265, "grad_norm": 0.4991743862628937, "learning_rate": 6.241069588606675e-06, "loss": 0.1905, "step": 34485 }, { "epoch": 0.6227475281491271, "grad_norm": 0.44111600518226624, "learning_rate": 6.238441498445844e-06, "loss": 0.2377, "step": 34490 }, { "epoch": 0.6228378075820279, "grad_norm": 0.6911139488220215, "learning_rate": 6.235813710875048e-06, "loss": 0.2707, "step": 34495 }, { "epoch": 0.6229280870149286, "grad_norm": 0.5214601159095764, "learning_rate": 6.233186226105669e-06, "loss": 0.2626, "step": 34500 }, { "epoch": 0.6230183664478294, "grad_norm": 0.2713903784751892, "learning_rate": 6.2305590443490735e-06, "loss": 0.2807, "step": 34505 }, { "epoch": 0.62310864588073, "grad_norm": 0.38960278034210205, "learning_rate": 6.22793216581659e-06, "loss": 0.2792, "step": 34510 }, { "epoch": 0.6231989253136307, "grad_norm": 0.5309028625488281, "learning_rate": 6.225305590719546e-06, "loss": 0.3215, "step": 34515 }, { "epoch": 0.6232892047465315, "grad_norm": 0.6727641224861145, "learning_rate": 6.222679319269218e-06, "loss": 0.2728, "step": 34520 }, { "epoch": 0.6233794841794322, "grad_norm": 0.6221792697906494, "learning_rate": 6.220053351676876e-06, "loss": 0.266, "step": 34525 }, { "epoch": 0.6234697636123329, "grad_norm": 0.3391434848308563, "learning_rate": 6.217427688153757e-06, "loss": 0.1645, "step": 34530 }, { "epoch": 0.6235600430452336, "grad_norm": 0.24358294904232025, "learning_rate": 6.214802328911085e-06, "loss": 0.1537, "step": 34535 }, { "epoch": 0.6236503224781343, "grad_norm": 0.6168825626373291, "learning_rate": 6.212177274160041e-06, "loss": 0.2049, "step": 34540 }, { "epoch": 0.6237406019110351, "grad_norm": 0.23135057091712952, "learning_rate": 6.209552524111791e-06, "loss": 0.1655, "step": 34545 }, { "epoch": 0.6238308813439357, "grad_norm": 0.3386957049369812, "learning_rate": 6.206928078977488e-06, "loss": 0.2538, "step": 34550 }, { "epoch": 0.6239211607768365, "grad_norm": 0.35345205664634705, "learning_rate": 6.204303938968237e-06, "loss": 0.2356, "step": 34555 }, { "epoch": 0.6240114402097372, "grad_norm": 0.3981238007545471, "learning_rate": 6.20168010429514e-06, "loss": 0.3016, "step": 34560 }, { "epoch": 0.6241017196426379, "grad_norm": 0.4604952931404114, "learning_rate": 6.19905657516926e-06, "loss": 0.2168, "step": 34565 }, { "epoch": 0.6241919990755386, "grad_norm": 0.7224426865577698, "learning_rate": 6.196433351801647e-06, "loss": 0.215, "step": 34570 }, { "epoch": 0.6242822785084393, "grad_norm": 0.6132074594497681, "learning_rate": 6.193810434403309e-06, "loss": 0.2833, "step": 34575 }, { "epoch": 0.6243725579413401, "grad_norm": 0.41222119331359863, "learning_rate": 6.191187823185256e-06, "loss": 0.2253, "step": 34580 }, { "epoch": 0.6244628373742408, "grad_norm": 0.7935979962348938, "learning_rate": 6.188565518358445e-06, "loss": 0.194, "step": 34585 }, { "epoch": 0.6245531168071414, "grad_norm": 0.7769051790237427, "learning_rate": 6.185943520133828e-06, "loss": 0.1693, "step": 34590 }, { "epoch": 0.6246433962400422, "grad_norm": 0.3631523549556732, "learning_rate": 6.1833218287223215e-06, "loss": 0.2517, "step": 34595 }, { "epoch": 0.6247336756729429, "grad_norm": 0.35102444887161255, "learning_rate": 6.180700444334828e-06, "loss": 0.1909, "step": 34600 }, { "epoch": 0.6248239551058437, "grad_norm": 0.8616628050804138, "learning_rate": 6.178079367182212e-06, "loss": 0.2941, "step": 34605 }, { "epoch": 0.6249142345387443, "grad_norm": 0.4506857693195343, "learning_rate": 6.175458597475324e-06, "loss": 0.2509, "step": 34610 }, { "epoch": 0.625004513971645, "grad_norm": 0.40034666657447815, "learning_rate": 6.172838135424982e-06, "loss": 0.2744, "step": 34615 }, { "epoch": 0.6250947934045458, "grad_norm": 0.41605767607688904, "learning_rate": 6.17021798124199e-06, "loss": 0.2696, "step": 34620 }, { "epoch": 0.6251850728374465, "grad_norm": 0.5841496586799622, "learning_rate": 6.167598135137114e-06, "loss": 0.2184, "step": 34625 }, { "epoch": 0.6252753522703471, "grad_norm": 0.3614466190338135, "learning_rate": 6.164978597321104e-06, "loss": 0.2185, "step": 34630 }, { "epoch": 0.6253656317032479, "grad_norm": 0.471843421459198, "learning_rate": 6.162359368004685e-06, "loss": 0.2819, "step": 34635 }, { "epoch": 0.6254559111361486, "grad_norm": 0.696699321269989, "learning_rate": 6.159740447398546e-06, "loss": 0.2147, "step": 34640 }, { "epoch": 0.6255461905690494, "grad_norm": 0.5258233547210693, "learning_rate": 6.157121835713373e-06, "loss": 0.3034, "step": 34645 }, { "epoch": 0.62563647000195, "grad_norm": 0.2687463164329529, "learning_rate": 6.154503533159806e-06, "loss": 0.2262, "step": 34650 }, { "epoch": 0.6257267494348507, "grad_norm": 0.4536837935447693, "learning_rate": 6.15188553994847e-06, "loss": 0.3113, "step": 34655 }, { "epoch": 0.6258170288677515, "grad_norm": 0.2425563782453537, "learning_rate": 6.149267856289962e-06, "loss": 0.2513, "step": 34660 }, { "epoch": 0.6259073083006522, "grad_norm": 0.4896921217441559, "learning_rate": 6.146650482394861e-06, "loss": 0.2983, "step": 34665 }, { "epoch": 0.6259975877335529, "grad_norm": 0.4157010018825531, "learning_rate": 6.14403341847371e-06, "loss": 0.2094, "step": 34670 }, { "epoch": 0.6260878671664536, "grad_norm": 0.6023677587509155, "learning_rate": 6.141416664737034e-06, "loss": 0.2272, "step": 34675 }, { "epoch": 0.6261781465993543, "grad_norm": 0.8046973347663879, "learning_rate": 6.138800221395333e-06, "loss": 0.292, "step": 34680 }, { "epoch": 0.6262684260322551, "grad_norm": 0.3939763009548187, "learning_rate": 6.136184088659084e-06, "loss": 0.3303, "step": 34685 }, { "epoch": 0.6263587054651557, "grad_norm": 0.28620174527168274, "learning_rate": 6.133568266738729e-06, "loss": 0.2183, "step": 34690 }, { "epoch": 0.6264489848980564, "grad_norm": 0.5315285921096802, "learning_rate": 6.130952755844697e-06, "loss": 0.1653, "step": 34695 }, { "epoch": 0.6265392643309572, "grad_norm": 0.244839608669281, "learning_rate": 6.128337556187383e-06, "loss": 0.2486, "step": 34700 }, { "epoch": 0.6266295437638579, "grad_norm": 0.4421430230140686, "learning_rate": 6.125722667977163e-06, "loss": 0.2309, "step": 34705 }, { "epoch": 0.6267198231967586, "grad_norm": 0.4121013879776001, "learning_rate": 6.123108091424386e-06, "loss": 0.1869, "step": 34710 }, { "epoch": 0.6268101026296593, "grad_norm": 0.3209613561630249, "learning_rate": 6.120493826739378e-06, "loss": 0.2081, "step": 34715 }, { "epoch": 0.62690038206256, "grad_norm": 0.30417075753211975, "learning_rate": 6.117879874132434e-06, "loss": 0.192, "step": 34720 }, { "epoch": 0.6269906614954608, "grad_norm": 0.34549078345298767, "learning_rate": 6.115266233813825e-06, "loss": 0.2372, "step": 34725 }, { "epoch": 0.6270809409283614, "grad_norm": 0.3664049804210663, "learning_rate": 6.112652905993808e-06, "loss": 0.1923, "step": 34730 }, { "epoch": 0.6271712203612622, "grad_norm": 0.5543047189712524, "learning_rate": 6.110039890882598e-06, "loss": 0.199, "step": 34735 }, { "epoch": 0.6272614997941629, "grad_norm": 0.4151439964771271, "learning_rate": 6.1074271886903955e-06, "loss": 0.1908, "step": 34740 }, { "epoch": 0.6273517792270636, "grad_norm": 0.684883713722229, "learning_rate": 6.104814799627373e-06, "loss": 0.208, "step": 34745 }, { "epoch": 0.6274420586599643, "grad_norm": 0.5946511626243591, "learning_rate": 6.102202723903683e-06, "loss": 0.246, "step": 34750 }, { "epoch": 0.627532338092865, "grad_norm": 0.48237690329551697, "learning_rate": 6.09959096172944e-06, "loss": 0.2516, "step": 34755 }, { "epoch": 0.6276226175257658, "grad_norm": 0.6267902255058289, "learning_rate": 6.096979513314749e-06, "loss": 0.2279, "step": 34760 }, { "epoch": 0.6277128969586665, "grad_norm": 0.45958006381988525, "learning_rate": 6.094368378869675e-06, "loss": 0.2476, "step": 34765 }, { "epoch": 0.6278031763915671, "grad_norm": 0.35172709822654724, "learning_rate": 6.091757558604272e-06, "loss": 0.2897, "step": 34770 }, { "epoch": 0.6278934558244679, "grad_norm": 0.9599870443344116, "learning_rate": 6.089147052728552e-06, "loss": 0.1377, "step": 34775 }, { "epoch": 0.6279837352573686, "grad_norm": 0.48359227180480957, "learning_rate": 6.086536861452523e-06, "loss": 0.2726, "step": 34780 }, { "epoch": 0.6280740146902694, "grad_norm": 0.8267931342124939, "learning_rate": 6.083926984986146e-06, "loss": 0.362, "step": 34785 }, { "epoch": 0.62816429412317, "grad_norm": 0.3220060467720032, "learning_rate": 6.081317423539373e-06, "loss": 0.2479, "step": 34790 }, { "epoch": 0.6282545735560707, "grad_norm": 0.25943776965141296, "learning_rate": 6.078708177322119e-06, "loss": 0.1505, "step": 34795 }, { "epoch": 0.6283448529889715, "grad_norm": 0.4073909819126129, "learning_rate": 6.076099246544286e-06, "loss": 0.1963, "step": 34800 }, { "epoch": 0.6284351324218722, "grad_norm": 0.362216591835022, "learning_rate": 6.073490631415735e-06, "loss": 0.1303, "step": 34805 }, { "epoch": 0.6285254118547728, "grad_norm": 1.4766465425491333, "learning_rate": 6.070882332146315e-06, "loss": 0.2623, "step": 34810 }, { "epoch": 0.6286156912876736, "grad_norm": 0.3296297788619995, "learning_rate": 6.0682743489458475e-06, "loss": 0.2353, "step": 34815 }, { "epoch": 0.6287059707205743, "grad_norm": 0.3773016631603241, "learning_rate": 6.065666682024119e-06, "loss": 0.2789, "step": 34820 }, { "epoch": 0.6287962501534751, "grad_norm": 0.39737921953201294, "learning_rate": 6.063059331590899e-06, "loss": 0.23, "step": 34825 }, { "epoch": 0.6288865295863757, "grad_norm": 0.46278733015060425, "learning_rate": 6.060452297855932e-06, "loss": 0.2963, "step": 34830 }, { "epoch": 0.6289768090192764, "grad_norm": 0.2169886976480484, "learning_rate": 6.057845581028938e-06, "loss": 0.2966, "step": 34835 }, { "epoch": 0.6290670884521772, "grad_norm": 0.5035675764083862, "learning_rate": 6.0552391813196e-06, "loss": 0.2011, "step": 34840 }, { "epoch": 0.6291573678850779, "grad_norm": 0.46862685680389404, "learning_rate": 6.0526330989375925e-06, "loss": 0.2631, "step": 34845 }, { "epoch": 0.6292476473179786, "grad_norm": 0.3086468577384949, "learning_rate": 6.050027334092549e-06, "loss": 0.1986, "step": 34850 }, { "epoch": 0.6293379267508793, "grad_norm": 0.5292624831199646, "learning_rate": 6.0474218869940905e-06, "loss": 0.2383, "step": 34855 }, { "epoch": 0.62942820618378, "grad_norm": 0.3100101947784424, "learning_rate": 6.044816757851799e-06, "loss": 0.2109, "step": 34860 }, { "epoch": 0.6295184856166808, "grad_norm": 0.2324603945016861, "learning_rate": 6.042211946875248e-06, "loss": 0.2094, "step": 34865 }, { "epoch": 0.6296087650495814, "grad_norm": 0.5259659886360168, "learning_rate": 6.039607454273966e-06, "loss": 0.2353, "step": 34870 }, { "epoch": 0.6296990444824822, "grad_norm": 0.48725876212120056, "learning_rate": 6.037003280257471e-06, "loss": 0.1715, "step": 34875 }, { "epoch": 0.6297893239153829, "grad_norm": 0.34312328696250916, "learning_rate": 6.034399425035247e-06, "loss": 0.1883, "step": 34880 }, { "epoch": 0.6298796033482836, "grad_norm": 0.6141727566719055, "learning_rate": 6.031795888816758e-06, "loss": 0.1919, "step": 34885 }, { "epoch": 0.6299698827811844, "grad_norm": 0.3491286337375641, "learning_rate": 6.029192671811437e-06, "loss": 0.1581, "step": 34890 }, { "epoch": 0.630060162214085, "grad_norm": 0.33346256613731384, "learning_rate": 6.026589774228696e-06, "loss": 0.2257, "step": 34895 }, { "epoch": 0.6301504416469857, "grad_norm": 0.2053835690021515, "learning_rate": 6.0239871962779166e-06, "loss": 0.1951, "step": 34900 }, { "epoch": 0.6302407210798865, "grad_norm": 0.17909343540668488, "learning_rate": 6.0213849381684595e-06, "loss": 0.1976, "step": 34905 }, { "epoch": 0.6303310005127872, "grad_norm": 0.5308035016059875, "learning_rate": 6.01878300010966e-06, "loss": 0.2853, "step": 34910 }, { "epoch": 0.6304212799456879, "grad_norm": 0.5771810412406921, "learning_rate": 6.016181382310818e-06, "loss": 0.2938, "step": 34915 }, { "epoch": 0.6305115593785886, "grad_norm": 0.31199419498443604, "learning_rate": 6.0135800849812196e-06, "loss": 0.1956, "step": 34920 }, { "epoch": 0.6306018388114893, "grad_norm": 0.8073880076408386, "learning_rate": 6.010979108330118e-06, "loss": 0.2518, "step": 34925 }, { "epoch": 0.6306921182443901, "grad_norm": 0.8024955987930298, "learning_rate": 6.008378452566747e-06, "loss": 0.2603, "step": 34930 }, { "epoch": 0.6307823976772907, "grad_norm": 0.5539519190788269, "learning_rate": 6.005778117900306e-06, "loss": 0.2635, "step": 34935 }, { "epoch": 0.6308726771101915, "grad_norm": 0.6516796350479126, "learning_rate": 6.003178104539975e-06, "loss": 0.2426, "step": 34940 }, { "epoch": 0.6309629565430922, "grad_norm": 0.5461468696594238, "learning_rate": 6.000578412694904e-06, "loss": 0.1898, "step": 34945 }, { "epoch": 0.6310532359759929, "grad_norm": 0.231127068400383, "learning_rate": 5.997979042574225e-06, "loss": 0.2876, "step": 34950 }, { "epoch": 0.6311435154088936, "grad_norm": 0.5740653276443481, "learning_rate": 5.995379994387032e-06, "loss": 0.2504, "step": 34955 }, { "epoch": 0.6312337948417943, "grad_norm": 0.3960036635398865, "learning_rate": 5.992781268342403e-06, "loss": 0.2665, "step": 34960 }, { "epoch": 0.6313240742746951, "grad_norm": 0.2456202656030655, "learning_rate": 5.990182864649382e-06, "loss": 0.2269, "step": 34965 }, { "epoch": 0.6314143537075958, "grad_norm": 0.3660065233707428, "learning_rate": 5.987584783516999e-06, "loss": 0.1913, "step": 34970 }, { "epoch": 0.6315046331404964, "grad_norm": 0.7571065425872803, "learning_rate": 5.984987025154245e-06, "loss": 0.2416, "step": 34975 }, { "epoch": 0.6315949125733972, "grad_norm": 0.5113848447799683, "learning_rate": 5.982389589770097e-06, "loss": 0.2439, "step": 34980 }, { "epoch": 0.6316851920062979, "grad_norm": 0.5383720397949219, "learning_rate": 5.979792477573491e-06, "loss": 0.188, "step": 34985 }, { "epoch": 0.6317754714391987, "grad_norm": 0.3642173111438751, "learning_rate": 5.977195688773352e-06, "loss": 0.1801, "step": 34990 }, { "epoch": 0.6318657508720993, "grad_norm": 0.46961209177970886, "learning_rate": 5.974599223578574e-06, "loss": 0.1936, "step": 34995 }, { "epoch": 0.631956030305, "grad_norm": 0.27093908190727234, "learning_rate": 5.9720030821980165e-06, "loss": 0.2676, "step": 35000 }, { "epoch": 0.6320463097379008, "grad_norm": 0.48034536838531494, "learning_rate": 5.969407264840528e-06, "loss": 0.3011, "step": 35005 }, { "epoch": 0.6321365891708015, "grad_norm": 0.4448093771934509, "learning_rate": 5.966811771714918e-06, "loss": 0.277, "step": 35010 }, { "epoch": 0.6322268686037021, "grad_norm": 0.5743049383163452, "learning_rate": 5.96421660302998e-06, "loss": 0.2075, "step": 35015 }, { "epoch": 0.6323171480366029, "grad_norm": 0.6041221618652344, "learning_rate": 5.961621758994468e-06, "loss": 0.2375, "step": 35020 }, { "epoch": 0.6324074274695036, "grad_norm": 0.5226449966430664, "learning_rate": 5.959027239817127e-06, "loss": 0.3159, "step": 35025 }, { "epoch": 0.6324977069024044, "grad_norm": 0.4296029210090637, "learning_rate": 5.956433045706662e-06, "loss": 0.2542, "step": 35030 }, { "epoch": 0.632587986335305, "grad_norm": 0.3556138873100281, "learning_rate": 5.95383917687176e-06, "loss": 0.2856, "step": 35035 }, { "epoch": 0.6326782657682057, "grad_norm": 0.5710278749465942, "learning_rate": 5.951245633521077e-06, "loss": 0.2282, "step": 35040 }, { "epoch": 0.6327685452011065, "grad_norm": 0.3186838626861572, "learning_rate": 5.948652415863248e-06, "loss": 0.165, "step": 35045 }, { "epoch": 0.6328588246340072, "grad_norm": 0.25505760312080383, "learning_rate": 5.946059524106873e-06, "loss": 0.2014, "step": 35050 }, { "epoch": 0.6329491040669079, "grad_norm": 0.33168452978134155, "learning_rate": 5.943466958460536e-06, "loss": 0.2486, "step": 35055 }, { "epoch": 0.6330393834998086, "grad_norm": 0.6071575880050659, "learning_rate": 5.940874719132786e-06, "loss": 0.2014, "step": 35060 }, { "epoch": 0.6331296629327093, "grad_norm": 0.7817810773849487, "learning_rate": 5.938282806332156e-06, "loss": 0.3459, "step": 35065 }, { "epoch": 0.6332199423656101, "grad_norm": 0.4133113920688629, "learning_rate": 5.93569122026714e-06, "loss": 0.3335, "step": 35070 }, { "epoch": 0.6333102217985107, "grad_norm": 0.5362765192985535, "learning_rate": 5.933099961146216e-06, "loss": 0.2565, "step": 35075 }, { "epoch": 0.6334005012314115, "grad_norm": 0.37904590368270874, "learning_rate": 5.9305090291778285e-06, "loss": 0.2446, "step": 35080 }, { "epoch": 0.6334907806643122, "grad_norm": 0.6799839735031128, "learning_rate": 5.927918424570407e-06, "loss": 0.2036, "step": 35085 }, { "epoch": 0.6335810600972129, "grad_norm": 0.33658117055892944, "learning_rate": 5.925328147532339e-06, "loss": 0.2155, "step": 35090 }, { "epoch": 0.6336713395301136, "grad_norm": 0.25472602248191833, "learning_rate": 5.922738198271993e-06, "loss": 0.3382, "step": 35095 }, { "epoch": 0.6337616189630143, "grad_norm": 0.5699844360351562, "learning_rate": 5.920148576997716e-06, "loss": 0.3147, "step": 35100 }, { "epoch": 0.633851898395915, "grad_norm": 0.49318039417266846, "learning_rate": 5.917559283917822e-06, "loss": 0.2488, "step": 35105 }, { "epoch": 0.6339421778288158, "grad_norm": 0.5778481364250183, "learning_rate": 5.914970319240605e-06, "loss": 0.2135, "step": 35110 }, { "epoch": 0.6340324572617164, "grad_norm": 0.3343014717102051, "learning_rate": 5.9123816831743195e-06, "loss": 0.1618, "step": 35115 }, { "epoch": 0.6341227366946172, "grad_norm": 0.5119194388389587, "learning_rate": 5.9097933759272105e-06, "loss": 0.2919, "step": 35120 }, { "epoch": 0.6342130161275179, "grad_norm": 0.7334364652633667, "learning_rate": 5.907205397707485e-06, "loss": 0.1894, "step": 35125 }, { "epoch": 0.6343032955604186, "grad_norm": 0.5871245265007019, "learning_rate": 5.90461774872333e-06, "loss": 0.2589, "step": 35130 }, { "epoch": 0.6343935749933193, "grad_norm": 0.3996553421020508, "learning_rate": 5.902030429182898e-06, "loss": 0.1758, "step": 35135 }, { "epoch": 0.63448385442622, "grad_norm": 0.40675926208496094, "learning_rate": 5.899443439294323e-06, "loss": 0.2498, "step": 35140 }, { "epoch": 0.6345741338591208, "grad_norm": 0.3976016342639923, "learning_rate": 5.896856779265709e-06, "loss": 0.1763, "step": 35145 }, { "epoch": 0.6346644132920215, "grad_norm": 0.29940494894981384, "learning_rate": 5.89427044930514e-06, "loss": 0.276, "step": 35150 }, { "epoch": 0.6347546927249221, "grad_norm": 0.5581749081611633, "learning_rate": 5.891684449620655e-06, "loss": 0.2797, "step": 35155 }, { "epoch": 0.6348449721578229, "grad_norm": 0.27761101722717285, "learning_rate": 5.889098780420288e-06, "loss": 0.1851, "step": 35160 }, { "epoch": 0.6349352515907236, "grad_norm": 0.3999343514442444, "learning_rate": 5.886513441912034e-06, "loss": 0.2203, "step": 35165 }, { "epoch": 0.6350255310236244, "grad_norm": 0.5790286064147949, "learning_rate": 5.883928434303867e-06, "loss": 0.2046, "step": 35170 }, { "epoch": 0.635115810456525, "grad_norm": 0.4353305995464325, "learning_rate": 5.881343757803729e-06, "loss": 0.1458, "step": 35175 }, { "epoch": 0.6352060898894257, "grad_norm": 0.4601106643676758, "learning_rate": 5.878759412619544e-06, "loss": 0.162, "step": 35180 }, { "epoch": 0.6352963693223265, "grad_norm": 0.46851181983947754, "learning_rate": 5.876175398959198e-06, "loss": 0.2171, "step": 35185 }, { "epoch": 0.6353866487552272, "grad_norm": 1.0405105352401733, "learning_rate": 5.873591717030557e-06, "loss": 0.1599, "step": 35190 }, { "epoch": 0.6354769281881278, "grad_norm": 0.7821049690246582, "learning_rate": 5.871008367041463e-06, "loss": 0.2537, "step": 35195 }, { "epoch": 0.6355672076210286, "grad_norm": 0.5013983249664307, "learning_rate": 5.868425349199723e-06, "loss": 0.2079, "step": 35200 }, { "epoch": 0.6356574870539293, "grad_norm": 0.41896483302116394, "learning_rate": 5.865842663713124e-06, "loss": 0.2, "step": 35205 }, { "epoch": 0.6357477664868301, "grad_norm": 0.5238785743713379, "learning_rate": 5.863260310789423e-06, "loss": 0.2898, "step": 35210 }, { "epoch": 0.6358380459197307, "grad_norm": 0.2647916078567505, "learning_rate": 5.860678290636358e-06, "loss": 0.2858, "step": 35215 }, { "epoch": 0.6359283253526314, "grad_norm": 0.6003150343894958, "learning_rate": 5.858096603461625e-06, "loss": 0.308, "step": 35220 }, { "epoch": 0.6360186047855322, "grad_norm": 0.3803805708885193, "learning_rate": 5.855515249472906e-06, "loss": 0.2548, "step": 35225 }, { "epoch": 0.6361088842184329, "grad_norm": 0.696205735206604, "learning_rate": 5.852934228877851e-06, "loss": 0.2959, "step": 35230 }, { "epoch": 0.6361991636513336, "grad_norm": 0.7358081936836243, "learning_rate": 5.850353541884086e-06, "loss": 0.1871, "step": 35235 }, { "epoch": 0.6362894430842343, "grad_norm": 0.40561583638191223, "learning_rate": 5.847773188699207e-06, "loss": 0.2418, "step": 35240 }, { "epoch": 0.636379722517135, "grad_norm": 0.368505597114563, "learning_rate": 5.8451931695307894e-06, "loss": 0.3055, "step": 35245 }, { "epoch": 0.6364700019500358, "grad_norm": 0.36378854513168335, "learning_rate": 5.842613484586368e-06, "loss": 0.2551, "step": 35250 }, { "epoch": 0.6365602813829364, "grad_norm": 0.6695896983146667, "learning_rate": 5.840034134073468e-06, "loss": 0.1631, "step": 35255 }, { "epoch": 0.6366505608158372, "grad_norm": 0.5897035598754883, "learning_rate": 5.837455118199573e-06, "loss": 0.2288, "step": 35260 }, { "epoch": 0.6367408402487379, "grad_norm": 0.573806643486023, "learning_rate": 5.834876437172154e-06, "loss": 0.2627, "step": 35265 }, { "epoch": 0.6368311196816386, "grad_norm": 0.38577497005462646, "learning_rate": 5.832298091198637e-06, "loss": 0.1708, "step": 35270 }, { "epoch": 0.6369213991145394, "grad_norm": 0.7552957534790039, "learning_rate": 5.82972008048644e-06, "loss": 0.2758, "step": 35275 }, { "epoch": 0.63701167854744, "grad_norm": 0.4966082274913788, "learning_rate": 5.82714240524294e-06, "loss": 0.2313, "step": 35280 }, { "epoch": 0.6371019579803407, "grad_norm": 0.39154505729675293, "learning_rate": 5.824565065675493e-06, "loss": 0.1762, "step": 35285 }, { "epoch": 0.6371922374132415, "grad_norm": 0.5730046629905701, "learning_rate": 5.821988061991432e-06, "loss": 0.3759, "step": 35290 }, { "epoch": 0.6372825168461422, "grad_norm": 0.6496819853782654, "learning_rate": 5.819411394398049e-06, "loss": 0.2163, "step": 35295 }, { "epoch": 0.6373727962790429, "grad_norm": 0.45367497205734253, "learning_rate": 5.81683506310263e-06, "loss": 0.1538, "step": 35300 }, { "epoch": 0.6374630757119436, "grad_norm": 0.6378582715988159, "learning_rate": 5.814259068312409e-06, "loss": 0.1712, "step": 35305 }, { "epoch": 0.6375533551448443, "grad_norm": 0.36133307218551636, "learning_rate": 5.811683410234614e-06, "loss": 0.1385, "step": 35310 }, { "epoch": 0.6376436345777451, "grad_norm": 0.29456639289855957, "learning_rate": 5.809108089076436e-06, "loss": 0.2176, "step": 35315 }, { "epoch": 0.6377339140106457, "grad_norm": 0.5693657994270325, "learning_rate": 5.806533105045047e-06, "loss": 0.2002, "step": 35320 }, { "epoch": 0.6378241934435465, "grad_norm": 0.6943349242210388, "learning_rate": 5.803958458347576e-06, "loss": 0.169, "step": 35325 }, { "epoch": 0.6379144728764472, "grad_norm": 0.31143563985824585, "learning_rate": 5.8013841491911415e-06, "loss": 0.2483, "step": 35330 }, { "epoch": 0.6380047523093479, "grad_norm": 0.5593167543411255, "learning_rate": 5.798810177782822e-06, "loss": 0.2721, "step": 35335 }, { "epoch": 0.6380950317422486, "grad_norm": 0.2441977560520172, "learning_rate": 5.796236544329682e-06, "loss": 0.3029, "step": 35340 }, { "epoch": 0.6381853111751493, "grad_norm": 0.4800540804862976, "learning_rate": 5.793663249038743e-06, "loss": 0.1755, "step": 35345 }, { "epoch": 0.6382755906080501, "grad_norm": 0.4255560636520386, "learning_rate": 5.7910902921170145e-06, "loss": 0.2864, "step": 35350 }, { "epoch": 0.6383658700409508, "grad_norm": 0.7475911974906921, "learning_rate": 5.788517673771469e-06, "loss": 0.2881, "step": 35355 }, { "epoch": 0.6384561494738514, "grad_norm": 0.3153039216995239, "learning_rate": 5.7859453942090605e-06, "loss": 0.3081, "step": 35360 }, { "epoch": 0.6385464289067522, "grad_norm": 0.5710461735725403, "learning_rate": 5.783373453636705e-06, "loss": 0.1815, "step": 35365 }, { "epoch": 0.6386367083396529, "grad_norm": 0.4108995199203491, "learning_rate": 5.780801852261295e-06, "loss": 0.3069, "step": 35370 }, { "epoch": 0.6387269877725537, "grad_norm": 0.7596222162246704, "learning_rate": 5.778230590289697e-06, "loss": 0.2432, "step": 35375 }, { "epoch": 0.6388172672054543, "grad_norm": 0.41312968730926514, "learning_rate": 5.775659667928755e-06, "loss": 0.1997, "step": 35380 }, { "epoch": 0.638907546638355, "grad_norm": 0.4853145480155945, "learning_rate": 5.773089085385281e-06, "loss": 0.2101, "step": 35385 }, { "epoch": 0.6389978260712558, "grad_norm": 0.7995417714118958, "learning_rate": 5.770518842866055e-06, "loss": 0.2198, "step": 35390 }, { "epoch": 0.6390881055041565, "grad_norm": 0.8485028743743896, "learning_rate": 5.767948940577841e-06, "loss": 0.1597, "step": 35395 }, { "epoch": 0.6391783849370571, "grad_norm": 0.4845674932003021, "learning_rate": 5.76537937872736e-06, "loss": 0.2332, "step": 35400 }, { "epoch": 0.6392686643699579, "grad_norm": 0.41197094321250916, "learning_rate": 5.762810157521324e-06, "loss": 0.253, "step": 35405 }, { "epoch": 0.6393589438028586, "grad_norm": 0.4559381306171417, "learning_rate": 5.760241277166399e-06, "loss": 0.2397, "step": 35410 }, { "epoch": 0.6394492232357594, "grad_norm": 0.22541190683841705, "learning_rate": 5.757672737869237e-06, "loss": 0.2344, "step": 35415 }, { "epoch": 0.63953950266866, "grad_norm": 0.3318548798561096, "learning_rate": 5.755104539836458e-06, "loss": 0.2098, "step": 35420 }, { "epoch": 0.6396297821015607, "grad_norm": 0.6875835657119751, "learning_rate": 5.7525366832746595e-06, "loss": 0.2797, "step": 35425 }, { "epoch": 0.6397200615344615, "grad_norm": 0.4598506689071655, "learning_rate": 5.749969168390399e-06, "loss": 0.2842, "step": 35430 }, { "epoch": 0.6398103409673622, "grad_norm": 0.5909374952316284, "learning_rate": 5.747401995390223e-06, "loss": 0.2285, "step": 35435 }, { "epoch": 0.6399006204002629, "grad_norm": 0.507408857345581, "learning_rate": 5.744835164480633e-06, "loss": 0.2556, "step": 35440 }, { "epoch": 0.6399908998331636, "grad_norm": 0.3172169029712677, "learning_rate": 5.742268675868116e-06, "loss": 0.2692, "step": 35445 }, { "epoch": 0.6400811792660643, "grad_norm": 0.5158324837684631, "learning_rate": 5.739702529759126e-06, "loss": 0.2005, "step": 35450 }, { "epoch": 0.6401714586989651, "grad_norm": 0.4424983263015747, "learning_rate": 5.7371367263600975e-06, "loss": 0.2113, "step": 35455 }, { "epoch": 0.6402617381318657, "grad_norm": 0.4412218928337097, "learning_rate": 5.734571265877424e-06, "loss": 0.2653, "step": 35460 }, { "epoch": 0.6403520175647665, "grad_norm": 0.3951146602630615, "learning_rate": 5.732006148517477e-06, "loss": 0.2267, "step": 35465 }, { "epoch": 0.6404422969976672, "grad_norm": 0.5053501725196838, "learning_rate": 5.729441374486609e-06, "loss": 0.3119, "step": 35470 }, { "epoch": 0.6405325764305679, "grad_norm": 0.397300124168396, "learning_rate": 5.726876943991125e-06, "loss": 0.2443, "step": 35475 }, { "epoch": 0.6406228558634686, "grad_norm": 0.59889155626297, "learning_rate": 5.724312857237324e-06, "loss": 0.262, "step": 35480 }, { "epoch": 0.6407131352963693, "grad_norm": 0.43211787939071655, "learning_rate": 5.7217491144314674e-06, "loss": 0.1797, "step": 35485 }, { "epoch": 0.64080341472927, "grad_norm": 0.5028895735740662, "learning_rate": 5.7191857157797914e-06, "loss": 0.372, "step": 35490 }, { "epoch": 0.6408936941621708, "grad_norm": 0.4395959675312042, "learning_rate": 5.716622661488497e-06, "loss": 0.266, "step": 35495 }, { "epoch": 0.6409839735950714, "grad_norm": 0.29293063282966614, "learning_rate": 5.714059951763768e-06, "loss": 0.231, "step": 35500 }, { "epoch": 0.6410742530279722, "grad_norm": 0.9143898487091064, "learning_rate": 5.711497586811754e-06, "loss": 0.1584, "step": 35505 }, { "epoch": 0.6411645324608729, "grad_norm": 0.4413289427757263, "learning_rate": 5.708935566838576e-06, "loss": 0.1866, "step": 35510 }, { "epoch": 0.6412548118937736, "grad_norm": 0.4569292962551117, "learning_rate": 5.706373892050332e-06, "loss": 0.2071, "step": 35515 }, { "epoch": 0.6413450913266743, "grad_norm": 0.42200034856796265, "learning_rate": 5.703812562653097e-06, "loss": 0.1951, "step": 35520 }, { "epoch": 0.641435370759575, "grad_norm": 0.44226059317588806, "learning_rate": 5.7012515788529e-06, "loss": 0.2704, "step": 35525 }, { "epoch": 0.6415256501924758, "grad_norm": 0.565065860748291, "learning_rate": 5.698690940855762e-06, "loss": 0.2008, "step": 35530 }, { "epoch": 0.6416159296253765, "grad_norm": 0.20949938893318176, "learning_rate": 5.6961306488676605e-06, "loss": 0.2107, "step": 35535 }, { "epoch": 0.6417062090582771, "grad_norm": 0.994368314743042, "learning_rate": 5.6935707030945595e-06, "loss": 0.2555, "step": 35540 }, { "epoch": 0.6417964884911779, "grad_norm": 0.2960260808467865, "learning_rate": 5.691011103742379e-06, "loss": 0.2338, "step": 35545 }, { "epoch": 0.6418867679240786, "grad_norm": 0.31084543466567993, "learning_rate": 5.688451851017032e-06, "loss": 0.2752, "step": 35550 }, { "epoch": 0.6419770473569794, "grad_norm": 0.46179312467575073, "learning_rate": 5.685892945124385e-06, "loss": 0.2412, "step": 35555 }, { "epoch": 0.64206732678988, "grad_norm": 0.6409241557121277, "learning_rate": 5.683334386270282e-06, "loss": 0.2783, "step": 35560 }, { "epoch": 0.6421576062227807, "grad_norm": 0.3262818455696106, "learning_rate": 5.680776174660545e-06, "loss": 0.2545, "step": 35565 }, { "epoch": 0.6422478856556815, "grad_norm": 0.27425482869148254, "learning_rate": 5.678218310500957e-06, "loss": 0.2485, "step": 35570 }, { "epoch": 0.6423381650885822, "grad_norm": 0.5403268337249756, "learning_rate": 5.675660793997284e-06, "loss": 0.263, "step": 35575 }, { "epoch": 0.6424284445214828, "grad_norm": 0.38726648688316345, "learning_rate": 5.673103625355258e-06, "loss": 0.2678, "step": 35580 }, { "epoch": 0.6425187239543836, "grad_norm": 0.4004386365413666, "learning_rate": 5.670546804780593e-06, "loss": 0.3211, "step": 35585 }, { "epoch": 0.6426090033872843, "grad_norm": 0.6058290004730225, "learning_rate": 5.667990332478952e-06, "loss": 0.1706, "step": 35590 }, { "epoch": 0.6426992828201851, "grad_norm": 0.6734848022460938, "learning_rate": 5.665434208655997e-06, "loss": 0.314, "step": 35595 }, { "epoch": 0.6427895622530857, "grad_norm": 0.4511653482913971, "learning_rate": 5.66287843351734e-06, "loss": 0.1464, "step": 35600 }, { "epoch": 0.6428798416859864, "grad_norm": 0.32747411727905273, "learning_rate": 5.660323007268585e-06, "loss": 0.195, "step": 35605 }, { "epoch": 0.6429701211188872, "grad_norm": 0.6119348406791687, "learning_rate": 5.657767930115285e-06, "loss": 0.2925, "step": 35610 }, { "epoch": 0.6430604005517879, "grad_norm": 0.4540742039680481, "learning_rate": 5.655213202262986e-06, "loss": 0.2207, "step": 35615 }, { "epoch": 0.6431506799846886, "grad_norm": 0.2162233144044876, "learning_rate": 5.652658823917194e-06, "loss": 0.1626, "step": 35620 }, { "epoch": 0.6432409594175893, "grad_norm": 0.5270794630050659, "learning_rate": 5.650104795283396e-06, "loss": 0.2382, "step": 35625 }, { "epoch": 0.64333123885049, "grad_norm": 0.35142067074775696, "learning_rate": 5.647551116567036e-06, "loss": 0.2793, "step": 35630 }, { "epoch": 0.6434215182833908, "grad_norm": 0.3540780246257782, "learning_rate": 5.644997787973548e-06, "loss": 0.2156, "step": 35635 }, { "epoch": 0.6435117977162914, "grad_norm": 0.4894404709339142, "learning_rate": 5.642444809708321e-06, "loss": 0.1685, "step": 35640 }, { "epoch": 0.6436020771491922, "grad_norm": 0.5173498392105103, "learning_rate": 5.639892181976725e-06, "loss": 0.3287, "step": 35645 }, { "epoch": 0.6436923565820929, "grad_norm": 0.39868810772895813, "learning_rate": 5.637339904984105e-06, "loss": 0.2852, "step": 35650 }, { "epoch": 0.6437826360149936, "grad_norm": 0.41706356406211853, "learning_rate": 5.634787978935767e-06, "loss": 0.1923, "step": 35655 }, { "epoch": 0.6438729154478943, "grad_norm": 0.5662165880203247, "learning_rate": 5.632236404037002e-06, "loss": 0.2293, "step": 35660 }, { "epoch": 0.643963194880795, "grad_norm": 0.6363428235054016, "learning_rate": 5.629685180493058e-06, "loss": 0.2729, "step": 35665 }, { "epoch": 0.6440534743136958, "grad_norm": 0.7945840358734131, "learning_rate": 5.62713430850917e-06, "loss": 0.3457, "step": 35670 }, { "epoch": 0.6441437537465965, "grad_norm": 0.8168322443962097, "learning_rate": 5.624583788290529e-06, "loss": 0.3418, "step": 35675 }, { "epoch": 0.6442340331794972, "grad_norm": 0.5742879509925842, "learning_rate": 5.6220336200423086e-06, "loss": 0.1267, "step": 35680 }, { "epoch": 0.6443243126123979, "grad_norm": 0.7505717277526855, "learning_rate": 5.6194838039696545e-06, "loss": 0.2254, "step": 35685 }, { "epoch": 0.6444145920452986, "grad_norm": 0.7047513723373413, "learning_rate": 5.616934340277682e-06, "loss": 0.2286, "step": 35690 }, { "epoch": 0.6445048714781993, "grad_norm": 0.304597407579422, "learning_rate": 5.6143852291714705e-06, "loss": 0.2411, "step": 35695 }, { "epoch": 0.6445951509111001, "grad_norm": 0.34877634048461914, "learning_rate": 5.611836470856083e-06, "loss": 0.2565, "step": 35700 }, { "epoch": 0.6446854303440007, "grad_norm": 0.276822566986084, "learning_rate": 5.609288065536545e-06, "loss": 0.1882, "step": 35705 }, { "epoch": 0.6447757097769015, "grad_norm": 0.40141189098358154, "learning_rate": 5.606740013417858e-06, "loss": 0.2008, "step": 35710 }, { "epoch": 0.6448659892098022, "grad_norm": 0.4635002613067627, "learning_rate": 5.604192314704993e-06, "loss": 0.2469, "step": 35715 }, { "epoch": 0.644956268642703, "grad_norm": 0.3973727524280548, "learning_rate": 5.601644969602899e-06, "loss": 0.2499, "step": 35720 }, { "epoch": 0.6450465480756036, "grad_norm": 0.29262813925743103, "learning_rate": 5.599097978316487e-06, "loss": 0.1604, "step": 35725 }, { "epoch": 0.6451368275085043, "grad_norm": 0.32524144649505615, "learning_rate": 5.596551341050648e-06, "loss": 0.207, "step": 35730 }, { "epoch": 0.6452271069414051, "grad_norm": 0.5345089435577393, "learning_rate": 5.594005058010237e-06, "loss": 0.2494, "step": 35735 }, { "epoch": 0.6453173863743058, "grad_norm": 0.4536992013454437, "learning_rate": 5.591459129400081e-06, "loss": 0.207, "step": 35740 }, { "epoch": 0.6454076658072064, "grad_norm": 0.3381335735321045, "learning_rate": 5.588913555424984e-06, "loss": 0.1777, "step": 35745 }, { "epoch": 0.6454979452401072, "grad_norm": 0.40819457173347473, "learning_rate": 5.58636833628972e-06, "loss": 0.2212, "step": 35750 }, { "epoch": 0.6455882246730079, "grad_norm": 0.2871498167514801, "learning_rate": 5.583823472199037e-06, "loss": 0.1725, "step": 35755 }, { "epoch": 0.6456785041059087, "grad_norm": 0.32077890634536743, "learning_rate": 5.581278963357644e-06, "loss": 0.1816, "step": 35760 }, { "epoch": 0.6457687835388093, "grad_norm": 0.40499499440193176, "learning_rate": 5.578734809970234e-06, "loss": 0.3163, "step": 35765 }, { "epoch": 0.64585906297171, "grad_norm": 0.7485264539718628, "learning_rate": 5.5761910122414595e-06, "loss": 0.2407, "step": 35770 }, { "epoch": 0.6459493424046108, "grad_norm": 0.5958467125892639, "learning_rate": 5.573647570375954e-06, "loss": 0.2845, "step": 35775 }, { "epoch": 0.6460396218375115, "grad_norm": 0.40125805139541626, "learning_rate": 5.571104484578318e-06, "loss": 0.1859, "step": 35780 }, { "epoch": 0.6461299012704121, "grad_norm": 0.4314284026622772, "learning_rate": 5.56856175505313e-06, "loss": 0.3109, "step": 35785 }, { "epoch": 0.6462201807033129, "grad_norm": 0.4031558334827423, "learning_rate": 5.566019382004925e-06, "loss": 0.2476, "step": 35790 }, { "epoch": 0.6463104601362136, "grad_norm": 0.6834647059440613, "learning_rate": 5.563477365638226e-06, "loss": 0.2054, "step": 35795 }, { "epoch": 0.6464007395691144, "grad_norm": 0.2620033621788025, "learning_rate": 5.560935706157512e-06, "loss": 0.2788, "step": 35800 }, { "epoch": 0.646491019002015, "grad_norm": 0.7414353489875793, "learning_rate": 5.55839440376725e-06, "loss": 0.3292, "step": 35805 }, { "epoch": 0.6465812984349157, "grad_norm": 0.2887881398200989, "learning_rate": 5.555853458671856e-06, "loss": 0.2007, "step": 35810 }, { "epoch": 0.6466715778678165, "grad_norm": 0.6315146684646606, "learning_rate": 5.553312871075748e-06, "loss": 0.318, "step": 35815 }, { "epoch": 0.6467618573007172, "grad_norm": 0.4119872450828552, "learning_rate": 5.550772641183284e-06, "loss": 0.2603, "step": 35820 }, { "epoch": 0.6468521367336179, "grad_norm": 0.4186643064022064, "learning_rate": 5.548232769198818e-06, "loss": 0.209, "step": 35825 }, { "epoch": 0.6469424161665186, "grad_norm": 0.44736814498901367, "learning_rate": 5.5456932553266565e-06, "loss": 0.3051, "step": 35830 }, { "epoch": 0.6470326955994193, "grad_norm": 0.4290599822998047, "learning_rate": 5.543154099771082e-06, "loss": 0.1853, "step": 35835 }, { "epoch": 0.6471229750323201, "grad_norm": 0.40984511375427246, "learning_rate": 5.540615302736358e-06, "loss": 0.2482, "step": 35840 }, { "epoch": 0.6472132544652207, "grad_norm": 0.4099291265010834, "learning_rate": 5.538076864426708e-06, "loss": 0.2854, "step": 35845 }, { "epoch": 0.6473035338981215, "grad_norm": 0.34876224398612976, "learning_rate": 5.535538785046338e-06, "loss": 0.2069, "step": 35850 }, { "epoch": 0.6473938133310222, "grad_norm": 0.20505110919475555, "learning_rate": 5.533001064799408e-06, "loss": 0.2722, "step": 35855 }, { "epoch": 0.6474840927639229, "grad_norm": 0.4657360911369324, "learning_rate": 5.53046370389007e-06, "loss": 0.215, "step": 35860 }, { "epoch": 0.6475743721968236, "grad_norm": 0.4829479157924652, "learning_rate": 5.527926702522425e-06, "loss": 0.2516, "step": 35865 }, { "epoch": 0.6476646516297243, "grad_norm": 0.5626475214958191, "learning_rate": 5.525390060900566e-06, "loss": 0.2351, "step": 35870 }, { "epoch": 0.647754931062625, "grad_norm": 0.4846144914627075, "learning_rate": 5.522853779228534e-06, "loss": 0.3207, "step": 35875 }, { "epoch": 0.6478452104955258, "grad_norm": 0.6386345624923706, "learning_rate": 5.520317857710374e-06, "loss": 0.2153, "step": 35880 }, { "epoch": 0.6479354899284264, "grad_norm": 0.36993545293807983, "learning_rate": 5.517782296550065e-06, "loss": 0.1596, "step": 35885 }, { "epoch": 0.6480257693613272, "grad_norm": 0.34195250272750854, "learning_rate": 5.515247095951587e-06, "loss": 0.2695, "step": 35890 }, { "epoch": 0.6481160487942279, "grad_norm": 0.3296082317829132, "learning_rate": 5.5127122561188685e-06, "loss": 0.1724, "step": 35895 }, { "epoch": 0.6482063282271286, "grad_norm": 0.5959557890892029, "learning_rate": 5.510177777255827e-06, "loss": 0.2495, "step": 35900 }, { "epoch": 0.6482966076600293, "grad_norm": 0.47018685936927795, "learning_rate": 5.507643659566335e-06, "loss": 0.1836, "step": 35905 }, { "epoch": 0.64838688709293, "grad_norm": 0.61186283826828, "learning_rate": 5.505109903254249e-06, "loss": 0.1373, "step": 35910 }, { "epoch": 0.6484771665258308, "grad_norm": 0.5220193266868591, "learning_rate": 5.50257650852339e-06, "loss": 0.2653, "step": 35915 }, { "epoch": 0.6485674459587315, "grad_norm": 0.5862143635749817, "learning_rate": 5.500043475577555e-06, "loss": 0.2105, "step": 35920 }, { "epoch": 0.6486577253916321, "grad_norm": 0.48991987109184265, "learning_rate": 5.497510804620505e-06, "loss": 0.1884, "step": 35925 }, { "epoch": 0.6487480048245329, "grad_norm": 1.1300134658813477, "learning_rate": 5.4949784958559695e-06, "loss": 0.3095, "step": 35930 }, { "epoch": 0.6488382842574336, "grad_norm": 0.5286988615989685, "learning_rate": 5.492446549487664e-06, "loss": 0.2464, "step": 35935 }, { "epoch": 0.6489285636903344, "grad_norm": 0.49857473373413086, "learning_rate": 5.489914965719253e-06, "loss": 0.3035, "step": 35940 }, { "epoch": 0.649018843123235, "grad_norm": 0.523838996887207, "learning_rate": 5.4873837447543985e-06, "loss": 0.253, "step": 35945 }, { "epoch": 0.6491091225561357, "grad_norm": 0.28994113206863403, "learning_rate": 5.484852886796709e-06, "loss": 0.2581, "step": 35950 }, { "epoch": 0.6491994019890365, "grad_norm": 0.33836638927459717, "learning_rate": 5.482322392049779e-06, "loss": 0.2047, "step": 35955 }, { "epoch": 0.6492896814219372, "grad_norm": 0.3340393006801605, "learning_rate": 5.479792260717162e-06, "loss": 0.1438, "step": 35960 }, { "epoch": 0.6493799608548378, "grad_norm": 0.3953821361064911, "learning_rate": 5.477262493002399e-06, "loss": 0.3415, "step": 35965 }, { "epoch": 0.6494702402877386, "grad_norm": 0.4580709934234619, "learning_rate": 5.474733089108979e-06, "loss": 0.2731, "step": 35970 }, { "epoch": 0.6495605197206393, "grad_norm": 0.3954589366912842, "learning_rate": 5.472204049240381e-06, "loss": 0.4242, "step": 35975 }, { "epoch": 0.6496507991535401, "grad_norm": 0.43547070026397705, "learning_rate": 5.469675373600046e-06, "loss": 0.2182, "step": 35980 }, { "epoch": 0.6497410785864407, "grad_norm": 0.6323509812355042, "learning_rate": 5.467147062391392e-06, "loss": 0.2219, "step": 35985 }, { "epoch": 0.6498313580193414, "grad_norm": 0.5227145552635193, "learning_rate": 5.464619115817798e-06, "loss": 0.1728, "step": 35990 }, { "epoch": 0.6499216374522422, "grad_norm": 0.45743557810783386, "learning_rate": 5.462091534082623e-06, "loss": 0.2532, "step": 35995 }, { "epoch": 0.6500119168851429, "grad_norm": 0.28376418352127075, "learning_rate": 5.4595643173891865e-06, "loss": 0.1404, "step": 36000 }, { "epoch": 0.6501021963180436, "grad_norm": 0.39995232224464417, "learning_rate": 5.457037465940789e-06, "loss": 0.2931, "step": 36005 }, { "epoch": 0.6501924757509443, "grad_norm": 0.36790263652801514, "learning_rate": 5.4545109799407006e-06, "loss": 0.1568, "step": 36010 }, { "epoch": 0.650282755183845, "grad_norm": 1.251679539680481, "learning_rate": 5.45198485959215e-06, "loss": 0.2481, "step": 36015 }, { "epoch": 0.6503730346167458, "grad_norm": 0.5150558352470398, "learning_rate": 5.449459105098356e-06, "loss": 0.2474, "step": 36020 }, { "epoch": 0.6504633140496464, "grad_norm": 0.3952091932296753, "learning_rate": 5.446933716662486e-06, "loss": 0.2591, "step": 36025 }, { "epoch": 0.6505535934825472, "grad_norm": 1.0100435018539429, "learning_rate": 5.4444086944877e-06, "loss": 0.2566, "step": 36030 }, { "epoch": 0.6506438729154479, "grad_norm": 0.7629332542419434, "learning_rate": 5.4418840387771056e-06, "loss": 0.2562, "step": 36035 }, { "epoch": 0.6507341523483486, "grad_norm": 0.6848556399345398, "learning_rate": 5.439359749733801e-06, "loss": 0.2854, "step": 36040 }, { "epoch": 0.6508244317812493, "grad_norm": 0.2180611938238144, "learning_rate": 5.436835827560847e-06, "loss": 0.2099, "step": 36045 }, { "epoch": 0.65091471121415, "grad_norm": 0.2651490271091461, "learning_rate": 5.434312272461275e-06, "loss": 0.2515, "step": 36050 }, { "epoch": 0.6510049906470508, "grad_norm": 0.565922737121582, "learning_rate": 5.431789084638082e-06, "loss": 0.265, "step": 36055 }, { "epoch": 0.6510952700799515, "grad_norm": 0.4796663224697113, "learning_rate": 5.429266264294246e-06, "loss": 0.1974, "step": 36060 }, { "epoch": 0.6511855495128522, "grad_norm": 0.6576713919639587, "learning_rate": 5.426743811632705e-06, "loss": 0.2221, "step": 36065 }, { "epoch": 0.6512758289457529, "grad_norm": 0.4697735011577606, "learning_rate": 5.424221726856371e-06, "loss": 0.2401, "step": 36070 }, { "epoch": 0.6513661083786536, "grad_norm": 0.441311776638031, "learning_rate": 5.421700010168132e-06, "loss": 0.1525, "step": 36075 }, { "epoch": 0.6514563878115543, "grad_norm": 0.5874056816101074, "learning_rate": 5.419178661770841e-06, "loss": 0.2494, "step": 36080 }, { "epoch": 0.6515466672444551, "grad_norm": 0.3328922390937805, "learning_rate": 5.416657681867319e-06, "loss": 0.1765, "step": 36085 }, { "epoch": 0.6516369466773557, "grad_norm": 0.33248043060302734, "learning_rate": 5.414137070660366e-06, "loss": 0.2048, "step": 36090 }, { "epoch": 0.6517272261102565, "grad_norm": 0.48412153124809265, "learning_rate": 5.411616828352739e-06, "loss": 0.327, "step": 36095 }, { "epoch": 0.6518175055431572, "grad_norm": 0.5995043516159058, "learning_rate": 5.409096955147182e-06, "loss": 0.1911, "step": 36100 }, { "epoch": 0.651907784976058, "grad_norm": 0.5780115723609924, "learning_rate": 5.4065774512463905e-06, "loss": 0.2279, "step": 36105 }, { "epoch": 0.6519980644089586, "grad_norm": 0.19956594705581665, "learning_rate": 5.404058316853047e-06, "loss": 0.2053, "step": 36110 }, { "epoch": 0.6520883438418593, "grad_norm": 0.8025631904602051, "learning_rate": 5.4015395521698e-06, "loss": 0.3269, "step": 36115 }, { "epoch": 0.6521786232747601, "grad_norm": 0.6508951783180237, "learning_rate": 5.399021157399258e-06, "loss": 0.2592, "step": 36120 }, { "epoch": 0.6522689027076608, "grad_norm": 0.4481887221336365, "learning_rate": 5.396503132744016e-06, "loss": 0.194, "step": 36125 }, { "epoch": 0.6523591821405614, "grad_norm": 0.5886951684951782, "learning_rate": 5.39398547840662e-06, "loss": 0.2577, "step": 36130 }, { "epoch": 0.6524494615734622, "grad_norm": 0.43413764238357544, "learning_rate": 5.391468194589608e-06, "loss": 0.2112, "step": 36135 }, { "epoch": 0.6525397410063629, "grad_norm": 0.5776399970054626, "learning_rate": 5.388951281495467e-06, "loss": 0.2334, "step": 36140 }, { "epoch": 0.6526300204392637, "grad_norm": 0.3230409026145935, "learning_rate": 5.386434739326676e-06, "loss": 0.188, "step": 36145 }, { "epoch": 0.6527202998721643, "grad_norm": 0.3495572507381439, "learning_rate": 5.383918568285662e-06, "loss": 0.2011, "step": 36150 }, { "epoch": 0.652810579305065, "grad_norm": 0.3622836768627167, "learning_rate": 5.381402768574841e-06, "loss": 0.1843, "step": 36155 }, { "epoch": 0.6529008587379658, "grad_norm": 0.4878521263599396, "learning_rate": 5.378887340396584e-06, "loss": 0.1661, "step": 36160 }, { "epoch": 0.6529911381708665, "grad_norm": 0.45289501547813416, "learning_rate": 5.376372283953245e-06, "loss": 0.2874, "step": 36165 }, { "epoch": 0.6530814176037671, "grad_norm": 0.29209086298942566, "learning_rate": 5.373857599447135e-06, "loss": 0.2054, "step": 36170 }, { "epoch": 0.6531716970366679, "grad_norm": 0.6697922945022583, "learning_rate": 5.371343287080546e-06, "loss": 0.217, "step": 36175 }, { "epoch": 0.6532619764695686, "grad_norm": 0.6784439086914062, "learning_rate": 5.368829347055735e-06, "loss": 0.1929, "step": 36180 }, { "epoch": 0.6533522559024694, "grad_norm": 0.7056629657745361, "learning_rate": 5.366315779574936e-06, "loss": 0.2297, "step": 36185 }, { "epoch": 0.65344253533537, "grad_norm": 0.34243327379226685, "learning_rate": 5.363802584840338e-06, "loss": 0.1655, "step": 36190 }, { "epoch": 0.6535328147682707, "grad_norm": 0.4730566740036011, "learning_rate": 5.361289763054119e-06, "loss": 0.2395, "step": 36195 }, { "epoch": 0.6536230942011715, "grad_norm": 0.34965676069259644, "learning_rate": 5.35877731441841e-06, "loss": 0.2957, "step": 36200 }, { "epoch": 0.6537133736340722, "grad_norm": 0.5952594876289368, "learning_rate": 5.356265239135313e-06, "loss": 0.2028, "step": 36205 }, { "epoch": 0.6538036530669729, "grad_norm": 0.38088926672935486, "learning_rate": 5.353753537406923e-06, "loss": 0.3096, "step": 36210 }, { "epoch": 0.6538939324998736, "grad_norm": 0.32168248295783997, "learning_rate": 5.3512422094352765e-06, "loss": 0.2515, "step": 36215 }, { "epoch": 0.6539842119327743, "grad_norm": 0.6534793376922607, "learning_rate": 5.348731255422397e-06, "loss": 0.2103, "step": 36220 }, { "epoch": 0.6540744913656751, "grad_norm": 0.2537820041179657, "learning_rate": 5.346220675570266e-06, "loss": 0.1419, "step": 36225 }, { "epoch": 0.6541647707985757, "grad_norm": 0.38548409938812256, "learning_rate": 5.34371047008085e-06, "loss": 0.2454, "step": 36230 }, { "epoch": 0.6542550502314765, "grad_norm": 0.42382970452308655, "learning_rate": 5.341200639156068e-06, "loss": 0.2954, "step": 36235 }, { "epoch": 0.6543453296643772, "grad_norm": 0.5312998294830322, "learning_rate": 5.338691182997822e-06, "loss": 0.1951, "step": 36240 }, { "epoch": 0.6544356090972779, "grad_norm": 0.6543774008750916, "learning_rate": 5.3361821018079805e-06, "loss": 0.3208, "step": 36245 }, { "epoch": 0.6545258885301786, "grad_norm": 0.6860877275466919, "learning_rate": 5.3336733957883814e-06, "loss": 0.2424, "step": 36250 }, { "epoch": 0.6546161679630793, "grad_norm": 0.4910118579864502, "learning_rate": 5.331165065140827e-06, "loss": 0.2598, "step": 36255 }, { "epoch": 0.65470644739598, "grad_norm": 0.5055651664733887, "learning_rate": 5.328657110067103e-06, "loss": 0.2381, "step": 36260 }, { "epoch": 0.6547967268288808, "grad_norm": 0.4690207540988922, "learning_rate": 5.3261495307689446e-06, "loss": 0.2088, "step": 36265 }, { "epoch": 0.6548870062617814, "grad_norm": 0.570319652557373, "learning_rate": 5.323642327448075e-06, "loss": 0.2584, "step": 36270 }, { "epoch": 0.6549772856946822, "grad_norm": 0.6898211240768433, "learning_rate": 5.321135500306179e-06, "loss": 0.168, "step": 36275 }, { "epoch": 0.6550675651275829, "grad_norm": 0.43217116594314575, "learning_rate": 5.318629049544919e-06, "loss": 0.2906, "step": 36280 }, { "epoch": 0.6551578445604836, "grad_norm": 0.37369659543037415, "learning_rate": 5.316122975365914e-06, "loss": 0.2235, "step": 36285 }, { "epoch": 0.6552481239933843, "grad_norm": 0.16432589292526245, "learning_rate": 5.313617277970756e-06, "loss": 0.1311, "step": 36290 }, { "epoch": 0.655338403426285, "grad_norm": 0.7773046493530273, "learning_rate": 5.3111119575610195e-06, "loss": 0.1963, "step": 36295 }, { "epoch": 0.6554286828591858, "grad_norm": 0.4540032744407654, "learning_rate": 5.308607014338229e-06, "loss": 0.2944, "step": 36300 }, { "epoch": 0.6555189622920865, "grad_norm": 0.4507315456867218, "learning_rate": 5.306102448503897e-06, "loss": 0.3008, "step": 36305 }, { "epoch": 0.6556092417249871, "grad_norm": 0.35809871554374695, "learning_rate": 5.303598260259494e-06, "loss": 0.2297, "step": 36310 }, { "epoch": 0.6556995211578879, "grad_norm": 0.48642030358314514, "learning_rate": 5.301094449806469e-06, "loss": 0.1915, "step": 36315 }, { "epoch": 0.6557898005907886, "grad_norm": 0.7581939101219177, "learning_rate": 5.298591017346226e-06, "loss": 0.3413, "step": 36320 }, { "epoch": 0.6558800800236894, "grad_norm": 0.5003683567047119, "learning_rate": 5.296087963080159e-06, "loss": 0.2669, "step": 36325 }, { "epoch": 0.65597035945659, "grad_norm": 0.3249092698097229, "learning_rate": 5.293585287209609e-06, "loss": 0.2155, "step": 36330 }, { "epoch": 0.6560606388894907, "grad_norm": 0.5210465788841248, "learning_rate": 5.291082989935904e-06, "loss": 0.1858, "step": 36335 }, { "epoch": 0.6561509183223915, "grad_norm": 0.3324693739414215, "learning_rate": 5.288581071460335e-06, "loss": 0.2807, "step": 36340 }, { "epoch": 0.6562411977552922, "grad_norm": 0.44445836544036865, "learning_rate": 5.28607953198417e-06, "loss": 0.1725, "step": 36345 }, { "epoch": 0.6563314771881928, "grad_norm": 0.5412079691886902, "learning_rate": 5.283578371708626e-06, "loss": 0.2055, "step": 36350 }, { "epoch": 0.6564217566210936, "grad_norm": 0.48812752962112427, "learning_rate": 5.281077590834916e-06, "loss": 0.2257, "step": 36355 }, { "epoch": 0.6565120360539943, "grad_norm": 0.31539368629455566, "learning_rate": 5.278577189564199e-06, "loss": 0.2113, "step": 36360 }, { "epoch": 0.6566023154868951, "grad_norm": 0.7528833150863647, "learning_rate": 5.276077168097624e-06, "loss": 0.29, "step": 36365 }, { "epoch": 0.6566925949197957, "grad_norm": 0.3404152989387512, "learning_rate": 5.27357752663629e-06, "loss": 0.2261, "step": 36370 }, { "epoch": 0.6567828743526964, "grad_norm": 0.40433064103126526, "learning_rate": 5.27107826538128e-06, "loss": 0.2948, "step": 36375 }, { "epoch": 0.6568731537855972, "grad_norm": 0.38003936409950256, "learning_rate": 5.268579384533644e-06, "loss": 0.242, "step": 36380 }, { "epoch": 0.6569634332184979, "grad_norm": 0.35246750712394714, "learning_rate": 5.266080884294392e-06, "loss": 0.2454, "step": 36385 }, { "epoch": 0.6570537126513986, "grad_norm": 0.5807691216468811, "learning_rate": 5.263582764864519e-06, "loss": 0.3307, "step": 36390 }, { "epoch": 0.6571439920842993, "grad_norm": 0.402802437543869, "learning_rate": 5.26108502644497e-06, "loss": 0.2739, "step": 36395 }, { "epoch": 0.6572342715172, "grad_norm": 0.5744020342826843, "learning_rate": 5.258587669236676e-06, "loss": 0.2833, "step": 36400 }, { "epoch": 0.6573245509501008, "grad_norm": 0.3158230185508728, "learning_rate": 5.256090693440532e-06, "loss": 0.1906, "step": 36405 }, { "epoch": 0.6574148303830014, "grad_norm": 0.48997870087623596, "learning_rate": 5.253594099257402e-06, "loss": 0.2397, "step": 36410 }, { "epoch": 0.6575051098159022, "grad_norm": 0.3089967966079712, "learning_rate": 5.251097886888115e-06, "loss": 0.2195, "step": 36415 }, { "epoch": 0.6575953892488029, "grad_norm": 0.4702431559562683, "learning_rate": 5.248602056533478e-06, "loss": 0.2356, "step": 36420 }, { "epoch": 0.6576856686817036, "grad_norm": 0.48032915592193604, "learning_rate": 5.246106608394255e-06, "loss": 0.138, "step": 36425 }, { "epoch": 0.6577759481146043, "grad_norm": 0.3023422360420227, "learning_rate": 5.243611542671196e-06, "loss": 0.225, "step": 36430 }, { "epoch": 0.657866227547505, "grad_norm": 0.5493566989898682, "learning_rate": 5.241116859565002e-06, "loss": 0.1258, "step": 36435 }, { "epoch": 0.6579565069804058, "grad_norm": 0.4261058568954468, "learning_rate": 5.238622559276356e-06, "loss": 0.2396, "step": 36440 }, { "epoch": 0.6580467864133065, "grad_norm": 0.473242849111557, "learning_rate": 5.2361286420059064e-06, "loss": 0.1612, "step": 36445 }, { "epoch": 0.6581370658462071, "grad_norm": 0.3436768651008606, "learning_rate": 5.2336351079542755e-06, "loss": 0.221, "step": 36450 }, { "epoch": 0.6582273452791079, "grad_norm": 0.7097442150115967, "learning_rate": 5.231141957322041e-06, "loss": 0.1859, "step": 36455 }, { "epoch": 0.6583176247120086, "grad_norm": 0.5997614860534668, "learning_rate": 5.228649190309767e-06, "loss": 0.2897, "step": 36460 }, { "epoch": 0.6584079041449094, "grad_norm": 0.5260922908782959, "learning_rate": 5.226156807117971e-06, "loss": 0.2347, "step": 36465 }, { "epoch": 0.6584981835778101, "grad_norm": 0.23415659368038177, "learning_rate": 5.223664807947151e-06, "loss": 0.1616, "step": 36470 }, { "epoch": 0.6585884630107107, "grad_norm": 0.567236065864563, "learning_rate": 5.221173192997773e-06, "loss": 0.2294, "step": 36475 }, { "epoch": 0.6586787424436115, "grad_norm": 0.6114963293075562, "learning_rate": 5.218681962470263e-06, "loss": 0.1269, "step": 36480 }, { "epoch": 0.6587690218765122, "grad_norm": 0.7257540822029114, "learning_rate": 5.216191116565028e-06, "loss": 0.2045, "step": 36485 }, { "epoch": 0.658859301309413, "grad_norm": 0.5209638476371765, "learning_rate": 5.2137006554824345e-06, "loss": 0.26, "step": 36490 }, { "epoch": 0.6589495807423136, "grad_norm": 0.8320066332817078, "learning_rate": 5.211210579422825e-06, "loss": 0.2026, "step": 36495 }, { "epoch": 0.6590398601752143, "grad_norm": 0.4539569318294525, "learning_rate": 5.2087208885865036e-06, "loss": 0.3083, "step": 36500 }, { "epoch": 0.6591301396081151, "grad_norm": 0.22275879979133606, "learning_rate": 5.206231583173751e-06, "loss": 0.1345, "step": 36505 }, { "epoch": 0.6592204190410158, "grad_norm": 0.5147002339363098, "learning_rate": 5.203742663384813e-06, "loss": 0.1485, "step": 36510 }, { "epoch": 0.6593106984739164, "grad_norm": 0.507576048374176, "learning_rate": 5.20125412941991e-06, "loss": 0.1761, "step": 36515 }, { "epoch": 0.6594009779068172, "grad_norm": 0.40669941902160645, "learning_rate": 5.198765981479217e-06, "loss": 0.2278, "step": 36520 }, { "epoch": 0.6594912573397179, "grad_norm": 0.33739709854125977, "learning_rate": 5.1962782197628984e-06, "loss": 0.1623, "step": 36525 }, { "epoch": 0.6595815367726187, "grad_norm": 0.33824312686920166, "learning_rate": 5.193790844471065e-06, "loss": 0.3332, "step": 36530 }, { "epoch": 0.6596718162055193, "grad_norm": 0.5075321197509766, "learning_rate": 5.191303855803815e-06, "loss": 0.1877, "step": 36535 }, { "epoch": 0.65976209563842, "grad_norm": 0.3887553811073303, "learning_rate": 5.1888172539612045e-06, "loss": 0.2291, "step": 36540 }, { "epoch": 0.6598523750713208, "grad_norm": 0.35546180605888367, "learning_rate": 5.186331039143272e-06, "loss": 0.1708, "step": 36545 }, { "epoch": 0.6599426545042215, "grad_norm": 0.3968889117240906, "learning_rate": 5.183845211550004e-06, "loss": 0.3045, "step": 36550 }, { "epoch": 0.6600329339371221, "grad_norm": 0.2924818694591522, "learning_rate": 5.181359771381376e-06, "loss": 0.1616, "step": 36555 }, { "epoch": 0.6601232133700229, "grad_norm": 0.45263779163360596, "learning_rate": 5.178874718837318e-06, "loss": 0.2787, "step": 36560 }, { "epoch": 0.6602134928029236, "grad_norm": 0.4006568491458893, "learning_rate": 5.176390054117732e-06, "loss": 0.214, "step": 36565 }, { "epoch": 0.6603037722358244, "grad_norm": 0.30012068152427673, "learning_rate": 5.1739057774224965e-06, "loss": 0.1781, "step": 36570 }, { "epoch": 0.660394051668725, "grad_norm": 0.5558052659034729, "learning_rate": 5.1714218889514526e-06, "loss": 0.156, "step": 36575 }, { "epoch": 0.6604843311016257, "grad_norm": 0.4274640381336212, "learning_rate": 5.168938388904413e-06, "loss": 0.1808, "step": 36580 }, { "epoch": 0.6605746105345265, "grad_norm": 0.5439524054527283, "learning_rate": 5.166455277481153e-06, "loss": 0.3033, "step": 36585 }, { "epoch": 0.6606648899674272, "grad_norm": 0.4050881862640381, "learning_rate": 5.163972554881424e-06, "loss": 0.1802, "step": 36590 }, { "epoch": 0.6607551694003279, "grad_norm": 0.4661766588687897, "learning_rate": 5.161490221304939e-06, "loss": 0.1279, "step": 36595 }, { "epoch": 0.6608454488332286, "grad_norm": 0.5030481815338135, "learning_rate": 5.159008276951386e-06, "loss": 0.1636, "step": 36600 }, { "epoch": 0.6609357282661293, "grad_norm": 0.22832004725933075, "learning_rate": 5.156526722020419e-06, "loss": 0.2206, "step": 36605 }, { "epoch": 0.6610260076990301, "grad_norm": 0.3879418969154358, "learning_rate": 5.154045556711667e-06, "loss": 0.2062, "step": 36610 }, { "epoch": 0.6611162871319307, "grad_norm": 0.6533599495887756, "learning_rate": 5.151564781224712e-06, "loss": 0.2051, "step": 36615 }, { "epoch": 0.6612065665648315, "grad_norm": 0.6125730872154236, "learning_rate": 5.149084395759123e-06, "loss": 0.1912, "step": 36620 }, { "epoch": 0.6612968459977322, "grad_norm": 0.308635413646698, "learning_rate": 5.146604400514421e-06, "loss": 0.2386, "step": 36625 }, { "epoch": 0.6613871254306329, "grad_norm": 0.36153319478034973, "learning_rate": 5.144124795690111e-06, "loss": 0.2323, "step": 36630 }, { "epoch": 0.6614774048635336, "grad_norm": 0.3307132124900818, "learning_rate": 5.1416455814856515e-06, "loss": 0.1951, "step": 36635 }, { "epoch": 0.6615676842964343, "grad_norm": 0.4885282814502716, "learning_rate": 5.139166758100481e-06, "loss": 0.1321, "step": 36640 }, { "epoch": 0.661657963729335, "grad_norm": 0.5977446436882019, "learning_rate": 5.136688325734004e-06, "loss": 0.2655, "step": 36645 }, { "epoch": 0.6617482431622358, "grad_norm": 0.3410941958427429, "learning_rate": 5.134210284585595e-06, "loss": 0.1885, "step": 36650 }, { "epoch": 0.6618385225951364, "grad_norm": 0.4518417418003082, "learning_rate": 5.131732634854592e-06, "loss": 0.2936, "step": 36655 }, { "epoch": 0.6619288020280372, "grad_norm": 0.4261515736579895, "learning_rate": 5.129255376740297e-06, "loss": 0.1962, "step": 36660 }, { "epoch": 0.6620190814609379, "grad_norm": 0.5102550387382507, "learning_rate": 5.1267785104419945e-06, "loss": 0.1593, "step": 36665 }, { "epoch": 0.6621093608938386, "grad_norm": 0.46885019540786743, "learning_rate": 5.124302036158928e-06, "loss": 0.3083, "step": 36670 }, { "epoch": 0.6621996403267393, "grad_norm": 0.5711917877197266, "learning_rate": 5.121825954090317e-06, "loss": 0.2011, "step": 36675 }, { "epoch": 0.66228991975964, "grad_norm": 0.38981893658638, "learning_rate": 5.1193502644353365e-06, "loss": 0.187, "step": 36680 }, { "epoch": 0.6623801991925408, "grad_norm": 0.16882199048995972, "learning_rate": 5.1168749673931465e-06, "loss": 0.1494, "step": 36685 }, { "epoch": 0.6624704786254415, "grad_norm": 0.4587150514125824, "learning_rate": 5.114400063162857e-06, "loss": 0.2764, "step": 36690 }, { "epoch": 0.6625607580583421, "grad_norm": 0.39484915137290955, "learning_rate": 5.111925551943564e-06, "loss": 0.1908, "step": 36695 }, { "epoch": 0.6626510374912429, "grad_norm": 0.47309184074401855, "learning_rate": 5.1094514339343174e-06, "loss": 0.2775, "step": 36700 }, { "epoch": 0.6627413169241436, "grad_norm": 0.4850340187549591, "learning_rate": 5.106977709334144e-06, "loss": 0.1514, "step": 36705 }, { "epoch": 0.6628315963570444, "grad_norm": 0.3185379207134247, "learning_rate": 5.104504378342039e-06, "loss": 0.2157, "step": 36710 }, { "epoch": 0.662921875789945, "grad_norm": 0.4683305025100708, "learning_rate": 5.1020314411569675e-06, "loss": 0.2074, "step": 36715 }, { "epoch": 0.6630121552228457, "grad_norm": 0.4003462493419647, "learning_rate": 5.099558897977851e-06, "loss": 0.262, "step": 36720 }, { "epoch": 0.6631024346557465, "grad_norm": 0.49980586767196655, "learning_rate": 5.097086749003595e-06, "loss": 0.3328, "step": 36725 }, { "epoch": 0.6631927140886472, "grad_norm": 0.48928263783454895, "learning_rate": 5.094614994433058e-06, "loss": 0.1817, "step": 36730 }, { "epoch": 0.6632829935215478, "grad_norm": 0.25729429721832275, "learning_rate": 5.09214363446508e-06, "loss": 0.252, "step": 36735 }, { "epoch": 0.6633732729544486, "grad_norm": 0.49312248826026917, "learning_rate": 5.089672669298462e-06, "loss": 0.1915, "step": 36740 }, { "epoch": 0.6634635523873493, "grad_norm": 0.49311479926109314, "learning_rate": 5.0872020991319816e-06, "loss": 0.2368, "step": 36745 }, { "epoch": 0.6635538318202501, "grad_norm": 0.4821045398712158, "learning_rate": 5.084731924164373e-06, "loss": 0.2603, "step": 36750 }, { "epoch": 0.6636441112531507, "grad_norm": 0.47183868288993835, "learning_rate": 5.082262144594338e-06, "loss": 0.1998, "step": 36755 }, { "epoch": 0.6637343906860514, "grad_norm": 0.6003633737564087, "learning_rate": 5.079792760620562e-06, "loss": 0.2824, "step": 36760 }, { "epoch": 0.6638246701189522, "grad_norm": 0.5717042088508606, "learning_rate": 5.077323772441683e-06, "loss": 0.2828, "step": 36765 }, { "epoch": 0.6639149495518529, "grad_norm": 0.7113093733787537, "learning_rate": 5.074855180256313e-06, "loss": 0.246, "step": 36770 }, { "epoch": 0.6640052289847536, "grad_norm": 0.4163897931575775, "learning_rate": 5.072386984263036e-06, "loss": 0.1641, "step": 36775 }, { "epoch": 0.6640955084176543, "grad_norm": 0.5821138620376587, "learning_rate": 5.069919184660402e-06, "loss": 0.2837, "step": 36780 }, { "epoch": 0.664185787850555, "grad_norm": 0.2605414092540741, "learning_rate": 5.067451781646922e-06, "loss": 0.1363, "step": 36785 }, { "epoch": 0.6642760672834558, "grad_norm": 0.34480786323547363, "learning_rate": 5.064984775421085e-06, "loss": 0.2066, "step": 36790 }, { "epoch": 0.6643663467163564, "grad_norm": 0.5099565982818604, "learning_rate": 5.062518166181338e-06, "loss": 0.1796, "step": 36795 }, { "epoch": 0.6644566261492572, "grad_norm": 0.3806185722351074, "learning_rate": 5.060051954126106e-06, "loss": 0.2543, "step": 36800 }, { "epoch": 0.6645469055821579, "grad_norm": 0.47018367052078247, "learning_rate": 5.057586139453779e-06, "loss": 0.2296, "step": 36805 }, { "epoch": 0.6646371850150586, "grad_norm": 0.5153882503509521, "learning_rate": 5.0551207223627144e-06, "loss": 0.2, "step": 36810 }, { "epoch": 0.6647274644479593, "grad_norm": 0.8088284134864807, "learning_rate": 5.052655703051231e-06, "loss": 0.2412, "step": 36815 }, { "epoch": 0.66481774388086, "grad_norm": 0.5115001797676086, "learning_rate": 5.0501910817176305e-06, "loss": 0.2293, "step": 36820 }, { "epoch": 0.6649080233137608, "grad_norm": 0.5519483089447021, "learning_rate": 5.047726858560165e-06, "loss": 0.2607, "step": 36825 }, { "epoch": 0.6649983027466615, "grad_norm": 0.4139096736907959, "learning_rate": 5.04526303377707e-06, "loss": 0.223, "step": 36830 }, { "epoch": 0.6650885821795621, "grad_norm": 0.2258121818304062, "learning_rate": 5.0427996075665365e-06, "loss": 0.152, "step": 36835 }, { "epoch": 0.6651788616124629, "grad_norm": 0.19573138654232025, "learning_rate": 5.040336580126732e-06, "loss": 0.1931, "step": 36840 }, { "epoch": 0.6652691410453636, "grad_norm": 0.4620254337787628, "learning_rate": 5.037873951655794e-06, "loss": 0.1999, "step": 36845 }, { "epoch": 0.6653594204782644, "grad_norm": 0.7757815718650818, "learning_rate": 5.035411722351813e-06, "loss": 0.1894, "step": 36850 }, { "epoch": 0.6654496999111651, "grad_norm": 0.882802426815033, "learning_rate": 5.032949892412868e-06, "loss": 0.2849, "step": 36855 }, { "epoch": 0.6655399793440657, "grad_norm": 0.3408997356891632, "learning_rate": 5.030488462036987e-06, "loss": 0.2316, "step": 36860 }, { "epoch": 0.6656302587769665, "grad_norm": 0.5360017418861389, "learning_rate": 5.028027431422176e-06, "loss": 0.2421, "step": 36865 }, { "epoch": 0.6657205382098672, "grad_norm": 0.5048326253890991, "learning_rate": 5.025566800766409e-06, "loss": 0.3436, "step": 36870 }, { "epoch": 0.665810817642768, "grad_norm": 0.3691485822200775, "learning_rate": 5.023106570267627e-06, "loss": 0.1521, "step": 36875 }, { "epoch": 0.6659010970756686, "grad_norm": 0.24679838120937347, "learning_rate": 5.020646740123735e-06, "loss": 0.3498, "step": 36880 }, { "epoch": 0.6659913765085693, "grad_norm": 0.23643231391906738, "learning_rate": 5.018187310532611e-06, "loss": 0.2223, "step": 36885 }, { "epoch": 0.6660816559414701, "grad_norm": 0.399018794298172, "learning_rate": 5.015728281692094e-06, "loss": 0.2203, "step": 36890 }, { "epoch": 0.6661719353743708, "grad_norm": 0.4639451801776886, "learning_rate": 5.013269653800001e-06, "loss": 0.2392, "step": 36895 }, { "epoch": 0.6662622148072714, "grad_norm": 0.4985400140285492, "learning_rate": 5.0108114270541055e-06, "loss": 0.1535, "step": 36900 }, { "epoch": 0.6663524942401722, "grad_norm": 0.44081366062164307, "learning_rate": 5.008353601652154e-06, "loss": 0.2293, "step": 36905 }, { "epoch": 0.6664427736730729, "grad_norm": 0.3351982831954956, "learning_rate": 5.005896177791862e-06, "loss": 0.2259, "step": 36910 }, { "epoch": 0.6665330531059737, "grad_norm": 0.27997368574142456, "learning_rate": 5.003439155670916e-06, "loss": 0.1895, "step": 36915 }, { "epoch": 0.6666233325388743, "grad_norm": 0.607708752155304, "learning_rate": 5.000982535486958e-06, "loss": 0.296, "step": 36920 }, { "epoch": 0.666713611971775, "grad_norm": 0.30011940002441406, "learning_rate": 4.998526317437612e-06, "loss": 0.2256, "step": 36925 }, { "epoch": 0.6668038914046758, "grad_norm": 0.44001007080078125, "learning_rate": 4.996070501720456e-06, "loss": 0.2381, "step": 36930 }, { "epoch": 0.6668941708375765, "grad_norm": 0.43888503313064575, "learning_rate": 4.993615088533046e-06, "loss": 0.1804, "step": 36935 }, { "epoch": 0.6669844502704771, "grad_norm": 0.4655245542526245, "learning_rate": 4.991160078072907e-06, "loss": 0.2559, "step": 36940 }, { "epoch": 0.6670747297033779, "grad_norm": 1.812050223350525, "learning_rate": 4.988705470537516e-06, "loss": 0.2734, "step": 36945 }, { "epoch": 0.6671650091362786, "grad_norm": 0.44596067070961, "learning_rate": 4.9862512661243386e-06, "loss": 0.3315, "step": 36950 }, { "epoch": 0.6672552885691794, "grad_norm": 0.5072383284568787, "learning_rate": 4.9837974650307905e-06, "loss": 0.1918, "step": 36955 }, { "epoch": 0.66734556800208, "grad_norm": 0.30700528621673584, "learning_rate": 4.981344067454269e-06, "loss": 0.2694, "step": 36960 }, { "epoch": 0.6674358474349807, "grad_norm": 0.4336912930011749, "learning_rate": 4.9788910735921235e-06, "loss": 0.2399, "step": 36965 }, { "epoch": 0.6675261268678815, "grad_norm": 0.4809855818748474, "learning_rate": 4.9764384836416845e-06, "loss": 0.2984, "step": 36970 }, { "epoch": 0.6676164063007822, "grad_norm": 0.597041130065918, "learning_rate": 4.973986297800245e-06, "loss": 0.2233, "step": 36975 }, { "epoch": 0.6677066857336829, "grad_norm": 0.49554866552352905, "learning_rate": 4.971534516265069e-06, "loss": 0.2486, "step": 36980 }, { "epoch": 0.6677969651665836, "grad_norm": 0.4046391248703003, "learning_rate": 4.969083139233376e-06, "loss": 0.2427, "step": 36985 }, { "epoch": 0.6678872445994843, "grad_norm": 0.3640994429588318, "learning_rate": 4.966632166902371e-06, "loss": 0.2132, "step": 36990 }, { "epoch": 0.6679775240323851, "grad_norm": 0.5942362546920776, "learning_rate": 4.964181599469208e-06, "loss": 0.1826, "step": 36995 }, { "epoch": 0.6680678034652857, "grad_norm": 0.45490118861198425, "learning_rate": 4.96173143713102e-06, "loss": 0.2304, "step": 37000 }, { "epoch": 0.6681580828981865, "grad_norm": 0.8721827864646912, "learning_rate": 4.959281680084907e-06, "loss": 0.1934, "step": 37005 }, { "epoch": 0.6682483623310872, "grad_norm": 0.6914052367210388, "learning_rate": 4.956832328527939e-06, "loss": 0.2042, "step": 37010 }, { "epoch": 0.6683386417639879, "grad_norm": 0.6214689612388611, "learning_rate": 4.954383382657136e-06, "loss": 0.3038, "step": 37015 }, { "epoch": 0.6684289211968886, "grad_norm": 0.31522366404533386, "learning_rate": 4.95193484266951e-06, "loss": 0.2341, "step": 37020 }, { "epoch": 0.6685192006297893, "grad_norm": 0.590651273727417, "learning_rate": 4.9494867087620225e-06, "loss": 0.1615, "step": 37025 }, { "epoch": 0.66860948006269, "grad_norm": 0.20999670028686523, "learning_rate": 4.947038981131605e-06, "loss": 0.1288, "step": 37030 }, { "epoch": 0.6686997594955908, "grad_norm": 0.4950847923755646, "learning_rate": 4.944591659975163e-06, "loss": 0.2576, "step": 37035 }, { "epoch": 0.6687900389284914, "grad_norm": 0.5933647751808167, "learning_rate": 4.942144745489565e-06, "loss": 0.263, "step": 37040 }, { "epoch": 0.6688803183613922, "grad_norm": 1.0284976959228516, "learning_rate": 4.9396982378716526e-06, "loss": 0.2176, "step": 37045 }, { "epoch": 0.6689705977942929, "grad_norm": 0.3346775770187378, "learning_rate": 4.9372521373182214e-06, "loss": 0.2331, "step": 37050 }, { "epoch": 0.6690608772271937, "grad_norm": 0.4718320071697235, "learning_rate": 4.934806444026049e-06, "loss": 0.2758, "step": 37055 }, { "epoch": 0.6691511566600943, "grad_norm": 0.4274367690086365, "learning_rate": 4.932361158191868e-06, "loss": 0.1495, "step": 37060 }, { "epoch": 0.669241436092995, "grad_norm": 0.4431329369544983, "learning_rate": 4.929916280012384e-06, "loss": 0.2516, "step": 37065 }, { "epoch": 0.6693317155258958, "grad_norm": 0.46292510628700256, "learning_rate": 4.927471809684274e-06, "loss": 0.3105, "step": 37070 }, { "epoch": 0.6694219949587965, "grad_norm": 0.3133583068847656, "learning_rate": 4.925027747404181e-06, "loss": 0.1601, "step": 37075 }, { "epoch": 0.6695122743916971, "grad_norm": 0.4823862910270691, "learning_rate": 4.922584093368702e-06, "loss": 0.2668, "step": 37080 }, { "epoch": 0.6696025538245979, "grad_norm": 0.44316956400871277, "learning_rate": 4.9201408477744195e-06, "loss": 0.2384, "step": 37085 }, { "epoch": 0.6696928332574986, "grad_norm": 0.4984242022037506, "learning_rate": 4.9176980108178685e-06, "loss": 0.2973, "step": 37090 }, { "epoch": 0.6697831126903994, "grad_norm": 0.3840685784816742, "learning_rate": 4.915255582695565e-06, "loss": 0.2633, "step": 37095 }, { "epoch": 0.6698733921233, "grad_norm": 0.6775936484336853, "learning_rate": 4.912813563603975e-06, "loss": 0.2415, "step": 37100 }, { "epoch": 0.6699636715562007, "grad_norm": 0.2997501492500305, "learning_rate": 4.910371953739548e-06, "loss": 0.1747, "step": 37105 }, { "epoch": 0.6700539509891015, "grad_norm": 0.7513289451599121, "learning_rate": 4.907930753298691e-06, "loss": 0.2323, "step": 37110 }, { "epoch": 0.6701442304220022, "grad_norm": 0.5835806131362915, "learning_rate": 4.905489962477786e-06, "loss": 0.2219, "step": 37115 }, { "epoch": 0.6702345098549028, "grad_norm": 0.3198319375514984, "learning_rate": 4.903049581473173e-06, "loss": 0.1908, "step": 37120 }, { "epoch": 0.6703247892878036, "grad_norm": 0.33636313676834106, "learning_rate": 4.90060961048116e-06, "loss": 0.182, "step": 37125 }, { "epoch": 0.6704150687207043, "grad_norm": 0.4486347734928131, "learning_rate": 4.8981700496980255e-06, "loss": 0.2391, "step": 37130 }, { "epoch": 0.6705053481536051, "grad_norm": 0.2821575701236725, "learning_rate": 4.895730899320018e-06, "loss": 0.203, "step": 37135 }, { "epoch": 0.6705956275865057, "grad_norm": 0.2851160764694214, "learning_rate": 4.8932921595433516e-06, "loss": 0.1512, "step": 37140 }, { "epoch": 0.6706859070194064, "grad_norm": 0.6033151149749756, "learning_rate": 4.890853830564198e-06, "loss": 0.3092, "step": 37145 }, { "epoch": 0.6707761864523072, "grad_norm": 0.23315021395683289, "learning_rate": 4.8884159125787105e-06, "loss": 0.1523, "step": 37150 }, { "epoch": 0.6708664658852079, "grad_norm": 0.2469920516014099, "learning_rate": 4.885978405782994e-06, "loss": 0.2195, "step": 37155 }, { "epoch": 0.6709567453181086, "grad_norm": 0.9692341685295105, "learning_rate": 4.883541310373136e-06, "loss": 0.2526, "step": 37160 }, { "epoch": 0.6710470247510093, "grad_norm": 0.5049905776977539, "learning_rate": 4.881104626545175e-06, "loss": 0.153, "step": 37165 }, { "epoch": 0.67113730418391, "grad_norm": 0.428270548582077, "learning_rate": 4.8786683544951295e-06, "loss": 0.2615, "step": 37170 }, { "epoch": 0.6712275836168108, "grad_norm": 0.439714640378952, "learning_rate": 4.876232494418978e-06, "loss": 0.1528, "step": 37175 }, { "epoch": 0.6713178630497114, "grad_norm": 0.42459869384765625, "learning_rate": 4.873797046512675e-06, "loss": 0.2203, "step": 37180 }, { "epoch": 0.6714081424826122, "grad_norm": 0.5259662866592407, "learning_rate": 4.871362010972124e-06, "loss": 0.2435, "step": 37185 }, { "epoch": 0.6714984219155129, "grad_norm": 0.45902788639068604, "learning_rate": 4.8689273879932135e-06, "loss": 0.2823, "step": 37190 }, { "epoch": 0.6715887013484136, "grad_norm": 0.6105425357818604, "learning_rate": 4.866493177771784e-06, "loss": 0.289, "step": 37195 }, { "epoch": 0.6716789807813143, "grad_norm": 0.9166700839996338, "learning_rate": 4.864059380503655e-06, "loss": 0.2488, "step": 37200 }, { "epoch": 0.671769260214215, "grad_norm": 0.5581538677215576, "learning_rate": 4.861625996384611e-06, "loss": 0.3423, "step": 37205 }, { "epoch": 0.6718595396471158, "grad_norm": 0.16273179650306702, "learning_rate": 4.859193025610391e-06, "loss": 0.1914, "step": 37210 }, { "epoch": 0.6719498190800165, "grad_norm": 0.5495330691337585, "learning_rate": 4.856760468376719e-06, "loss": 0.2922, "step": 37215 }, { "epoch": 0.6720400985129171, "grad_norm": 0.4891622066497803, "learning_rate": 4.854328324879269e-06, "loss": 0.2126, "step": 37220 }, { "epoch": 0.6721303779458179, "grad_norm": 0.3623976409435272, "learning_rate": 4.851896595313697e-06, "loss": 0.1501, "step": 37225 }, { "epoch": 0.6722206573787186, "grad_norm": 0.6092562675476074, "learning_rate": 4.849465279875608e-06, "loss": 0.2624, "step": 37230 }, { "epoch": 0.6723109368116194, "grad_norm": 0.43685096502304077, "learning_rate": 4.84703437876059e-06, "loss": 0.2445, "step": 37235 }, { "epoch": 0.67240121624452, "grad_norm": 0.3236982524394989, "learning_rate": 4.844603892164192e-06, "loss": 0.287, "step": 37240 }, { "epoch": 0.6724914956774207, "grad_norm": 0.6211898326873779, "learning_rate": 4.842173820281929e-06, "loss": 0.1967, "step": 37245 }, { "epoch": 0.6725817751103215, "grad_norm": 0.35335037112236023, "learning_rate": 4.839744163309279e-06, "loss": 0.1927, "step": 37250 }, { "epoch": 0.6726720545432222, "grad_norm": 3.5796072483062744, "learning_rate": 4.837314921441695e-06, "loss": 0.3433, "step": 37255 }, { "epoch": 0.672762333976123, "grad_norm": 0.5667800903320312, "learning_rate": 4.834886094874587e-06, "loss": 0.1933, "step": 37260 }, { "epoch": 0.6728526134090236, "grad_norm": 0.238250270485878, "learning_rate": 4.832457683803337e-06, "loss": 0.2244, "step": 37265 }, { "epoch": 0.6729428928419243, "grad_norm": 0.9651192426681519, "learning_rate": 4.830029688423296e-06, "loss": 0.2784, "step": 37270 }, { "epoch": 0.6730331722748251, "grad_norm": 0.270931214094162, "learning_rate": 4.827602108929779e-06, "loss": 0.2955, "step": 37275 }, { "epoch": 0.6731234517077258, "grad_norm": 0.4074895977973938, "learning_rate": 4.825174945518064e-06, "loss": 0.306, "step": 37280 }, { "epoch": 0.6732137311406264, "grad_norm": 0.22945667803287506, "learning_rate": 4.822748198383402e-06, "loss": 0.2048, "step": 37285 }, { "epoch": 0.6733040105735272, "grad_norm": 0.7907798886299133, "learning_rate": 4.820321867721003e-06, "loss": 0.2558, "step": 37290 }, { "epoch": 0.6733942900064279, "grad_norm": 0.5409635901451111, "learning_rate": 4.817895953726053e-06, "loss": 0.271, "step": 37295 }, { "epoch": 0.6734845694393287, "grad_norm": 0.6511736512184143, "learning_rate": 4.815470456593692e-06, "loss": 0.3073, "step": 37300 }, { "epoch": 0.6735748488722293, "grad_norm": 0.560627281665802, "learning_rate": 4.813045376519038e-06, "loss": 0.2054, "step": 37305 }, { "epoch": 0.67366512830513, "grad_norm": 0.4113266170024872, "learning_rate": 4.810620713697173e-06, "loss": 0.1989, "step": 37310 }, { "epoch": 0.6737554077380308, "grad_norm": 0.5025131106376648, "learning_rate": 4.808196468323138e-06, "loss": 0.3788, "step": 37315 }, { "epoch": 0.6738456871709315, "grad_norm": 0.47875532507896423, "learning_rate": 4.805772640591954e-06, "loss": 0.2544, "step": 37320 }, { "epoch": 0.6739359666038321, "grad_norm": 0.6549875736236572, "learning_rate": 4.803349230698591e-06, "loss": 0.2717, "step": 37325 }, { "epoch": 0.6740262460367329, "grad_norm": 0.4416073262691498, "learning_rate": 4.800926238837998e-06, "loss": 0.2037, "step": 37330 }, { "epoch": 0.6741165254696336, "grad_norm": 0.17586322128772736, "learning_rate": 4.7985036652050895e-06, "loss": 0.2117, "step": 37335 }, { "epoch": 0.6742068049025344, "grad_norm": 0.49328237771987915, "learning_rate": 4.7960815099947465e-06, "loss": 0.1992, "step": 37340 }, { "epoch": 0.674297084335435, "grad_norm": 0.17381010949611664, "learning_rate": 4.793659773401805e-06, "loss": 0.2372, "step": 37345 }, { "epoch": 0.6743873637683357, "grad_norm": 0.2661042809486389, "learning_rate": 4.7912384556210854e-06, "loss": 0.2309, "step": 37350 }, { "epoch": 0.6744776432012365, "grad_norm": 0.3867124021053314, "learning_rate": 4.788817556847357e-06, "loss": 0.2342, "step": 37355 }, { "epoch": 0.6745679226341372, "grad_norm": 0.4215829074382782, "learning_rate": 4.786397077275371e-06, "loss": 0.2607, "step": 37360 }, { "epoch": 0.6746582020670379, "grad_norm": 0.3529093563556671, "learning_rate": 4.783977017099826e-06, "loss": 0.1426, "step": 37365 }, { "epoch": 0.6747484814999386, "grad_norm": 0.773078978061676, "learning_rate": 4.781557376515413e-06, "loss": 0.3066, "step": 37370 }, { "epoch": 0.6748387609328393, "grad_norm": 0.6937637329101562, "learning_rate": 4.779138155716764e-06, "loss": 0.2038, "step": 37375 }, { "epoch": 0.6749290403657401, "grad_norm": 0.653456449508667, "learning_rate": 4.7767193548984945e-06, "loss": 0.2444, "step": 37380 }, { "epoch": 0.6750193197986407, "grad_norm": 0.41567978262901306, "learning_rate": 4.774300974255173e-06, "loss": 0.24, "step": 37385 }, { "epoch": 0.6751095992315415, "grad_norm": 0.6049378514289856, "learning_rate": 4.771883013981346e-06, "loss": 0.2051, "step": 37390 }, { "epoch": 0.6751998786644422, "grad_norm": 0.5627599954605103, "learning_rate": 4.769465474271515e-06, "loss": 0.2942, "step": 37395 }, { "epoch": 0.6752901580973429, "grad_norm": 0.27880987524986267, "learning_rate": 4.767048355320158e-06, "loss": 0.1388, "step": 37400 }, { "epoch": 0.6753804375302436, "grad_norm": 0.5502472519874573, "learning_rate": 4.764631657321717e-06, "loss": 0.2598, "step": 37405 }, { "epoch": 0.6754707169631443, "grad_norm": 0.8498625159263611, "learning_rate": 4.76221538047059e-06, "loss": 0.2192, "step": 37410 }, { "epoch": 0.675560996396045, "grad_norm": 0.3717988133430481, "learning_rate": 4.759799524961157e-06, "loss": 0.1724, "step": 37415 }, { "epoch": 0.6756512758289458, "grad_norm": 0.4655216932296753, "learning_rate": 4.757384090987749e-06, "loss": 0.2329, "step": 37420 }, { "epoch": 0.6757415552618464, "grad_norm": 0.4961567521095276, "learning_rate": 4.754969078744677e-06, "loss": 0.1794, "step": 37425 }, { "epoch": 0.6758318346947472, "grad_norm": 0.49937573075294495, "learning_rate": 4.752554488426203e-06, "loss": 0.2079, "step": 37430 }, { "epoch": 0.6759221141276479, "grad_norm": 0.4563522934913635, "learning_rate": 4.750140320226568e-06, "loss": 0.2004, "step": 37435 }, { "epoch": 0.6760123935605487, "grad_norm": 0.41807126998901367, "learning_rate": 4.747726574339975e-06, "loss": 0.275, "step": 37440 }, { "epoch": 0.6761026729934493, "grad_norm": 0.3730819523334503, "learning_rate": 4.745313250960595e-06, "loss": 0.2351, "step": 37445 }, { "epoch": 0.67619295242635, "grad_norm": 2.460869550704956, "learning_rate": 4.742900350282555e-06, "loss": 0.18, "step": 37450 }, { "epoch": 0.6762832318592508, "grad_norm": 0.4683798551559448, "learning_rate": 4.740487872499962e-06, "loss": 0.2404, "step": 37455 }, { "epoch": 0.6763735112921515, "grad_norm": 0.6973472833633423, "learning_rate": 4.738075817806875e-06, "loss": 0.1958, "step": 37460 }, { "epoch": 0.6764637907250521, "grad_norm": 0.7393599152565002, "learning_rate": 4.735664186397332e-06, "loss": 0.2334, "step": 37465 }, { "epoch": 0.6765540701579529, "grad_norm": 0.6769455671310425, "learning_rate": 4.733252978465329e-06, "loss": 0.3239, "step": 37470 }, { "epoch": 0.6766443495908536, "grad_norm": 0.46761155128479004, "learning_rate": 4.7308421942048355e-06, "loss": 0.1858, "step": 37475 }, { "epoch": 0.6767346290237544, "grad_norm": 0.3539060652256012, "learning_rate": 4.728431833809774e-06, "loss": 0.2737, "step": 37480 }, { "epoch": 0.676824908456655, "grad_norm": 0.5235952734947205, "learning_rate": 4.726021897474045e-06, "loss": 0.2115, "step": 37485 }, { "epoch": 0.6769151878895557, "grad_norm": 0.9684334397315979, "learning_rate": 4.723612385391509e-06, "loss": 0.2367, "step": 37490 }, { "epoch": 0.6770054673224565, "grad_norm": 0.37715333700180054, "learning_rate": 4.721203297755991e-06, "loss": 0.2294, "step": 37495 }, { "epoch": 0.6770957467553572, "grad_norm": 0.5215760469436646, "learning_rate": 4.718794634761286e-06, "loss": 0.161, "step": 37500 }, { "epoch": 0.6771860261882579, "grad_norm": 0.34836265444755554, "learning_rate": 4.716386396601156e-06, "loss": 0.2247, "step": 37505 }, { "epoch": 0.6772763056211586, "grad_norm": 0.34557798504829407, "learning_rate": 4.713978583469327e-06, "loss": 0.1931, "step": 37510 }, { "epoch": 0.6773665850540593, "grad_norm": 0.7941352128982544, "learning_rate": 4.711571195559485e-06, "loss": 0.3337, "step": 37515 }, { "epoch": 0.6774568644869601, "grad_norm": 0.5474073886871338, "learning_rate": 4.709164233065294e-06, "loss": 0.1931, "step": 37520 }, { "epoch": 0.6775471439198607, "grad_norm": 0.39072561264038086, "learning_rate": 4.706757696180368e-06, "loss": 0.1887, "step": 37525 }, { "epoch": 0.6776374233527614, "grad_norm": 0.4135580360889435, "learning_rate": 4.7043515850983e-06, "loss": 0.2794, "step": 37530 }, { "epoch": 0.6777277027856622, "grad_norm": 0.3942650258541107, "learning_rate": 4.7019459000126435e-06, "loss": 0.1952, "step": 37535 }, { "epoch": 0.6778179822185629, "grad_norm": 0.6654621362686157, "learning_rate": 4.699540641116923e-06, "loss": 0.2369, "step": 37540 }, { "epoch": 0.6779082616514636, "grad_norm": 0.5702198147773743, "learning_rate": 4.697135808604616e-06, "loss": 0.3061, "step": 37545 }, { "epoch": 0.6779985410843643, "grad_norm": 0.4783928692340851, "learning_rate": 4.694731402669182e-06, "loss": 0.2044, "step": 37550 }, { "epoch": 0.678088820517265, "grad_norm": 0.409105122089386, "learning_rate": 4.692327423504029e-06, "loss": 0.2278, "step": 37555 }, { "epoch": 0.6781790999501658, "grad_norm": 0.4907389283180237, "learning_rate": 4.689923871302547e-06, "loss": 0.2065, "step": 37560 }, { "epoch": 0.6782693793830664, "grad_norm": 0.5981885194778442, "learning_rate": 4.687520746258075e-06, "loss": 0.2759, "step": 37565 }, { "epoch": 0.6783596588159672, "grad_norm": 0.300359308719635, "learning_rate": 4.6851180485639415e-06, "loss": 0.2398, "step": 37570 }, { "epoch": 0.6784499382488679, "grad_norm": 0.4736993610858917, "learning_rate": 4.682715778413418e-06, "loss": 0.2492, "step": 37575 }, { "epoch": 0.6785402176817686, "grad_norm": 0.54407799243927, "learning_rate": 4.6803139359997455e-06, "loss": 0.1323, "step": 37580 }, { "epoch": 0.6786304971146693, "grad_norm": 1.4211688041687012, "learning_rate": 4.677912521516143e-06, "loss": 0.1437, "step": 37585 }, { "epoch": 0.67872077654757, "grad_norm": 0.20921234786510468, "learning_rate": 4.675511535155779e-06, "loss": 0.2183, "step": 37590 }, { "epoch": 0.6788110559804708, "grad_norm": 0.33046719431877136, "learning_rate": 4.673110977111799e-06, "loss": 0.1872, "step": 37595 }, { "epoch": 0.6789013354133715, "grad_norm": 0.40264737606048584, "learning_rate": 4.6707108475773115e-06, "loss": 0.1985, "step": 37600 }, { "epoch": 0.6789916148462721, "grad_norm": 0.5488992929458618, "learning_rate": 4.668311146745391e-06, "loss": 0.2889, "step": 37605 }, { "epoch": 0.6790818942791729, "grad_norm": 0.9485848546028137, "learning_rate": 4.665911874809072e-06, "loss": 0.3017, "step": 37610 }, { "epoch": 0.6791721737120736, "grad_norm": 0.7803026437759399, "learning_rate": 4.663513031961362e-06, "loss": 0.2445, "step": 37615 }, { "epoch": 0.6792624531449744, "grad_norm": 0.7331886291503906, "learning_rate": 4.661114618395225e-06, "loss": 0.3379, "step": 37620 }, { "epoch": 0.679352732577875, "grad_norm": 0.8646613359451294, "learning_rate": 4.658716634303604e-06, "loss": 0.1735, "step": 37625 }, { "epoch": 0.6794430120107757, "grad_norm": 0.4974726438522339, "learning_rate": 4.656319079879386e-06, "loss": 0.1663, "step": 37630 }, { "epoch": 0.6795332914436765, "grad_norm": 0.34508395195007324, "learning_rate": 4.653921955315454e-06, "loss": 0.2258, "step": 37635 }, { "epoch": 0.6796235708765772, "grad_norm": 0.45985549688339233, "learning_rate": 4.651525260804628e-06, "loss": 0.2097, "step": 37640 }, { "epoch": 0.6797138503094778, "grad_norm": 0.4692579209804535, "learning_rate": 4.6491289965397114e-06, "loss": 0.1885, "step": 37645 }, { "epoch": 0.6798041297423786, "grad_norm": 0.1568463295698166, "learning_rate": 4.646733162713457e-06, "loss": 0.2665, "step": 37650 }, { "epoch": 0.6798944091752793, "grad_norm": 0.2690006494522095, "learning_rate": 4.6443377595186036e-06, "loss": 0.2539, "step": 37655 }, { "epoch": 0.6799846886081801, "grad_norm": 0.4124183654785156, "learning_rate": 4.6419427871478325e-06, "loss": 0.2961, "step": 37660 }, { "epoch": 0.6800749680410808, "grad_norm": 0.4410317540168762, "learning_rate": 4.639548245793808e-06, "loss": 0.2054, "step": 37665 }, { "epoch": 0.6801652474739814, "grad_norm": 0.2998250722885132, "learning_rate": 4.637154135649156e-06, "loss": 0.1674, "step": 37670 }, { "epoch": 0.6802555269068822, "grad_norm": 0.3939516246318817, "learning_rate": 4.63476045690646e-06, "loss": 0.2564, "step": 37675 }, { "epoch": 0.6803458063397829, "grad_norm": 0.7001118659973145, "learning_rate": 4.632367209758278e-06, "loss": 0.2158, "step": 37680 }, { "epoch": 0.6804360857726837, "grad_norm": 0.4548497498035431, "learning_rate": 4.629974394397125e-06, "loss": 0.2227, "step": 37685 }, { "epoch": 0.6805263652055843, "grad_norm": 0.6921202540397644, "learning_rate": 4.627582011015491e-06, "loss": 0.2923, "step": 37690 }, { "epoch": 0.680616644638485, "grad_norm": 0.6251453757286072, "learning_rate": 4.625190059805816e-06, "loss": 0.2418, "step": 37695 }, { "epoch": 0.6807069240713858, "grad_norm": 0.32615020871162415, "learning_rate": 4.6227985409605305e-06, "loss": 0.1913, "step": 37700 }, { "epoch": 0.6807972035042865, "grad_norm": 0.8246029615402222, "learning_rate": 4.620407454672001e-06, "loss": 0.1956, "step": 37705 }, { "epoch": 0.6808874829371871, "grad_norm": 0.6869685053825378, "learning_rate": 4.618016801132582e-06, "loss": 0.2555, "step": 37710 }, { "epoch": 0.6809777623700879, "grad_norm": 0.5275892019271851, "learning_rate": 4.6156265805345775e-06, "loss": 0.1851, "step": 37715 }, { "epoch": 0.6810680418029886, "grad_norm": 0.5740939378738403, "learning_rate": 4.613236793070271e-06, "loss": 0.3119, "step": 37720 }, { "epoch": 0.6811583212358894, "grad_norm": 0.5859683752059937, "learning_rate": 4.610847438931895e-06, "loss": 0.3205, "step": 37725 }, { "epoch": 0.68124860066879, "grad_norm": 0.3429293632507324, "learning_rate": 4.608458518311658e-06, "loss": 0.2084, "step": 37730 }, { "epoch": 0.6813388801016907, "grad_norm": 0.25039711594581604, "learning_rate": 4.606070031401733e-06, "loss": 0.2725, "step": 37735 }, { "epoch": 0.6814291595345915, "grad_norm": 0.27948713302612305, "learning_rate": 4.603681978394262e-06, "loss": 0.1722, "step": 37740 }, { "epoch": 0.6815194389674922, "grad_norm": 0.32593435049057007, "learning_rate": 4.6012943594813365e-06, "loss": 0.2556, "step": 37745 }, { "epoch": 0.6816097184003929, "grad_norm": 0.3133193850517273, "learning_rate": 4.59890717485503e-06, "loss": 0.2198, "step": 37750 }, { "epoch": 0.6816999978332936, "grad_norm": 1.4969346523284912, "learning_rate": 4.596520424707368e-06, "loss": 0.2234, "step": 37755 }, { "epoch": 0.6817902772661943, "grad_norm": 0.3820660412311554, "learning_rate": 4.594134109230351e-06, "loss": 0.2918, "step": 37760 }, { "epoch": 0.6818805566990951, "grad_norm": 0.5165436267852783, "learning_rate": 4.591748228615945e-06, "loss": 0.2041, "step": 37765 }, { "epoch": 0.6819708361319957, "grad_norm": 0.31727850437164307, "learning_rate": 4.589362783056068e-06, "loss": 0.2259, "step": 37770 }, { "epoch": 0.6820611155648965, "grad_norm": 0.4642702639102936, "learning_rate": 4.58697777274262e-06, "loss": 0.1235, "step": 37775 }, { "epoch": 0.6821513949977972, "grad_norm": 0.4593847692012787, "learning_rate": 4.58459319786745e-06, "loss": 0.3199, "step": 37780 }, { "epoch": 0.6822416744306979, "grad_norm": 0.31745219230651855, "learning_rate": 4.582209058622386e-06, "loss": 0.27, "step": 37785 }, { "epoch": 0.6823319538635986, "grad_norm": 0.4456140398979187, "learning_rate": 4.579825355199209e-06, "loss": 0.2096, "step": 37790 }, { "epoch": 0.6824222332964993, "grad_norm": 0.4190645217895508, "learning_rate": 4.577442087789674e-06, "loss": 0.2529, "step": 37795 }, { "epoch": 0.6825125127294001, "grad_norm": 0.32241523265838623, "learning_rate": 4.575059256585497e-06, "loss": 0.1457, "step": 37800 }, { "epoch": 0.6826027921623008, "grad_norm": 0.4470575451850891, "learning_rate": 4.572676861778363e-06, "loss": 0.1714, "step": 37805 }, { "epoch": 0.6826930715952014, "grad_norm": 0.315279096364975, "learning_rate": 4.5702949035599124e-06, "loss": 0.1288, "step": 37810 }, { "epoch": 0.6827833510281022, "grad_norm": 0.7390419244766235, "learning_rate": 4.567913382121761e-06, "loss": 0.1483, "step": 37815 }, { "epoch": 0.6828736304610029, "grad_norm": 0.5478441119194031, "learning_rate": 4.56553229765548e-06, "loss": 0.187, "step": 37820 }, { "epoch": 0.6829639098939037, "grad_norm": 0.4399346113204956, "learning_rate": 4.563151650352614e-06, "loss": 0.1779, "step": 37825 }, { "epoch": 0.6830541893268043, "grad_norm": 0.9848801493644714, "learning_rate": 4.560771440404668e-06, "loss": 0.2331, "step": 37830 }, { "epoch": 0.683144468759705, "grad_norm": 0.3818892240524292, "learning_rate": 4.558391668003116e-06, "loss": 0.1728, "step": 37835 }, { "epoch": 0.6832347481926058, "grad_norm": 0.5127613544464111, "learning_rate": 4.556012333339386e-06, "loss": 0.2138, "step": 37840 }, { "epoch": 0.6833250276255065, "grad_norm": 0.4102364182472229, "learning_rate": 4.553633436604885e-06, "loss": 0.287, "step": 37845 }, { "epoch": 0.6834153070584071, "grad_norm": 0.3781411349773407, "learning_rate": 4.5512549779909765e-06, "loss": 0.2346, "step": 37850 }, { "epoch": 0.6835055864913079, "grad_norm": 0.3069896101951599, "learning_rate": 4.548876957688985e-06, "loss": 0.2677, "step": 37855 }, { "epoch": 0.6835958659242086, "grad_norm": 0.351437509059906, "learning_rate": 4.546499375890209e-06, "loss": 0.2813, "step": 37860 }, { "epoch": 0.6836861453571094, "grad_norm": 0.3506467640399933, "learning_rate": 4.5441222327859055e-06, "loss": 0.194, "step": 37865 }, { "epoch": 0.68377642479001, "grad_norm": 0.45503199100494385, "learning_rate": 4.541745528567306e-06, "loss": 0.1705, "step": 37870 }, { "epoch": 0.6838667042229107, "grad_norm": 0.5410659313201904, "learning_rate": 4.539369263425589e-06, "loss": 0.1441, "step": 37875 }, { "epoch": 0.6839569836558115, "grad_norm": 0.5261062979698181, "learning_rate": 4.536993437551916e-06, "loss": 0.2447, "step": 37880 }, { "epoch": 0.6840472630887122, "grad_norm": 0.08837950229644775, "learning_rate": 4.534618051137397e-06, "loss": 0.1915, "step": 37885 }, { "epoch": 0.6841375425216129, "grad_norm": 0.5404590964317322, "learning_rate": 4.532243104373123e-06, "loss": 0.1768, "step": 37890 }, { "epoch": 0.6842278219545136, "grad_norm": 0.4018935263156891, "learning_rate": 4.5298685974501285e-06, "loss": 0.2204, "step": 37895 }, { "epoch": 0.6843181013874143, "grad_norm": 0.42148804664611816, "learning_rate": 4.527494530559442e-06, "loss": 0.2727, "step": 37900 }, { "epoch": 0.6844083808203151, "grad_norm": 0.36024728417396545, "learning_rate": 4.5251209038920276e-06, "loss": 0.274, "step": 37905 }, { "epoch": 0.6844986602532157, "grad_norm": 0.40288153290748596, "learning_rate": 4.522747717638833e-06, "loss": 0.285, "step": 37910 }, { "epoch": 0.6845889396861164, "grad_norm": 0.3067623972892761, "learning_rate": 4.520374971990758e-06, "loss": 0.2437, "step": 37915 }, { "epoch": 0.6846792191190172, "grad_norm": 0.25448545813560486, "learning_rate": 4.51800266713868e-06, "loss": 0.1896, "step": 37920 }, { "epoch": 0.6847694985519179, "grad_norm": 0.5942654609680176, "learning_rate": 4.515630803273425e-06, "loss": 0.3519, "step": 37925 }, { "epoch": 0.6848597779848186, "grad_norm": 0.4476010501384735, "learning_rate": 4.513259380585798e-06, "loss": 0.2339, "step": 37930 }, { "epoch": 0.6849500574177193, "grad_norm": 0.25513124465942383, "learning_rate": 4.51088839926656e-06, "loss": 0.285, "step": 37935 }, { "epoch": 0.68504033685062, "grad_norm": 0.8240607380867004, "learning_rate": 4.508517859506445e-06, "loss": 0.2533, "step": 37940 }, { "epoch": 0.6851306162835208, "grad_norm": 0.4471612572669983, "learning_rate": 4.506147761496141e-06, "loss": 0.2492, "step": 37945 }, { "epoch": 0.6852208957164214, "grad_norm": 0.4679805338382721, "learning_rate": 4.503778105426302e-06, "loss": 0.1738, "step": 37950 }, { "epoch": 0.6853111751493222, "grad_norm": 0.31638088822364807, "learning_rate": 4.5014088914875555e-06, "loss": 0.2002, "step": 37955 }, { "epoch": 0.6854014545822229, "grad_norm": 1.1712069511413574, "learning_rate": 4.499040119870479e-06, "loss": 0.2783, "step": 37960 }, { "epoch": 0.6854917340151236, "grad_norm": 0.5667579770088196, "learning_rate": 4.496671790765636e-06, "loss": 0.1797, "step": 37965 }, { "epoch": 0.6855820134480243, "grad_norm": 0.9462890028953552, "learning_rate": 4.4943039043635305e-06, "loss": 0.1809, "step": 37970 }, { "epoch": 0.685672292880925, "grad_norm": 0.6696999073028564, "learning_rate": 4.491936460854649e-06, "loss": 0.1621, "step": 37975 }, { "epoch": 0.6857625723138258, "grad_norm": 0.4125041663646698, "learning_rate": 4.4895694604294295e-06, "loss": 0.2684, "step": 37980 }, { "epoch": 0.6858528517467265, "grad_norm": 0.38330793380737305, "learning_rate": 4.487202903278283e-06, "loss": 0.2668, "step": 37985 }, { "epoch": 0.6859431311796271, "grad_norm": 0.4018458127975464, "learning_rate": 4.48483678959158e-06, "loss": 0.2052, "step": 37990 }, { "epoch": 0.6860334106125279, "grad_norm": 0.416517972946167, "learning_rate": 4.482471119559658e-06, "loss": 0.2099, "step": 37995 }, { "epoch": 0.6861236900454286, "grad_norm": 0.3194110095500946, "learning_rate": 4.480105893372817e-06, "loss": 0.213, "step": 38000 }, { "epoch": 0.6862139694783294, "grad_norm": 0.39987972378730774, "learning_rate": 4.477741111221327e-06, "loss": 0.2826, "step": 38005 }, { "epoch": 0.68630424891123, "grad_norm": 0.5515651702880859, "learning_rate": 4.475376773295411e-06, "loss": 0.2616, "step": 38010 }, { "epoch": 0.6863945283441307, "grad_norm": 0.42721080780029297, "learning_rate": 4.47301287978527e-06, "loss": 0.2316, "step": 38015 }, { "epoch": 0.6864848077770315, "grad_norm": 0.5164182782173157, "learning_rate": 4.470649430881053e-06, "loss": 0.166, "step": 38020 }, { "epoch": 0.6865750872099322, "grad_norm": 0.4573683738708496, "learning_rate": 4.468286426772887e-06, "loss": 0.2536, "step": 38025 }, { "epoch": 0.6866653666428328, "grad_norm": 0.40096044540405273, "learning_rate": 4.465923867650859e-06, "loss": 0.2237, "step": 38030 }, { "epoch": 0.6867556460757336, "grad_norm": 0.4793723225593567, "learning_rate": 4.463561753705022e-06, "loss": 0.1924, "step": 38035 }, { "epoch": 0.6868459255086343, "grad_norm": 1.0447996854782104, "learning_rate": 4.461200085125389e-06, "loss": 0.2464, "step": 38040 }, { "epoch": 0.6869362049415351, "grad_norm": 0.4933064877986908, "learning_rate": 4.458838862101934e-06, "loss": 0.2805, "step": 38045 }, { "epoch": 0.6870264843744358, "grad_norm": 0.44689321517944336, "learning_rate": 4.45647808482461e-06, "loss": 0.299, "step": 38050 }, { "epoch": 0.6871167638073364, "grad_norm": 0.5152558088302612, "learning_rate": 4.4541177534833125e-06, "loss": 0.2808, "step": 38055 }, { "epoch": 0.6872070432402372, "grad_norm": 0.2078225165605545, "learning_rate": 4.451757868267919e-06, "loss": 0.1442, "step": 38060 }, { "epoch": 0.6872973226731379, "grad_norm": 0.26201045513153076, "learning_rate": 4.449398429368268e-06, "loss": 0.26, "step": 38065 }, { "epoch": 0.6873876021060387, "grad_norm": 0.5290224552154541, "learning_rate": 4.4470394369741585e-06, "loss": 0.2903, "step": 38070 }, { "epoch": 0.6874778815389393, "grad_norm": 0.2802322506904602, "learning_rate": 4.4446808912753495e-06, "loss": 0.1756, "step": 38075 }, { "epoch": 0.68756816097184, "grad_norm": 0.5011495351791382, "learning_rate": 4.442322792461574e-06, "loss": 0.2399, "step": 38080 }, { "epoch": 0.6876584404047408, "grad_norm": 0.3852989673614502, "learning_rate": 4.43996514072252e-06, "loss": 0.2165, "step": 38085 }, { "epoch": 0.6877487198376415, "grad_norm": 0.5957157611846924, "learning_rate": 4.437607936247843e-06, "loss": 0.2796, "step": 38090 }, { "epoch": 0.6878389992705422, "grad_norm": 0.31345441937446594, "learning_rate": 4.4352511792271655e-06, "loss": 0.1827, "step": 38095 }, { "epoch": 0.6879292787034429, "grad_norm": 0.4975501298904419, "learning_rate": 4.432894869850077e-06, "loss": 0.2044, "step": 38100 }, { "epoch": 0.6880195581363436, "grad_norm": 1.0897471904754639, "learning_rate": 4.430539008306113e-06, "loss": 0.1926, "step": 38105 }, { "epoch": 0.6881098375692444, "grad_norm": 0.48652711510658264, "learning_rate": 4.428183594784796e-06, "loss": 0.2263, "step": 38110 }, { "epoch": 0.688200117002145, "grad_norm": 0.41459593176841736, "learning_rate": 4.425828629475595e-06, "loss": 0.2908, "step": 38115 }, { "epoch": 0.6882903964350457, "grad_norm": 0.4874315559864044, "learning_rate": 4.423474112567956e-06, "loss": 0.2287, "step": 38120 }, { "epoch": 0.6883806758679465, "grad_norm": 0.5218778252601624, "learning_rate": 4.421120044251276e-06, "loss": 0.2283, "step": 38125 }, { "epoch": 0.6884709553008472, "grad_norm": 0.4416961371898651, "learning_rate": 4.4187664247149256e-06, "loss": 0.2895, "step": 38130 }, { "epoch": 0.6885612347337479, "grad_norm": 0.18363463878631592, "learning_rate": 4.416413254148241e-06, "loss": 0.1656, "step": 38135 }, { "epoch": 0.6886515141666486, "grad_norm": 0.45894527435302734, "learning_rate": 4.414060532740509e-06, "loss": 0.3297, "step": 38140 }, { "epoch": 0.6887417935995493, "grad_norm": 1.545738697052002, "learning_rate": 4.411708260680997e-06, "loss": 0.1385, "step": 38145 }, { "epoch": 0.6888320730324501, "grad_norm": 0.5047971606254578, "learning_rate": 4.409356438158921e-06, "loss": 0.2842, "step": 38150 }, { "epoch": 0.6889223524653507, "grad_norm": 0.8523099422454834, "learning_rate": 4.407005065363473e-06, "loss": 0.2644, "step": 38155 }, { "epoch": 0.6890126318982515, "grad_norm": 0.7765586972236633, "learning_rate": 4.404654142483802e-06, "loss": 0.1474, "step": 38160 }, { "epoch": 0.6891029113311522, "grad_norm": 0.507250964641571, "learning_rate": 4.402303669709026e-06, "loss": 0.2036, "step": 38165 }, { "epoch": 0.6891931907640529, "grad_norm": 0.8221535682678223, "learning_rate": 4.399953647228216e-06, "loss": 0.2544, "step": 38170 }, { "epoch": 0.6892834701969536, "grad_norm": 0.44962579011917114, "learning_rate": 4.397604075230425e-06, "loss": 0.3094, "step": 38175 }, { "epoch": 0.6893737496298543, "grad_norm": 0.5254794955253601, "learning_rate": 4.395254953904648e-06, "loss": 0.2317, "step": 38180 }, { "epoch": 0.6894640290627551, "grad_norm": 0.5492463707923889, "learning_rate": 4.3929062834398624e-06, "loss": 0.2245, "step": 38185 }, { "epoch": 0.6895543084956558, "grad_norm": 0.5577696561813354, "learning_rate": 4.390558064024997e-06, "loss": 0.21, "step": 38190 }, { "epoch": 0.6896445879285564, "grad_norm": 0.3024185299873352, "learning_rate": 4.388210295848949e-06, "loss": 0.1505, "step": 38195 }, { "epoch": 0.6897348673614572, "grad_norm": 0.42269188165664673, "learning_rate": 4.385862979100581e-06, "loss": 0.2802, "step": 38200 }, { "epoch": 0.6898251467943579, "grad_norm": 0.7994741797447205, "learning_rate": 4.3835161139687236e-06, "loss": 0.2289, "step": 38205 }, { "epoch": 0.6899154262272587, "grad_norm": 0.25307267904281616, "learning_rate": 4.381169700642153e-06, "loss": 0.1675, "step": 38210 }, { "epoch": 0.6900057056601593, "grad_norm": 0.5518661141395569, "learning_rate": 4.378823739309631e-06, "loss": 0.1861, "step": 38215 }, { "epoch": 0.69009598509306, "grad_norm": 0.4862739145755768, "learning_rate": 4.376478230159866e-06, "loss": 0.2629, "step": 38220 }, { "epoch": 0.6901862645259608, "grad_norm": 0.3612036406993866, "learning_rate": 4.374133173381541e-06, "loss": 0.2857, "step": 38225 }, { "epoch": 0.6902765439588615, "grad_norm": 0.4374309182167053, "learning_rate": 4.371788569163301e-06, "loss": 0.2152, "step": 38230 }, { "epoch": 0.6903668233917621, "grad_norm": 0.8651866316795349, "learning_rate": 4.369444417693747e-06, "loss": 0.2508, "step": 38235 }, { "epoch": 0.6904571028246629, "grad_norm": 0.7004550695419312, "learning_rate": 4.3671007191614545e-06, "loss": 0.2494, "step": 38240 }, { "epoch": 0.6905473822575636, "grad_norm": 0.4672585725784302, "learning_rate": 4.364757473754951e-06, "loss": 0.1394, "step": 38245 }, { "epoch": 0.6906376616904644, "grad_norm": 0.36849546432495117, "learning_rate": 4.362414681662739e-06, "loss": 0.1905, "step": 38250 }, { "epoch": 0.690727941123365, "grad_norm": 0.3537253737449646, "learning_rate": 4.360072343073273e-06, "loss": 0.2032, "step": 38255 }, { "epoch": 0.6908182205562657, "grad_norm": 0.3273913860321045, "learning_rate": 4.357730458174982e-06, "loss": 0.1979, "step": 38260 }, { "epoch": 0.6909084999891665, "grad_norm": 0.36351215839385986, "learning_rate": 4.355389027156251e-06, "loss": 0.3141, "step": 38265 }, { "epoch": 0.6909987794220672, "grad_norm": 0.316074937582016, "learning_rate": 4.353048050205437e-06, "loss": 0.2884, "step": 38270 }, { "epoch": 0.6910890588549679, "grad_norm": 0.7243396639823914, "learning_rate": 4.350707527510847e-06, "loss": 0.246, "step": 38275 }, { "epoch": 0.6911793382878686, "grad_norm": 0.5573142170906067, "learning_rate": 4.348367459260764e-06, "loss": 0.2159, "step": 38280 }, { "epoch": 0.6912696177207693, "grad_norm": 0.43125444650650024, "learning_rate": 4.3460278456434245e-06, "loss": 0.1907, "step": 38285 }, { "epoch": 0.6913598971536701, "grad_norm": 0.5265786051750183, "learning_rate": 4.3436886868470375e-06, "loss": 0.2275, "step": 38290 }, { "epoch": 0.6914501765865707, "grad_norm": 0.3848966658115387, "learning_rate": 4.341349983059769e-06, "loss": 0.2373, "step": 38295 }, { "epoch": 0.6915404560194715, "grad_norm": 0.3046269118785858, "learning_rate": 4.339011734469757e-06, "loss": 0.1633, "step": 38300 }, { "epoch": 0.6916307354523722, "grad_norm": 0.4017146825790405, "learning_rate": 4.336673941265087e-06, "loss": 0.1801, "step": 38305 }, { "epoch": 0.6917210148852729, "grad_norm": 0.6081216335296631, "learning_rate": 4.334336603633826e-06, "loss": 0.2944, "step": 38310 }, { "epoch": 0.6918112943181736, "grad_norm": 0.34811705350875854, "learning_rate": 4.331999721763992e-06, "loss": 0.2143, "step": 38315 }, { "epoch": 0.6919015737510743, "grad_norm": 0.41205698251724243, "learning_rate": 4.329663295843566e-06, "loss": 0.2577, "step": 38320 }, { "epoch": 0.691991853183975, "grad_norm": 0.3803647756576538, "learning_rate": 4.3273273260605005e-06, "loss": 0.271, "step": 38325 }, { "epoch": 0.6920821326168758, "grad_norm": 0.5279304385185242, "learning_rate": 4.324991812602708e-06, "loss": 0.3284, "step": 38330 }, { "epoch": 0.6921724120497764, "grad_norm": 0.47987908124923706, "learning_rate": 4.322656755658066e-06, "loss": 0.2659, "step": 38335 }, { "epoch": 0.6922626914826772, "grad_norm": 0.45216530561447144, "learning_rate": 4.320322155414406e-06, "loss": 0.2845, "step": 38340 }, { "epoch": 0.6923529709155779, "grad_norm": 0.575176477432251, "learning_rate": 4.317988012059537e-06, "loss": 0.1986, "step": 38345 }, { "epoch": 0.6924432503484786, "grad_norm": 0.49986568093299866, "learning_rate": 4.315654325781217e-06, "loss": 0.27, "step": 38350 }, { "epoch": 0.6925335297813793, "grad_norm": 0.6504265069961548, "learning_rate": 4.3133210967671765e-06, "loss": 0.2235, "step": 38355 }, { "epoch": 0.69262380921428, "grad_norm": 0.23776890337467194, "learning_rate": 4.310988325205109e-06, "loss": 0.211, "step": 38360 }, { "epoch": 0.6927140886471808, "grad_norm": 0.40864163637161255, "learning_rate": 4.308656011282669e-06, "loss": 0.2471, "step": 38365 }, { "epoch": 0.6928043680800815, "grad_norm": 0.3818129599094391, "learning_rate": 4.3063241551874715e-06, "loss": 0.2566, "step": 38370 }, { "epoch": 0.6928946475129821, "grad_norm": 0.27496612071990967, "learning_rate": 4.303992757107102e-06, "loss": 0.2344, "step": 38375 }, { "epoch": 0.6929849269458829, "grad_norm": 0.6847406029701233, "learning_rate": 4.301661817229096e-06, "loss": 0.2341, "step": 38380 }, { "epoch": 0.6930752063787836, "grad_norm": 0.2835419178009033, "learning_rate": 4.29933133574097e-06, "loss": 0.2065, "step": 38385 }, { "epoch": 0.6931654858116844, "grad_norm": 0.5512233972549438, "learning_rate": 4.297001312830188e-06, "loss": 0.2103, "step": 38390 }, { "epoch": 0.693255765244585, "grad_norm": 0.32876721024513245, "learning_rate": 4.294671748684183e-06, "loss": 0.2412, "step": 38395 }, { "epoch": 0.6933460446774857, "grad_norm": 0.36407238245010376, "learning_rate": 4.292342643490357e-06, "loss": 0.1719, "step": 38400 }, { "epoch": 0.6934363241103865, "grad_norm": 0.44209739565849304, "learning_rate": 4.290013997436069e-06, "loss": 0.2445, "step": 38405 }, { "epoch": 0.6935266035432872, "grad_norm": 0.6222474575042725, "learning_rate": 4.287685810708639e-06, "loss": 0.2812, "step": 38410 }, { "epoch": 0.6936168829761878, "grad_norm": 0.06723568588495255, "learning_rate": 4.28535808349535e-06, "loss": 0.173, "step": 38415 }, { "epoch": 0.6937071624090886, "grad_norm": 0.3727383613586426, "learning_rate": 4.2830308159834535e-06, "loss": 0.2345, "step": 38420 }, { "epoch": 0.6937974418419893, "grad_norm": 0.36739301681518555, "learning_rate": 4.280704008360161e-06, "loss": 0.2195, "step": 38425 }, { "epoch": 0.6938877212748901, "grad_norm": 0.524774968624115, "learning_rate": 4.278377660812651e-06, "loss": 0.2525, "step": 38430 }, { "epoch": 0.6939780007077907, "grad_norm": 0.5487887859344482, "learning_rate": 4.276051773528056e-06, "loss": 0.2036, "step": 38435 }, { "epoch": 0.6940682801406914, "grad_norm": 0.4065292477607727, "learning_rate": 4.273726346693481e-06, "loss": 0.3146, "step": 38440 }, { "epoch": 0.6941585595735922, "grad_norm": 0.3451765179634094, "learning_rate": 4.271401380495984e-06, "loss": 0.2192, "step": 38445 }, { "epoch": 0.6942488390064929, "grad_norm": 0.456271231174469, "learning_rate": 4.269076875122598e-06, "loss": 0.2119, "step": 38450 }, { "epoch": 0.6943391184393937, "grad_norm": 0.45741716027259827, "learning_rate": 4.266752830760307e-06, "loss": 0.2886, "step": 38455 }, { "epoch": 0.6944293978722943, "grad_norm": 0.5923446416854858, "learning_rate": 4.264429247596066e-06, "loss": 0.2288, "step": 38460 }, { "epoch": 0.694519677305195, "grad_norm": 0.4566524922847748, "learning_rate": 4.262106125816789e-06, "loss": 0.299, "step": 38465 }, { "epoch": 0.6946099567380958, "grad_norm": 0.6255629658699036, "learning_rate": 4.259783465609361e-06, "loss": 0.1874, "step": 38470 }, { "epoch": 0.6947002361709965, "grad_norm": 0.5031535625457764, "learning_rate": 4.257461267160614e-06, "loss": 0.2611, "step": 38475 }, { "epoch": 0.6947905156038972, "grad_norm": 1.097783088684082, "learning_rate": 4.255139530657359e-06, "loss": 0.1741, "step": 38480 }, { "epoch": 0.6948807950367979, "grad_norm": 0.5664388537406921, "learning_rate": 4.252818256286355e-06, "loss": 0.3781, "step": 38485 }, { "epoch": 0.6949710744696986, "grad_norm": 0.7404657006263733, "learning_rate": 4.2504974442343374e-06, "loss": 0.2243, "step": 38490 }, { "epoch": 0.6950613539025994, "grad_norm": 0.37026259303092957, "learning_rate": 4.248177094688e-06, "loss": 0.1814, "step": 38495 }, { "epoch": 0.6951516333355, "grad_norm": 1.072288155555725, "learning_rate": 4.2458572078339935e-06, "loss": 0.256, "step": 38500 }, { "epoch": 0.6952419127684007, "grad_norm": 0.369281142950058, "learning_rate": 4.243537783858941e-06, "loss": 0.1408, "step": 38505 }, { "epoch": 0.6953321922013015, "grad_norm": 0.35067546367645264, "learning_rate": 4.241218822949416e-06, "loss": 0.2327, "step": 38510 }, { "epoch": 0.6954224716342022, "grad_norm": 0.5609583258628845, "learning_rate": 4.238900325291971e-06, "loss": 0.2028, "step": 38515 }, { "epoch": 0.6955127510671029, "grad_norm": 0.2797073721885681, "learning_rate": 4.236582291073104e-06, "loss": 0.2367, "step": 38520 }, { "epoch": 0.6956030305000036, "grad_norm": 0.3344927728176117, "learning_rate": 4.234264720479286e-06, "loss": 0.2538, "step": 38525 }, { "epoch": 0.6956933099329043, "grad_norm": 0.7331802845001221, "learning_rate": 4.231947613696951e-06, "loss": 0.1726, "step": 38530 }, { "epoch": 0.6957835893658051, "grad_norm": 0.6007274389266968, "learning_rate": 4.229630970912497e-06, "loss": 0.2451, "step": 38535 }, { "epoch": 0.6958738687987057, "grad_norm": 0.32419684529304504, "learning_rate": 4.227314792312273e-06, "loss": 0.267, "step": 38540 }, { "epoch": 0.6959641482316065, "grad_norm": 0.6841797828674316, "learning_rate": 4.224999078082606e-06, "loss": 0.2077, "step": 38545 }, { "epoch": 0.6960544276645072, "grad_norm": 0.37495937943458557, "learning_rate": 4.2226838284097704e-06, "loss": 0.2689, "step": 38550 }, { "epoch": 0.6961447070974079, "grad_norm": 0.4198375642299652, "learning_rate": 4.220369043480016e-06, "loss": 0.1817, "step": 38555 }, { "epoch": 0.6962349865303086, "grad_norm": 0.26349231600761414, "learning_rate": 4.21805472347955e-06, "loss": 0.1551, "step": 38560 }, { "epoch": 0.6963252659632093, "grad_norm": 0.4598008096218109, "learning_rate": 4.2157408685945466e-06, "loss": 0.2686, "step": 38565 }, { "epoch": 0.6964155453961101, "grad_norm": 0.2681581377983093, "learning_rate": 4.213427479011132e-06, "loss": 0.2999, "step": 38570 }, { "epoch": 0.6965058248290108, "grad_norm": 0.556578516960144, "learning_rate": 4.211114554915407e-06, "loss": 0.2522, "step": 38575 }, { "epoch": 0.6965961042619114, "grad_norm": 0.4224514067173004, "learning_rate": 4.208802096493423e-06, "loss": 0.2756, "step": 38580 }, { "epoch": 0.6966863836948122, "grad_norm": 0.4554044306278229, "learning_rate": 4.206490103931208e-06, "loss": 0.29, "step": 38585 }, { "epoch": 0.6967766631277129, "grad_norm": 0.6609202027320862, "learning_rate": 4.204178577414737e-06, "loss": 0.2144, "step": 38590 }, { "epoch": 0.6968669425606137, "grad_norm": 0.4529894292354584, "learning_rate": 4.2018675171299604e-06, "loss": 0.1861, "step": 38595 }, { "epoch": 0.6969572219935143, "grad_norm": 0.32098129391670227, "learning_rate": 4.1995569232627895e-06, "loss": 0.2179, "step": 38600 }, { "epoch": 0.697047501426415, "grad_norm": 0.5516554713249207, "learning_rate": 4.197246795999087e-06, "loss": 0.2387, "step": 38605 }, { "epoch": 0.6971377808593158, "grad_norm": 0.4384140968322754, "learning_rate": 4.194937135524694e-06, "loss": 0.1449, "step": 38610 }, { "epoch": 0.6972280602922165, "grad_norm": 0.5471542477607727, "learning_rate": 4.192627942025397e-06, "loss": 0.3431, "step": 38615 }, { "epoch": 0.6973183397251171, "grad_norm": 0.340838223695755, "learning_rate": 4.1903192156869576e-06, "loss": 0.2758, "step": 38620 }, { "epoch": 0.6974086191580179, "grad_norm": 0.4325224757194519, "learning_rate": 4.188010956695098e-06, "loss": 0.2174, "step": 38625 }, { "epoch": 0.6974988985909186, "grad_norm": 0.3328706920146942, "learning_rate": 4.185703165235503e-06, "loss": 0.2745, "step": 38630 }, { "epoch": 0.6975891780238194, "grad_norm": 0.394483745098114, "learning_rate": 4.183395841493811e-06, "loss": 0.2074, "step": 38635 }, { "epoch": 0.69767945745672, "grad_norm": 0.34542450308799744, "learning_rate": 4.1810889856556355e-06, "loss": 0.1403, "step": 38640 }, { "epoch": 0.6977697368896207, "grad_norm": 0.26991739869117737, "learning_rate": 4.178782597906541e-06, "loss": 0.174, "step": 38645 }, { "epoch": 0.6978600163225215, "grad_norm": 0.38894426822662354, "learning_rate": 4.176476678432066e-06, "loss": 0.2506, "step": 38650 }, { "epoch": 0.6979502957554222, "grad_norm": 0.5923386216163635, "learning_rate": 4.174171227417696e-06, "loss": 0.2821, "step": 38655 }, { "epoch": 0.6980405751883229, "grad_norm": 0.3713725507259369, "learning_rate": 4.171866245048894e-06, "loss": 0.1851, "step": 38660 }, { "epoch": 0.6981308546212236, "grad_norm": 0.4045335054397583, "learning_rate": 4.169561731511078e-06, "loss": 0.2421, "step": 38665 }, { "epoch": 0.6982211340541243, "grad_norm": 0.2156153917312622, "learning_rate": 4.167257686989632e-06, "loss": 0.2216, "step": 38670 }, { "epoch": 0.6983114134870251, "grad_norm": 0.35402876138687134, "learning_rate": 4.164954111669895e-06, "loss": 0.324, "step": 38675 }, { "epoch": 0.6984016929199257, "grad_norm": 0.639041543006897, "learning_rate": 4.162651005737177e-06, "loss": 0.1975, "step": 38680 }, { "epoch": 0.6984919723528265, "grad_norm": 0.38943997025489807, "learning_rate": 4.16034836937674e-06, "loss": 0.2254, "step": 38685 }, { "epoch": 0.6985822517857272, "grad_norm": 0.44801610708236694, "learning_rate": 4.158046202773819e-06, "loss": 0.2389, "step": 38690 }, { "epoch": 0.6986725312186279, "grad_norm": 0.5811364054679871, "learning_rate": 4.15574450611361e-06, "loss": 0.1561, "step": 38695 }, { "epoch": 0.6987628106515286, "grad_norm": 0.22529548406600952, "learning_rate": 4.153443279581259e-06, "loss": 0.2403, "step": 38700 }, { "epoch": 0.6988530900844293, "grad_norm": 0.9179227352142334, "learning_rate": 4.151142523361892e-06, "loss": 0.1893, "step": 38705 }, { "epoch": 0.69894336951733, "grad_norm": 0.6835107207298279, "learning_rate": 4.14884223764058e-06, "loss": 0.2053, "step": 38710 }, { "epoch": 0.6990336489502308, "grad_norm": 0.18289843201637268, "learning_rate": 4.146542422602372e-06, "loss": 0.1904, "step": 38715 }, { "epoch": 0.6991239283831314, "grad_norm": 1.0548094511032104, "learning_rate": 4.144243078432263e-06, "loss": 0.2543, "step": 38720 }, { "epoch": 0.6992142078160322, "grad_norm": 0.4621219336986542, "learning_rate": 4.141944205315223e-06, "loss": 0.1281, "step": 38725 }, { "epoch": 0.6993044872489329, "grad_norm": 0.3635997474193573, "learning_rate": 4.13964580343618e-06, "loss": 0.346, "step": 38730 }, { "epoch": 0.6993947666818336, "grad_norm": 0.5849667191505432, "learning_rate": 4.137347872980026e-06, "loss": 0.2862, "step": 38735 }, { "epoch": 0.6994850461147343, "grad_norm": 0.32722166180610657, "learning_rate": 4.135050414131606e-06, "loss": 0.2472, "step": 38740 }, { "epoch": 0.699575325547635, "grad_norm": 0.39641356468200684, "learning_rate": 4.132753427075743e-06, "loss": 0.1414, "step": 38745 }, { "epoch": 0.6996656049805358, "grad_norm": 0.8139769434928894, "learning_rate": 4.130456911997202e-06, "loss": 0.3283, "step": 38750 }, { "epoch": 0.6997558844134365, "grad_norm": 0.7956682443618774, "learning_rate": 4.12816086908073e-06, "loss": 0.1844, "step": 38755 }, { "epoch": 0.6998461638463371, "grad_norm": 0.4433048963546753, "learning_rate": 4.125865298511022e-06, "loss": 0.391, "step": 38760 }, { "epoch": 0.6999364432792379, "grad_norm": 0.4708251655101776, "learning_rate": 4.1235702004727444e-06, "loss": 0.2378, "step": 38765 }, { "epoch": 0.7000267227121386, "grad_norm": 0.6387839317321777, "learning_rate": 4.1212755751505184e-06, "loss": 0.2308, "step": 38770 }, { "epoch": 0.7001170021450394, "grad_norm": 0.4458686113357544, "learning_rate": 4.118981422728927e-06, "loss": 0.2072, "step": 38775 }, { "epoch": 0.70020728157794, "grad_norm": 0.6436731219291687, "learning_rate": 4.116687743392524e-06, "loss": 0.3366, "step": 38780 }, { "epoch": 0.7002975610108407, "grad_norm": 0.5403485894203186, "learning_rate": 4.1143945373258145e-06, "loss": 0.1223, "step": 38785 }, { "epoch": 0.7003878404437415, "grad_norm": 0.49621668457984924, "learning_rate": 4.11210180471327e-06, "loss": 0.2061, "step": 38790 }, { "epoch": 0.7004781198766422, "grad_norm": 0.2502276301383972, "learning_rate": 4.109809545739327e-06, "loss": 0.3227, "step": 38795 }, { "epoch": 0.7005683993095428, "grad_norm": 0.34249821305274963, "learning_rate": 4.107517760588384e-06, "loss": 0.2294, "step": 38800 }, { "epoch": 0.7006586787424436, "grad_norm": 0.3874669671058655, "learning_rate": 4.10522644944479e-06, "loss": 0.3392, "step": 38805 }, { "epoch": 0.7007489581753443, "grad_norm": 0.48335301876068115, "learning_rate": 4.102935612492874e-06, "loss": 0.2238, "step": 38810 }, { "epoch": 0.7008392376082451, "grad_norm": 0.12804335355758667, "learning_rate": 4.100645249916908e-06, "loss": 0.1596, "step": 38815 }, { "epoch": 0.7009295170411457, "grad_norm": 0.4075047969818115, "learning_rate": 4.0983553619011395e-06, "loss": 0.1921, "step": 38820 }, { "epoch": 0.7010197964740464, "grad_norm": 0.7569358348846436, "learning_rate": 4.096065948629773e-06, "loss": 0.3234, "step": 38825 }, { "epoch": 0.7011100759069472, "grad_norm": 0.375936359167099, "learning_rate": 4.093777010286978e-06, "loss": 0.3345, "step": 38830 }, { "epoch": 0.7012003553398479, "grad_norm": 0.6364753842353821, "learning_rate": 4.091488547056876e-06, "loss": 0.2591, "step": 38835 }, { "epoch": 0.7012906347727487, "grad_norm": 0.3923836648464203, "learning_rate": 4.089200559123566e-06, "loss": 0.1587, "step": 38840 }, { "epoch": 0.7013809142056493, "grad_norm": 0.4723087251186371, "learning_rate": 4.086913046671093e-06, "loss": 0.1805, "step": 38845 }, { "epoch": 0.70147119363855, "grad_norm": 0.41670605540275574, "learning_rate": 4.0846260098834745e-06, "loss": 0.1714, "step": 38850 }, { "epoch": 0.7015614730714508, "grad_norm": 0.17346037924289703, "learning_rate": 4.082339448944682e-06, "loss": 0.2007, "step": 38855 }, { "epoch": 0.7016517525043515, "grad_norm": 0.3650917410850525, "learning_rate": 4.080053364038655e-06, "loss": 0.2103, "step": 38860 }, { "epoch": 0.7017420319372522, "grad_norm": 0.33513301610946655, "learning_rate": 4.077767755349297e-06, "loss": 0.3319, "step": 38865 }, { "epoch": 0.7018323113701529, "grad_norm": 0.3749558627605438, "learning_rate": 4.075482623060459e-06, "loss": 0.2826, "step": 38870 }, { "epoch": 0.7019225908030536, "grad_norm": 0.3748433589935303, "learning_rate": 4.073197967355974e-06, "loss": 0.2341, "step": 38875 }, { "epoch": 0.7020128702359544, "grad_norm": 0.3942178189754486, "learning_rate": 4.070913788419616e-06, "loss": 0.2216, "step": 38880 }, { "epoch": 0.702103149668855, "grad_norm": 1.1604423522949219, "learning_rate": 4.068630086435136e-06, "loss": 0.2147, "step": 38885 }, { "epoch": 0.7021934291017558, "grad_norm": 0.6453258991241455, "learning_rate": 4.066346861586239e-06, "loss": 0.2069, "step": 38890 }, { "epoch": 0.7022837085346565, "grad_norm": 0.4149574637413025, "learning_rate": 4.064064114056599e-06, "loss": 0.1968, "step": 38895 }, { "epoch": 0.7023739879675572, "grad_norm": 0.23456250131130219, "learning_rate": 4.0617818440298406e-06, "loss": 0.246, "step": 38900 }, { "epoch": 0.7024642674004579, "grad_norm": 0.20911572873592377, "learning_rate": 4.05950005168956e-06, "loss": 0.169, "step": 38905 }, { "epoch": 0.7025545468333586, "grad_norm": 0.36427533626556396, "learning_rate": 4.057218737219305e-06, "loss": 0.191, "step": 38910 }, { "epoch": 0.7026448262662593, "grad_norm": 0.47539886832237244, "learning_rate": 4.054937900802598e-06, "loss": 0.1408, "step": 38915 }, { "epoch": 0.7027351056991601, "grad_norm": 0.3398011326789856, "learning_rate": 4.0526575426229086e-06, "loss": 0.1917, "step": 38920 }, { "epoch": 0.7028253851320607, "grad_norm": 0.3850838840007782, "learning_rate": 4.0503776628636796e-06, "loss": 0.1996, "step": 38925 }, { "epoch": 0.7029156645649615, "grad_norm": 0.49528321623802185, "learning_rate": 4.048098261708309e-06, "loss": 0.2035, "step": 38930 }, { "epoch": 0.7030059439978622, "grad_norm": 0.5522703528404236, "learning_rate": 4.045819339340161e-06, "loss": 0.2571, "step": 38935 }, { "epoch": 0.703096223430763, "grad_norm": 0.5673417448997498, "learning_rate": 4.043540895942554e-06, "loss": 0.292, "step": 38940 }, { "epoch": 0.7031865028636636, "grad_norm": 0.2567455470561981, "learning_rate": 4.041262931698778e-06, "loss": 0.2587, "step": 38945 }, { "epoch": 0.7032767822965643, "grad_norm": 0.20778410136699677, "learning_rate": 4.0389854467920695e-06, "loss": 0.258, "step": 38950 }, { "epoch": 0.7033670617294651, "grad_norm": 0.3900172710418701, "learning_rate": 4.0367084414056435e-06, "loss": 0.1686, "step": 38955 }, { "epoch": 0.7034573411623658, "grad_norm": 0.5585681200027466, "learning_rate": 4.034431915722669e-06, "loss": 0.2299, "step": 38960 }, { "epoch": 0.7035476205952664, "grad_norm": 0.2830726206302643, "learning_rate": 4.032155869926268e-06, "loss": 0.2741, "step": 38965 }, { "epoch": 0.7036379000281672, "grad_norm": 0.4869314432144165, "learning_rate": 4.029880304199541e-06, "loss": 0.2591, "step": 38970 }, { "epoch": 0.7037281794610679, "grad_norm": 0.4794871211051941, "learning_rate": 4.027605218725533e-06, "loss": 0.247, "step": 38975 }, { "epoch": 0.7038184588939687, "grad_norm": 0.6267117857933044, "learning_rate": 4.025330613687266e-06, "loss": 0.2555, "step": 38980 }, { "epoch": 0.7039087383268693, "grad_norm": 0.26632171869277954, "learning_rate": 4.023056489267707e-06, "loss": 0.2156, "step": 38985 }, { "epoch": 0.70399901775977, "grad_norm": 0.33814334869384766, "learning_rate": 4.020782845649798e-06, "loss": 0.1978, "step": 38990 }, { "epoch": 0.7040892971926708, "grad_norm": 0.21557562053203583, "learning_rate": 4.0185096830164335e-06, "loss": 0.1851, "step": 38995 }, { "epoch": 0.7041795766255715, "grad_norm": 0.5119245052337646, "learning_rate": 4.01623700155048e-06, "loss": 0.24, "step": 39000 }, { "epoch": 0.7042698560584721, "grad_norm": 0.2194567173719406, "learning_rate": 4.013964801434751e-06, "loss": 0.1603, "step": 39005 }, { "epoch": 0.7043601354913729, "grad_norm": 0.6126335859298706, "learning_rate": 4.011693082852034e-06, "loss": 0.2107, "step": 39010 }, { "epoch": 0.7044504149242736, "grad_norm": 0.5091403126716614, "learning_rate": 4.009421845985065e-06, "loss": 0.1837, "step": 39015 }, { "epoch": 0.7045406943571744, "grad_norm": 0.4852842092514038, "learning_rate": 4.0071510910165545e-06, "loss": 0.2375, "step": 39020 }, { "epoch": 0.704630973790075, "grad_norm": 0.5118070244789124, "learning_rate": 4.004880818129166e-06, "loss": 0.1604, "step": 39025 }, { "epoch": 0.7047212532229757, "grad_norm": 0.6322970986366272, "learning_rate": 4.002611027505529e-06, "loss": 0.286, "step": 39030 }, { "epoch": 0.7048115326558765, "grad_norm": 0.3018209934234619, "learning_rate": 4.000341719328227e-06, "loss": 0.2361, "step": 39035 }, { "epoch": 0.7049018120887772, "grad_norm": 0.5549687147140503, "learning_rate": 3.9980728937798155e-06, "loss": 0.224, "step": 39040 }, { "epoch": 0.7049920915216779, "grad_norm": 0.34325161576271057, "learning_rate": 3.995804551042798e-06, "loss": 0.2376, "step": 39045 }, { "epoch": 0.7050823709545786, "grad_norm": 0.31609299778938293, "learning_rate": 3.993536691299653e-06, "loss": 0.2662, "step": 39050 }, { "epoch": 0.7051726503874793, "grad_norm": 0.4124249815940857, "learning_rate": 3.991269314732806e-06, "loss": 0.217, "step": 39055 }, { "epoch": 0.7052629298203801, "grad_norm": 0.27731990814208984, "learning_rate": 3.989002421524656e-06, "loss": 0.2298, "step": 39060 }, { "epoch": 0.7053532092532807, "grad_norm": 0.6760087013244629, "learning_rate": 3.9867360118575605e-06, "loss": 0.3089, "step": 39065 }, { "epoch": 0.7054434886861815, "grad_norm": 0.6529771685600281, "learning_rate": 3.984470085913828e-06, "loss": 0.3373, "step": 39070 }, { "epoch": 0.7055337681190822, "grad_norm": 0.3386461138725281, "learning_rate": 3.982204643875744e-06, "loss": 0.2443, "step": 39075 }, { "epoch": 0.7056240475519829, "grad_norm": 0.22765114903450012, "learning_rate": 3.979939685925538e-06, "loss": 0.219, "step": 39080 }, { "epoch": 0.7057143269848836, "grad_norm": 0.307463675737381, "learning_rate": 3.9776752122454154e-06, "loss": 0.2476, "step": 39085 }, { "epoch": 0.7058046064177843, "grad_norm": 0.7050151228904724, "learning_rate": 3.975411223017536e-06, "loss": 0.2473, "step": 39090 }, { "epoch": 0.705894885850685, "grad_norm": 0.636820912361145, "learning_rate": 3.973147718424023e-06, "loss": 0.1865, "step": 39095 }, { "epoch": 0.7059851652835858, "grad_norm": 0.4979107975959778, "learning_rate": 3.970884698646953e-06, "loss": 0.1859, "step": 39100 }, { "epoch": 0.7060754447164864, "grad_norm": 0.2656373381614685, "learning_rate": 3.968622163868378e-06, "loss": 0.1779, "step": 39105 }, { "epoch": 0.7061657241493872, "grad_norm": 0.43536001443862915, "learning_rate": 3.966360114270292e-06, "loss": 0.2214, "step": 39110 }, { "epoch": 0.7062560035822879, "grad_norm": 0.49916452169418335, "learning_rate": 3.964098550034671e-06, "loss": 0.1997, "step": 39115 }, { "epoch": 0.7063462830151886, "grad_norm": 0.29856258630752563, "learning_rate": 3.9618374713434295e-06, "loss": 0.1341, "step": 39120 }, { "epoch": 0.7064365624480893, "grad_norm": 0.35987964272499084, "learning_rate": 3.959576878378468e-06, "loss": 0.2526, "step": 39125 }, { "epoch": 0.70652684188099, "grad_norm": 0.25284314155578613, "learning_rate": 3.957316771321625e-06, "loss": 0.1639, "step": 39130 }, { "epoch": 0.7066171213138908, "grad_norm": 0.536403238773346, "learning_rate": 3.955057150354717e-06, "loss": 0.2413, "step": 39135 }, { "epoch": 0.7067074007467915, "grad_norm": 0.4459882378578186, "learning_rate": 3.952798015659511e-06, "loss": 0.281, "step": 39140 }, { "epoch": 0.7067976801796921, "grad_norm": 0.4276878237724304, "learning_rate": 3.950539367417733e-06, "loss": 0.2388, "step": 39145 }, { "epoch": 0.7068879596125929, "grad_norm": 0.3131605386734009, "learning_rate": 3.948281205811078e-06, "loss": 0.1389, "step": 39150 }, { "epoch": 0.7069782390454936, "grad_norm": 0.5553649067878723, "learning_rate": 3.946023531021201e-06, "loss": 0.2079, "step": 39155 }, { "epoch": 0.7070685184783944, "grad_norm": 0.7444565892219543, "learning_rate": 3.943766343229718e-06, "loss": 0.2878, "step": 39160 }, { "epoch": 0.707158797911295, "grad_norm": 0.6670951843261719, "learning_rate": 3.9415096426181955e-06, "loss": 0.1765, "step": 39165 }, { "epoch": 0.7072490773441957, "grad_norm": 0.5706706643104553, "learning_rate": 3.939253429368177e-06, "loss": 0.1805, "step": 39170 }, { "epoch": 0.7073393567770965, "grad_norm": 0.34905120730400085, "learning_rate": 3.9369977036611494e-06, "loss": 0.1743, "step": 39175 }, { "epoch": 0.7074296362099972, "grad_norm": 0.5026421546936035, "learning_rate": 3.934742465678579e-06, "loss": 0.2911, "step": 39180 }, { "epoch": 0.7075199156428978, "grad_norm": 0.44103628396987915, "learning_rate": 3.932487715601875e-06, "loss": 0.2584, "step": 39185 }, { "epoch": 0.7076101950757986, "grad_norm": 0.16476301848888397, "learning_rate": 3.9302334536124186e-06, "loss": 0.25, "step": 39190 }, { "epoch": 0.7077004745086993, "grad_norm": 0.5779272317886353, "learning_rate": 3.92797967989155e-06, "loss": 0.1826, "step": 39195 }, { "epoch": 0.7077907539416001, "grad_norm": 0.3234092593193054, "learning_rate": 3.92572639462057e-06, "loss": 0.2257, "step": 39200 }, { "epoch": 0.7078810333745007, "grad_norm": 0.40954142808914185, "learning_rate": 3.923473597980736e-06, "loss": 0.2394, "step": 39205 }, { "epoch": 0.7079713128074014, "grad_norm": 0.41970279812812805, "learning_rate": 3.921221290153273e-06, "loss": 0.1453, "step": 39210 }, { "epoch": 0.7080615922403022, "grad_norm": 0.5298666954040527, "learning_rate": 3.918969471319357e-06, "loss": 0.2304, "step": 39215 }, { "epoch": 0.7081518716732029, "grad_norm": 0.40422743558883667, "learning_rate": 3.916718141660133e-06, "loss": 0.28, "step": 39220 }, { "epoch": 0.7082421511061036, "grad_norm": 0.517111599445343, "learning_rate": 3.914467301356706e-06, "loss": 0.2784, "step": 39225 }, { "epoch": 0.7083324305390043, "grad_norm": 0.36019113659858704, "learning_rate": 3.912216950590141e-06, "loss": 0.1932, "step": 39230 }, { "epoch": 0.708422709971905, "grad_norm": 0.46564367413520813, "learning_rate": 3.90996708954146e-06, "loss": 0.1829, "step": 39235 }, { "epoch": 0.7085129894048058, "grad_norm": 0.3894612193107605, "learning_rate": 3.907717718391645e-06, "loss": 0.159, "step": 39240 }, { "epoch": 0.7086032688377065, "grad_norm": 0.4303584098815918, "learning_rate": 3.905468837321648e-06, "loss": 0.261, "step": 39245 }, { "epoch": 0.7086935482706072, "grad_norm": 0.5752771496772766, "learning_rate": 3.903220446512367e-06, "loss": 0.2723, "step": 39250 }, { "epoch": 0.7087838277035079, "grad_norm": 0.5472204089164734, "learning_rate": 3.900972546144674e-06, "loss": 0.1747, "step": 39255 }, { "epoch": 0.7088741071364086, "grad_norm": 0.5496330261230469, "learning_rate": 3.898725136399394e-06, "loss": 0.2243, "step": 39260 }, { "epoch": 0.7089643865693094, "grad_norm": 0.8196171522140503, "learning_rate": 3.8964782174573215e-06, "loss": 0.2567, "step": 39265 }, { "epoch": 0.70905466600221, "grad_norm": 0.5102502107620239, "learning_rate": 3.8942317894991946e-06, "loss": 0.2294, "step": 39270 }, { "epoch": 0.7091449454351108, "grad_norm": 0.3395414352416992, "learning_rate": 3.891985852705731e-06, "loss": 0.1976, "step": 39275 }, { "epoch": 0.7092352248680115, "grad_norm": 0.5208386778831482, "learning_rate": 3.889740407257593e-06, "loss": 0.2594, "step": 39280 }, { "epoch": 0.7093255043009122, "grad_norm": 0.6493802666664124, "learning_rate": 3.887495453335412e-06, "loss": 0.2917, "step": 39285 }, { "epoch": 0.7094157837338129, "grad_norm": 0.864579439163208, "learning_rate": 3.885250991119781e-06, "loss": 0.2464, "step": 39290 }, { "epoch": 0.7095060631667136, "grad_norm": 0.7394172549247742, "learning_rate": 3.883007020791253e-06, "loss": 0.1616, "step": 39295 }, { "epoch": 0.7095963425996143, "grad_norm": 0.341521292924881, "learning_rate": 3.880763542530331e-06, "loss": 0.135, "step": 39300 }, { "epoch": 0.7096866220325151, "grad_norm": 0.31069761514663696, "learning_rate": 3.878520556517495e-06, "loss": 0.1188, "step": 39305 }, { "epoch": 0.7097769014654157, "grad_norm": 0.2305673062801361, "learning_rate": 3.8762780629331685e-06, "loss": 0.1908, "step": 39310 }, { "epoch": 0.7098671808983165, "grad_norm": 0.6119630336761475, "learning_rate": 3.874036061957752e-06, "loss": 0.2356, "step": 39315 }, { "epoch": 0.7099574603312172, "grad_norm": 0.3898882567882538, "learning_rate": 3.871794553771588e-06, "loss": 0.2726, "step": 39320 }, { "epoch": 0.710047739764118, "grad_norm": 0.7064270973205566, "learning_rate": 3.8695535385550005e-06, "loss": 0.2798, "step": 39325 }, { "epoch": 0.7101380191970186, "grad_norm": 0.5756570100784302, "learning_rate": 3.86731301648826e-06, "loss": 0.2828, "step": 39330 }, { "epoch": 0.7102282986299193, "grad_norm": 0.35779905319213867, "learning_rate": 3.865072987751594e-06, "loss": 0.1907, "step": 39335 }, { "epoch": 0.7103185780628201, "grad_norm": 0.31914791464805603, "learning_rate": 3.862833452525206e-06, "loss": 0.2182, "step": 39340 }, { "epoch": 0.7104088574957208, "grad_norm": 0.7411733269691467, "learning_rate": 3.86059441098924e-06, "loss": 0.2162, "step": 39345 }, { "epoch": 0.7104991369286214, "grad_norm": 0.4457027018070221, "learning_rate": 3.858355863323816e-06, "loss": 0.2336, "step": 39350 }, { "epoch": 0.7105894163615222, "grad_norm": 0.4871555268764496, "learning_rate": 3.856117809709009e-06, "loss": 0.2216, "step": 39355 }, { "epoch": 0.7106796957944229, "grad_norm": 0.725056529045105, "learning_rate": 3.853880250324858e-06, "loss": 0.2181, "step": 39360 }, { "epoch": 0.7107699752273237, "grad_norm": 0.24869728088378906, "learning_rate": 3.85164318535135e-06, "loss": 0.188, "step": 39365 }, { "epoch": 0.7108602546602243, "grad_norm": 0.34141483902931213, "learning_rate": 3.84940661496845e-06, "loss": 0.2649, "step": 39370 }, { "epoch": 0.710950534093125, "grad_norm": 0.6450366973876953, "learning_rate": 3.847170539356063e-06, "loss": 0.3282, "step": 39375 }, { "epoch": 0.7110408135260258, "grad_norm": 0.5957616567611694, "learning_rate": 3.844934958694075e-06, "loss": 0.2584, "step": 39380 }, { "epoch": 0.7111310929589265, "grad_norm": 0.5537654757499695, "learning_rate": 3.842699873162311e-06, "loss": 0.2418, "step": 39385 }, { "epoch": 0.7112213723918271, "grad_norm": 0.1669919788837433, "learning_rate": 3.840465282940581e-06, "loss": 0.1438, "step": 39390 }, { "epoch": 0.7113116518247279, "grad_norm": 0.2690705358982086, "learning_rate": 3.838231188208632e-06, "loss": 0.2292, "step": 39395 }, { "epoch": 0.7114019312576286, "grad_norm": 0.2518727779388428, "learning_rate": 3.835997589146187e-06, "loss": 0.1737, "step": 39400 }, { "epoch": 0.7114922106905294, "grad_norm": 0.5274950861930847, "learning_rate": 3.833764485932915e-06, "loss": 0.252, "step": 39405 }, { "epoch": 0.71158249012343, "grad_norm": 0.6367514133453369, "learning_rate": 3.8315318787484615e-06, "loss": 0.2363, "step": 39410 }, { "epoch": 0.7116727695563307, "grad_norm": 0.32732975482940674, "learning_rate": 3.829299767772416e-06, "loss": 0.226, "step": 39415 }, { "epoch": 0.7117630489892315, "grad_norm": 0.338902086019516, "learning_rate": 3.827068153184338e-06, "loss": 0.1735, "step": 39420 }, { "epoch": 0.7118533284221322, "grad_norm": 0.7373274564743042, "learning_rate": 3.824837035163748e-06, "loss": 0.2093, "step": 39425 }, { "epoch": 0.7119436078550329, "grad_norm": 0.36015719175338745, "learning_rate": 3.82260641389012e-06, "loss": 0.1947, "step": 39430 }, { "epoch": 0.7120338872879336, "grad_norm": 0.6400065422058105, "learning_rate": 3.820376289542893e-06, "loss": 0.143, "step": 39435 }, { "epoch": 0.7121241667208343, "grad_norm": 0.4007337987422943, "learning_rate": 3.8181466623014616e-06, "loss": 0.3211, "step": 39440 }, { "epoch": 0.7122144461537351, "grad_norm": 0.3060721457004547, "learning_rate": 3.815917532345188e-06, "loss": 0.1998, "step": 39445 }, { "epoch": 0.7123047255866357, "grad_norm": 0.5131520628929138, "learning_rate": 3.813688899853378e-06, "loss": 0.2637, "step": 39450 }, { "epoch": 0.7123950050195365, "grad_norm": 0.6646853089332581, "learning_rate": 3.811460765005326e-06, "loss": 0.336, "step": 39455 }, { "epoch": 0.7124852844524372, "grad_norm": 0.5253194570541382, "learning_rate": 3.8092331279802554e-06, "loss": 0.2323, "step": 39460 }, { "epoch": 0.7125755638853379, "grad_norm": 0.59714674949646, "learning_rate": 3.807005988957373e-06, "loss": 0.1693, "step": 39465 }, { "epoch": 0.7126658433182386, "grad_norm": 0.3074837028980255, "learning_rate": 3.8047793481158267e-06, "loss": 0.1404, "step": 39470 }, { "epoch": 0.7127561227511393, "grad_norm": 0.5420733094215393, "learning_rate": 3.8025532056347426e-06, "loss": 0.3196, "step": 39475 }, { "epoch": 0.71284640218404, "grad_norm": 0.423129677772522, "learning_rate": 3.80032756169319e-06, "loss": 0.248, "step": 39480 }, { "epoch": 0.7129366816169408, "grad_norm": 0.4089096486568451, "learning_rate": 3.7981024164702083e-06, "loss": 0.2264, "step": 39485 }, { "epoch": 0.7130269610498414, "grad_norm": 0.3753942847251892, "learning_rate": 3.7958777701447956e-06, "loss": 0.2404, "step": 39490 }, { "epoch": 0.7131172404827422, "grad_norm": 0.4123792052268982, "learning_rate": 3.793653622895912e-06, "loss": 0.2162, "step": 39495 }, { "epoch": 0.7132075199156429, "grad_norm": 0.4545404613018036, "learning_rate": 3.7914299749024652e-06, "loss": 0.2731, "step": 39500 }, { "epoch": 0.7132977993485436, "grad_norm": 0.5128106474876404, "learning_rate": 3.789206826343341e-06, "loss": 0.2263, "step": 39505 }, { "epoch": 0.7133880787814443, "grad_norm": 0.5913051962852478, "learning_rate": 3.7869841773973703e-06, "loss": 0.2951, "step": 39510 }, { "epoch": 0.713478358214345, "grad_norm": 0.429202675819397, "learning_rate": 3.7847620282433416e-06, "loss": 0.2091, "step": 39515 }, { "epoch": 0.7135686376472458, "grad_norm": 0.45054930448532104, "learning_rate": 3.782540379060027e-06, "loss": 0.2771, "step": 39520 }, { "epoch": 0.7136589170801465, "grad_norm": 0.29461824893951416, "learning_rate": 3.7803192300261294e-06, "loss": 0.238, "step": 39525 }, { "epoch": 0.7137491965130471, "grad_norm": 0.4052782952785492, "learning_rate": 3.778098581320332e-06, "loss": 0.2751, "step": 39530 }, { "epoch": 0.7138394759459479, "grad_norm": 0.4605269134044647, "learning_rate": 3.775878433121262e-06, "loss": 0.223, "step": 39535 }, { "epoch": 0.7139297553788486, "grad_norm": 0.5917302966117859, "learning_rate": 3.7736587856075224e-06, "loss": 0.2594, "step": 39540 }, { "epoch": 0.7140200348117494, "grad_norm": 0.4540460705757141, "learning_rate": 3.77143963895766e-06, "loss": 0.2928, "step": 39545 }, { "epoch": 0.71411031424465, "grad_norm": 0.8969790935516357, "learning_rate": 3.769220993350193e-06, "loss": 0.1789, "step": 39550 }, { "epoch": 0.7142005936775507, "grad_norm": 0.4711844027042389, "learning_rate": 3.7670028489635947e-06, "loss": 0.2453, "step": 39555 }, { "epoch": 0.7142908731104515, "grad_norm": 0.7896589040756226, "learning_rate": 3.7647852059763024e-06, "loss": 0.2987, "step": 39560 }, { "epoch": 0.7143811525433522, "grad_norm": 0.6333085298538208, "learning_rate": 3.762568064566702e-06, "loss": 0.295, "step": 39565 }, { "epoch": 0.7144714319762528, "grad_norm": 0.3821159303188324, "learning_rate": 3.7603514249131545e-06, "loss": 0.1993, "step": 39570 }, { "epoch": 0.7145617114091536, "grad_norm": 0.3645440340042114, "learning_rate": 3.7581352871939636e-06, "loss": 0.2361, "step": 39575 }, { "epoch": 0.7146519908420543, "grad_norm": 0.7931193113327026, "learning_rate": 3.7559196515874073e-06, "loss": 0.2554, "step": 39580 }, { "epoch": 0.7147422702749551, "grad_norm": 0.6085796356201172, "learning_rate": 3.7537045182717157e-06, "loss": 0.3057, "step": 39585 }, { "epoch": 0.7148325497078557, "grad_norm": 0.4631308913230896, "learning_rate": 3.7514898874250837e-06, "loss": 0.2324, "step": 39590 }, { "epoch": 0.7149228291407564, "grad_norm": 0.4935637414455414, "learning_rate": 3.749275759225657e-06, "loss": 0.2384, "step": 39595 }, { "epoch": 0.7150131085736572, "grad_norm": 0.6024258136749268, "learning_rate": 3.7470621338515513e-06, "loss": 0.3232, "step": 39600 }, { "epoch": 0.7151033880065579, "grad_norm": 0.41318437457084656, "learning_rate": 3.7448490114808332e-06, "loss": 0.2129, "step": 39605 }, { "epoch": 0.7151936674394586, "grad_norm": 0.6920496821403503, "learning_rate": 3.7426363922915287e-06, "loss": 0.2365, "step": 39610 }, { "epoch": 0.7152839468723593, "grad_norm": 0.6129166483879089, "learning_rate": 3.7404242764616315e-06, "loss": 0.1805, "step": 39615 }, { "epoch": 0.71537422630526, "grad_norm": 0.28114911913871765, "learning_rate": 3.738212664169091e-06, "loss": 0.261, "step": 39620 }, { "epoch": 0.7154645057381608, "grad_norm": 0.34122586250305176, "learning_rate": 3.736001555591816e-06, "loss": 0.2624, "step": 39625 }, { "epoch": 0.7155547851710615, "grad_norm": 0.5609122514724731, "learning_rate": 3.7337909509076696e-06, "loss": 0.1916, "step": 39630 }, { "epoch": 0.7156450646039622, "grad_norm": 0.45785167813301086, "learning_rate": 3.731580850294485e-06, "loss": 0.2253, "step": 39635 }, { "epoch": 0.7157353440368629, "grad_norm": 0.32274383306503296, "learning_rate": 3.729371253930041e-06, "loss": 0.2932, "step": 39640 }, { "epoch": 0.7158256234697636, "grad_norm": 0.6230811476707458, "learning_rate": 3.7271621619920915e-06, "loss": 0.2504, "step": 39645 }, { "epoch": 0.7159159029026644, "grad_norm": 0.7151699662208557, "learning_rate": 3.724953574658331e-06, "loss": 0.1631, "step": 39650 }, { "epoch": 0.716006182335565, "grad_norm": 0.5752794146537781, "learning_rate": 3.722745492106439e-06, "loss": 0.231, "step": 39655 }, { "epoch": 0.7160964617684658, "grad_norm": 0.6475176215171814, "learning_rate": 3.7205379145140286e-06, "loss": 0.2439, "step": 39660 }, { "epoch": 0.7161867412013665, "grad_norm": 0.6292634606361389, "learning_rate": 3.7183308420586895e-06, "loss": 0.2328, "step": 39665 }, { "epoch": 0.7162770206342672, "grad_norm": 0.34139055013656616, "learning_rate": 3.7161242749179593e-06, "loss": 0.1496, "step": 39670 }, { "epoch": 0.7163673000671679, "grad_norm": 0.6831566691398621, "learning_rate": 3.7139182132693463e-06, "loss": 0.1922, "step": 39675 }, { "epoch": 0.7164575795000686, "grad_norm": 0.26857325434684753, "learning_rate": 3.711712657290305e-06, "loss": 0.2363, "step": 39680 }, { "epoch": 0.7165478589329694, "grad_norm": 0.24632221460342407, "learning_rate": 3.7095076071582605e-06, "loss": 0.1825, "step": 39685 }, { "epoch": 0.7166381383658701, "grad_norm": 0.556376039981842, "learning_rate": 3.7073030630505923e-06, "loss": 0.1967, "step": 39690 }, { "epoch": 0.7167284177987707, "grad_norm": 0.26366910338401794, "learning_rate": 3.705099025144644e-06, "loss": 0.2337, "step": 39695 }, { "epoch": 0.7168186972316715, "grad_norm": 0.37259313464164734, "learning_rate": 3.702895493617711e-06, "loss": 0.2366, "step": 39700 }, { "epoch": 0.7169089766645722, "grad_norm": 0.4399190843105316, "learning_rate": 3.700692468647047e-06, "loss": 0.2512, "step": 39705 }, { "epoch": 0.716999256097473, "grad_norm": 0.5776998400688171, "learning_rate": 3.698489950409878e-06, "loss": 0.2542, "step": 39710 }, { "epoch": 0.7170895355303736, "grad_norm": 0.7476668953895569, "learning_rate": 3.6962879390833684e-06, "loss": 0.2446, "step": 39715 }, { "epoch": 0.7171798149632743, "grad_norm": 0.6083337664604187, "learning_rate": 3.6940864348446693e-06, "loss": 0.2465, "step": 39720 }, { "epoch": 0.7172700943961751, "grad_norm": 0.3944800794124603, "learning_rate": 3.691885437870865e-06, "loss": 0.1738, "step": 39725 }, { "epoch": 0.7173603738290758, "grad_norm": 0.7190540432929993, "learning_rate": 3.689684948339014e-06, "loss": 0.2022, "step": 39730 }, { "epoch": 0.7174506532619764, "grad_norm": 0.7269082069396973, "learning_rate": 3.6874849664261268e-06, "loss": 0.279, "step": 39735 }, { "epoch": 0.7175409326948772, "grad_norm": 0.4217861294746399, "learning_rate": 3.6852854923091806e-06, "loss": 0.1765, "step": 39740 }, { "epoch": 0.7176312121277779, "grad_norm": 0.5064873695373535, "learning_rate": 3.6830865261651015e-06, "loss": 0.2081, "step": 39745 }, { "epoch": 0.7177214915606787, "grad_norm": 0.626589298248291, "learning_rate": 3.6808880681707827e-06, "loss": 0.2431, "step": 39750 }, { "epoch": 0.7178117709935793, "grad_norm": 0.3502759337425232, "learning_rate": 3.678690118503073e-06, "loss": 0.1681, "step": 39755 }, { "epoch": 0.71790205042648, "grad_norm": 0.661049485206604, "learning_rate": 3.6764926773387876e-06, "loss": 0.2551, "step": 39760 }, { "epoch": 0.7179923298593808, "grad_norm": 0.3535442054271698, "learning_rate": 3.6742957448546867e-06, "loss": 0.2815, "step": 39765 }, { "epoch": 0.7180826092922815, "grad_norm": 0.47060221433639526, "learning_rate": 3.6720993212275037e-06, "loss": 0.3189, "step": 39770 }, { "epoch": 0.7181728887251821, "grad_norm": 0.8172930479049683, "learning_rate": 3.669903406633919e-06, "loss": 0.2129, "step": 39775 }, { "epoch": 0.7182631681580829, "grad_norm": 0.23240995407104492, "learning_rate": 3.6677080012505805e-06, "loss": 0.2062, "step": 39780 }, { "epoch": 0.7183534475909836, "grad_norm": 0.8371350169181824, "learning_rate": 3.665513105254097e-06, "loss": 0.2468, "step": 39785 }, { "epoch": 0.7184437270238844, "grad_norm": 0.5324345827102661, "learning_rate": 3.663318718821025e-06, "loss": 0.2794, "step": 39790 }, { "epoch": 0.718534006456785, "grad_norm": 0.2858458161354065, "learning_rate": 3.6611248421278923e-06, "loss": 0.2306, "step": 39795 }, { "epoch": 0.7186242858896857, "grad_norm": 0.38325032591819763, "learning_rate": 3.658931475351175e-06, "loss": 0.233, "step": 39800 }, { "epoch": 0.7187145653225865, "grad_norm": 0.5278452038764954, "learning_rate": 3.6567386186673203e-06, "loss": 0.1667, "step": 39805 }, { "epoch": 0.7188048447554872, "grad_norm": 0.47225314378738403, "learning_rate": 3.654546272252719e-06, "loss": 0.1887, "step": 39810 }, { "epoch": 0.7188951241883879, "grad_norm": 0.35314756631851196, "learning_rate": 3.652354436283736e-06, "loss": 0.1849, "step": 39815 }, { "epoch": 0.7189854036212886, "grad_norm": 0.5568872094154358, "learning_rate": 3.6501631109366855e-06, "loss": 0.1449, "step": 39820 }, { "epoch": 0.7190756830541893, "grad_norm": 0.6113433241844177, "learning_rate": 3.6479722963878483e-06, "loss": 0.2002, "step": 39825 }, { "epoch": 0.7191659624870901, "grad_norm": 0.5883828997612, "learning_rate": 3.6457819928134542e-06, "loss": 0.304, "step": 39830 }, { "epoch": 0.7192562419199907, "grad_norm": 0.5464839339256287, "learning_rate": 3.6435922003897006e-06, "loss": 0.2504, "step": 39835 }, { "epoch": 0.7193465213528915, "grad_norm": 0.36738574504852295, "learning_rate": 3.641402919292737e-06, "loss": 0.1838, "step": 39840 }, { "epoch": 0.7194368007857922, "grad_norm": 0.3718404769897461, "learning_rate": 3.6392141496986766e-06, "loss": 0.2372, "step": 39845 }, { "epoch": 0.7195270802186929, "grad_norm": 0.3584044277667999, "learning_rate": 3.63702589178359e-06, "loss": 0.1749, "step": 39850 }, { "epoch": 0.7196173596515936, "grad_norm": 0.5096855163574219, "learning_rate": 3.6348381457235115e-06, "loss": 0.247, "step": 39855 }, { "epoch": 0.7197076390844943, "grad_norm": 0.4133935272693634, "learning_rate": 3.6326509116944208e-06, "loss": 0.2273, "step": 39860 }, { "epoch": 0.719797918517395, "grad_norm": 0.6275957226753235, "learning_rate": 3.6304641898722726e-06, "loss": 0.2563, "step": 39865 }, { "epoch": 0.7198881979502958, "grad_norm": 0.480309396982193, "learning_rate": 3.6282779804329658e-06, "loss": 0.1821, "step": 39870 }, { "epoch": 0.7199784773831964, "grad_norm": 0.4104933738708496, "learning_rate": 3.6260922835523726e-06, "loss": 0.2553, "step": 39875 }, { "epoch": 0.7200687568160972, "grad_norm": 0.5115457773208618, "learning_rate": 3.6239070994063087e-06, "loss": 0.2359, "step": 39880 }, { "epoch": 0.7201590362489979, "grad_norm": 0.15795692801475525, "learning_rate": 3.62172242817056e-06, "loss": 0.1431, "step": 39885 }, { "epoch": 0.7202493156818986, "grad_norm": 0.3097035586833954, "learning_rate": 3.6195382700208704e-06, "loss": 0.2475, "step": 39890 }, { "epoch": 0.7203395951147993, "grad_norm": 0.5861602425575256, "learning_rate": 3.6173546251329337e-06, "loss": 0.2818, "step": 39895 }, { "epoch": 0.7204298745477, "grad_norm": 0.5597209334373474, "learning_rate": 3.6151714936824146e-06, "loss": 0.1739, "step": 39900 }, { "epoch": 0.7205201539806008, "grad_norm": 0.32762089371681213, "learning_rate": 3.612988875844923e-06, "loss": 0.1982, "step": 39905 }, { "epoch": 0.7206104334135015, "grad_norm": 0.4768185615539551, "learning_rate": 3.610806771796038e-06, "loss": 0.255, "step": 39910 }, { "epoch": 0.7207007128464021, "grad_norm": 0.6743978261947632, "learning_rate": 3.608625181711294e-06, "loss": 0.2388, "step": 39915 }, { "epoch": 0.7207909922793029, "grad_norm": 0.570248007774353, "learning_rate": 3.606444105766188e-06, "loss": 0.2112, "step": 39920 }, { "epoch": 0.7208812717122036, "grad_norm": 0.47991472482681274, "learning_rate": 3.604263544136165e-06, "loss": 0.2188, "step": 39925 }, { "epoch": 0.7209715511451044, "grad_norm": 0.3377138078212738, "learning_rate": 3.6020834969966423e-06, "loss": 0.192, "step": 39930 }, { "epoch": 0.721061830578005, "grad_norm": 0.2957462668418884, "learning_rate": 3.5999039645229806e-06, "loss": 0.2086, "step": 39935 }, { "epoch": 0.7211521100109057, "grad_norm": 0.3006899058818817, "learning_rate": 3.597724946890515e-06, "loss": 0.1798, "step": 39940 }, { "epoch": 0.7212423894438065, "grad_norm": 0.3699197471141815, "learning_rate": 3.595546444274526e-06, "loss": 0.244, "step": 39945 }, { "epoch": 0.7213326688767072, "grad_norm": 0.21937566995620728, "learning_rate": 3.5933684568502603e-06, "loss": 0.2092, "step": 39950 }, { "epoch": 0.7214229483096078, "grad_norm": 0.6600390672683716, "learning_rate": 3.5911909847929206e-06, "loss": 0.254, "step": 39955 }, { "epoch": 0.7215132277425086, "grad_norm": 0.9043104648590088, "learning_rate": 3.5890140282776743e-06, "loss": 0.2628, "step": 39960 }, { "epoch": 0.7216035071754093, "grad_norm": 0.4741593897342682, "learning_rate": 3.5868375874796326e-06, "loss": 0.2633, "step": 39965 }, { "epoch": 0.7216937866083101, "grad_norm": 0.40252596139907837, "learning_rate": 3.584661662573883e-06, "loss": 0.2537, "step": 39970 }, { "epoch": 0.7217840660412107, "grad_norm": 0.3973434865474701, "learning_rate": 3.582486253735454e-06, "loss": 0.1877, "step": 39975 }, { "epoch": 0.7218743454741114, "grad_norm": 0.42567405104637146, "learning_rate": 3.5803113611393482e-06, "loss": 0.1551, "step": 39980 }, { "epoch": 0.7219646249070122, "grad_norm": 0.7808625102043152, "learning_rate": 3.5781369849605197e-06, "loss": 0.287, "step": 39985 }, { "epoch": 0.7220549043399129, "grad_norm": 0.6534682512283325, "learning_rate": 3.575963125373876e-06, "loss": 0.2023, "step": 39990 }, { "epoch": 0.7221451837728136, "grad_norm": 0.3858139216899872, "learning_rate": 3.5737897825542954e-06, "loss": 0.1907, "step": 39995 }, { "epoch": 0.7222354632057143, "grad_norm": 0.2853589355945587, "learning_rate": 3.5716169566766e-06, "loss": 0.1961, "step": 40000 }, { "epoch": 0.722325742638615, "grad_norm": 0.5486742854118347, "learning_rate": 3.569444647915584e-06, "loss": 0.2662, "step": 40005 }, { "epoch": 0.7224160220715158, "grad_norm": 0.7488285899162292, "learning_rate": 3.5672728564459903e-06, "loss": 0.246, "step": 40010 }, { "epoch": 0.7225063015044164, "grad_norm": 0.45054709911346436, "learning_rate": 3.5651015824425226e-06, "loss": 0.199, "step": 40015 }, { "epoch": 0.7225965809373172, "grad_norm": 0.3069791793823242, "learning_rate": 3.5629308260798477e-06, "loss": 0.1964, "step": 40020 }, { "epoch": 0.7226868603702179, "grad_norm": 0.7028543949127197, "learning_rate": 3.560760587532589e-06, "loss": 0.2688, "step": 40025 }, { "epoch": 0.7227771398031186, "grad_norm": 0.3445713520050049, "learning_rate": 3.55859086697532e-06, "loss": 0.2132, "step": 40030 }, { "epoch": 0.7228674192360194, "grad_norm": 0.3838486671447754, "learning_rate": 3.5564216645825856e-06, "loss": 0.2215, "step": 40035 }, { "epoch": 0.72295769866892, "grad_norm": 0.5086146593093872, "learning_rate": 3.554252980528875e-06, "loss": 0.151, "step": 40040 }, { "epoch": 0.7230479781018208, "grad_norm": 0.4288047254085541, "learning_rate": 3.5520848149886467e-06, "loss": 0.183, "step": 40045 }, { "epoch": 0.7231382575347215, "grad_norm": 0.5946467518806458, "learning_rate": 3.5499171681363152e-06, "loss": 0.1896, "step": 40050 }, { "epoch": 0.7232285369676222, "grad_norm": 0.9705565571784973, "learning_rate": 3.547750040146254e-06, "loss": 0.2119, "step": 40055 }, { "epoch": 0.7233188164005229, "grad_norm": 0.2487785518169403, "learning_rate": 3.5455834311927895e-06, "loss": 0.2979, "step": 40060 }, { "epoch": 0.7234090958334236, "grad_norm": 0.5289493203163147, "learning_rate": 3.543417341450206e-06, "loss": 0.2717, "step": 40065 }, { "epoch": 0.7234993752663244, "grad_norm": 0.48151740431785583, "learning_rate": 3.5412517710927572e-06, "loss": 0.2648, "step": 40070 }, { "epoch": 0.7235896546992251, "grad_norm": 0.2873721420764923, "learning_rate": 3.53908672029464e-06, "loss": 0.2116, "step": 40075 }, { "epoch": 0.7236799341321257, "grad_norm": 0.44618523120880127, "learning_rate": 3.5369221892300223e-06, "loss": 0.2741, "step": 40080 }, { "epoch": 0.7237702135650265, "grad_norm": 0.2919872999191284, "learning_rate": 3.534758178073021e-06, "loss": 0.3187, "step": 40085 }, { "epoch": 0.7238604929979272, "grad_norm": 0.6174708604812622, "learning_rate": 3.5325946869977225e-06, "loss": 0.2481, "step": 40090 }, { "epoch": 0.723950772430828, "grad_norm": 0.3890153765678406, "learning_rate": 3.5304317161781556e-06, "loss": 0.3056, "step": 40095 }, { "epoch": 0.7240410518637286, "grad_norm": 0.5483882427215576, "learning_rate": 3.5282692657883212e-06, "loss": 0.222, "step": 40100 }, { "epoch": 0.7241313312966293, "grad_norm": 0.4131814241409302, "learning_rate": 3.526107336002168e-06, "loss": 0.314, "step": 40105 }, { "epoch": 0.7242216107295301, "grad_norm": 0.38478797674179077, "learning_rate": 3.5239459269936092e-06, "loss": 0.3439, "step": 40110 }, { "epoch": 0.7243118901624308, "grad_norm": 0.3770439028739929, "learning_rate": 3.521785038936515e-06, "loss": 0.1963, "step": 40115 }, { "epoch": 0.7244021695953314, "grad_norm": 0.649714469909668, "learning_rate": 3.5196246720047166e-06, "loss": 0.1478, "step": 40120 }, { "epoch": 0.7244924490282322, "grad_norm": 0.20353087782859802, "learning_rate": 3.5174648263719935e-06, "loss": 0.244, "step": 40125 }, { "epoch": 0.7245827284611329, "grad_norm": 0.47791555523872375, "learning_rate": 3.5153055022120952e-06, "loss": 0.1752, "step": 40130 }, { "epoch": 0.7246730078940337, "grad_norm": 0.31488651037216187, "learning_rate": 3.5131466996987183e-06, "loss": 0.1311, "step": 40135 }, { "epoch": 0.7247632873269343, "grad_norm": 0.35288169980049133, "learning_rate": 3.510988419005529e-06, "loss": 0.2397, "step": 40140 }, { "epoch": 0.724853566759835, "grad_norm": 0.4234815537929535, "learning_rate": 3.5088306603061385e-06, "loss": 0.1695, "step": 40145 }, { "epoch": 0.7249438461927358, "grad_norm": 0.6899263262748718, "learning_rate": 3.506673423774125e-06, "loss": 0.2204, "step": 40150 }, { "epoch": 0.7250341256256365, "grad_norm": 0.3410181701183319, "learning_rate": 3.5045167095830268e-06, "loss": 0.3102, "step": 40155 }, { "epoch": 0.7251244050585371, "grad_norm": 0.6206501722335815, "learning_rate": 3.5023605179063303e-06, "loss": 0.2645, "step": 40160 }, { "epoch": 0.7252146844914379, "grad_norm": 0.6185318231582642, "learning_rate": 3.50020484891749e-06, "loss": 0.2006, "step": 40165 }, { "epoch": 0.7253049639243386, "grad_norm": 0.2537311017513275, "learning_rate": 3.4980497027899098e-06, "loss": 0.2067, "step": 40170 }, { "epoch": 0.7253952433572394, "grad_norm": 0.6332979798316956, "learning_rate": 3.495895079696956e-06, "loss": 0.2341, "step": 40175 }, { "epoch": 0.72548552279014, "grad_norm": 0.4089825749397278, "learning_rate": 3.4937409798119537e-06, "loss": 0.2999, "step": 40180 }, { "epoch": 0.7255758022230407, "grad_norm": 0.4892581105232239, "learning_rate": 3.4915874033081877e-06, "loss": 0.2095, "step": 40185 }, { "epoch": 0.7256660816559415, "grad_norm": 0.4545522630214691, "learning_rate": 3.489434350358891e-06, "loss": 0.2304, "step": 40190 }, { "epoch": 0.7257563610888422, "grad_norm": 0.5012370944023132, "learning_rate": 3.487281821137268e-06, "loss": 0.2239, "step": 40195 }, { "epoch": 0.7258466405217429, "grad_norm": 0.7334614992141724, "learning_rate": 3.485129815816467e-06, "loss": 0.2329, "step": 40200 }, { "epoch": 0.7259369199546436, "grad_norm": 0.2023577094078064, "learning_rate": 3.482978334569609e-06, "loss": 0.1647, "step": 40205 }, { "epoch": 0.7260271993875443, "grad_norm": 0.3257547914981842, "learning_rate": 3.4808273775697566e-06, "loss": 0.1549, "step": 40210 }, { "epoch": 0.7261174788204451, "grad_norm": 0.44293710589408875, "learning_rate": 3.4786769449899414e-06, "loss": 0.2437, "step": 40215 }, { "epoch": 0.7262077582533457, "grad_norm": 0.4812375009059906, "learning_rate": 3.4765270370031533e-06, "loss": 0.1852, "step": 40220 }, { "epoch": 0.7262980376862465, "grad_norm": 0.49362730979919434, "learning_rate": 3.474377653782337e-06, "loss": 0.2395, "step": 40225 }, { "epoch": 0.7263883171191472, "grad_norm": 0.5562480688095093, "learning_rate": 3.4722287955003897e-06, "loss": 0.2293, "step": 40230 }, { "epoch": 0.7264785965520479, "grad_norm": 0.3648661971092224, "learning_rate": 3.4700804623301778e-06, "loss": 0.1545, "step": 40235 }, { "epoch": 0.7265688759849486, "grad_norm": 0.6169965267181396, "learning_rate": 3.4679326544445122e-06, "loss": 0.2442, "step": 40240 }, { "epoch": 0.7266591554178493, "grad_norm": 0.3303978443145752, "learning_rate": 3.4657853720161718e-06, "loss": 0.1603, "step": 40245 }, { "epoch": 0.72674943485075, "grad_norm": 0.3425135314464569, "learning_rate": 3.4636386152178935e-06, "loss": 0.2981, "step": 40250 }, { "epoch": 0.7268397142836508, "grad_norm": 0.48774453997612, "learning_rate": 3.4614923842223603e-06, "loss": 0.22, "step": 40255 }, { "epoch": 0.7269299937165514, "grad_norm": 0.5561987161636353, "learning_rate": 3.4593466792022288e-06, "loss": 0.2558, "step": 40260 }, { "epoch": 0.7270202731494522, "grad_norm": 0.2400379329919815, "learning_rate": 3.4572015003301e-06, "loss": 0.1867, "step": 40265 }, { "epoch": 0.7271105525823529, "grad_norm": 0.5912076234817505, "learning_rate": 3.455056847778542e-06, "loss": 0.1759, "step": 40270 }, { "epoch": 0.7272008320152537, "grad_norm": 0.38932791352272034, "learning_rate": 3.452912721720071e-06, "loss": 0.1969, "step": 40275 }, { "epoch": 0.7272911114481543, "grad_norm": 0.38241592049598694, "learning_rate": 3.4507691223271687e-06, "loss": 0.2328, "step": 40280 }, { "epoch": 0.727381390881055, "grad_norm": 0.525311291217804, "learning_rate": 3.4486260497722745e-06, "loss": 0.2274, "step": 40285 }, { "epoch": 0.7274716703139558, "grad_norm": 0.40287983417510986, "learning_rate": 3.4464835042277844e-06, "loss": 0.1836, "step": 40290 }, { "epoch": 0.7275619497468565, "grad_norm": 0.44902732968330383, "learning_rate": 3.444341485866044e-06, "loss": 0.2354, "step": 40295 }, { "epoch": 0.7276522291797571, "grad_norm": 0.6186977624893188, "learning_rate": 3.44219999485937e-06, "loss": 0.1643, "step": 40300 }, { "epoch": 0.7277425086126579, "grad_norm": 0.22817963361740112, "learning_rate": 3.4400590313800243e-06, "loss": 0.1861, "step": 40305 }, { "epoch": 0.7278327880455586, "grad_norm": 0.6198724508285522, "learning_rate": 3.4379185956002325e-06, "loss": 0.2969, "step": 40310 }, { "epoch": 0.7279230674784594, "grad_norm": 0.4639147222042084, "learning_rate": 3.4357786876921806e-06, "loss": 0.1743, "step": 40315 }, { "epoch": 0.72801334691136, "grad_norm": 0.2476786971092224, "learning_rate": 3.433639307828008e-06, "loss": 0.2198, "step": 40320 }, { "epoch": 0.7281036263442607, "grad_norm": 0.7034710645675659, "learning_rate": 3.4315004561798095e-06, "loss": 0.2146, "step": 40325 }, { "epoch": 0.7281939057771615, "grad_norm": 0.29537856578826904, "learning_rate": 3.429362132919645e-06, "loss": 0.2649, "step": 40330 }, { "epoch": 0.7282841852100622, "grad_norm": 0.5975651144981384, "learning_rate": 3.427224338219519e-06, "loss": 0.1971, "step": 40335 }, { "epoch": 0.7283744646429628, "grad_norm": 0.36948174238204956, "learning_rate": 3.4250870722514105e-06, "loss": 0.1718, "step": 40340 }, { "epoch": 0.7284647440758636, "grad_norm": 0.7214956283569336, "learning_rate": 3.4229503351872396e-06, "loss": 0.3039, "step": 40345 }, { "epoch": 0.7285550235087643, "grad_norm": 0.5402421355247498, "learning_rate": 3.420814127198895e-06, "loss": 0.2184, "step": 40350 }, { "epoch": 0.7286453029416651, "grad_norm": 0.6028202176094055, "learning_rate": 3.4186784484582204e-06, "loss": 0.336, "step": 40355 }, { "epoch": 0.7287355823745657, "grad_norm": 0.43091797828674316, "learning_rate": 3.4165432991370117e-06, "loss": 0.2733, "step": 40360 }, { "epoch": 0.7288258618074664, "grad_norm": 0.2607382535934448, "learning_rate": 3.414408679407031e-06, "loss": 0.2299, "step": 40365 }, { "epoch": 0.7289161412403672, "grad_norm": 0.27409112453460693, "learning_rate": 3.4122745894399866e-06, "loss": 0.2266, "step": 40370 }, { "epoch": 0.7290064206732679, "grad_norm": 0.2401573210954666, "learning_rate": 3.4101410294075544e-06, "loss": 0.1303, "step": 40375 }, { "epoch": 0.7290967001061686, "grad_norm": 1.1108605861663818, "learning_rate": 3.4080079994813643e-06, "loss": 0.282, "step": 40380 }, { "epoch": 0.7291869795390693, "grad_norm": 0.5349571704864502, "learning_rate": 3.4058754998330044e-06, "loss": 0.2915, "step": 40385 }, { "epoch": 0.72927725897197, "grad_norm": 0.646102786064148, "learning_rate": 3.4037435306340137e-06, "loss": 0.1459, "step": 40390 }, { "epoch": 0.7293675384048708, "grad_norm": 0.3781536817550659, "learning_rate": 3.401612092055899e-06, "loss": 0.2009, "step": 40395 }, { "epoch": 0.7294578178377714, "grad_norm": 0.44515880942344666, "learning_rate": 3.3994811842701147e-06, "loss": 0.2621, "step": 40400 }, { "epoch": 0.7295480972706722, "grad_norm": 0.4086102545261383, "learning_rate": 3.397350807448081e-06, "loss": 0.2204, "step": 40405 }, { "epoch": 0.7296383767035729, "grad_norm": 0.3024944067001343, "learning_rate": 3.3952209617611665e-06, "loss": 0.2066, "step": 40410 }, { "epoch": 0.7297286561364736, "grad_norm": 0.28862929344177246, "learning_rate": 3.393091647380704e-06, "loss": 0.2145, "step": 40415 }, { "epoch": 0.7298189355693743, "grad_norm": 0.41927874088287354, "learning_rate": 3.3909628644779813e-06, "loss": 0.229, "step": 40420 }, { "epoch": 0.729909215002275, "grad_norm": 0.6619019508361816, "learning_rate": 3.3888346132242466e-06, "loss": 0.2426, "step": 40425 }, { "epoch": 0.7299994944351758, "grad_norm": 0.7908322215080261, "learning_rate": 3.3867068937906998e-06, "loss": 0.2189, "step": 40430 }, { "epoch": 0.7300897738680765, "grad_norm": 0.5456673502922058, "learning_rate": 3.3845797063484973e-06, "loss": 0.1597, "step": 40435 }, { "epoch": 0.7301800533009772, "grad_norm": 0.39288800954818726, "learning_rate": 3.382453051068759e-06, "loss": 0.1385, "step": 40440 }, { "epoch": 0.7302703327338779, "grad_norm": 0.45614194869995117, "learning_rate": 3.380326928122558e-06, "loss": 0.1652, "step": 40445 }, { "epoch": 0.7303606121667786, "grad_norm": 0.3323318064212799, "learning_rate": 3.3782013376809287e-06, "loss": 0.2654, "step": 40450 }, { "epoch": 0.7304508915996794, "grad_norm": 0.2699224054813385, "learning_rate": 3.376076279914854e-06, "loss": 0.2126, "step": 40455 }, { "epoch": 0.7305411710325801, "grad_norm": 0.7620146870613098, "learning_rate": 3.373951754995285e-06, "loss": 0.2983, "step": 40460 }, { "epoch": 0.7306314504654807, "grad_norm": 0.4448334574699402, "learning_rate": 3.3718277630931186e-06, "loss": 0.1626, "step": 40465 }, { "epoch": 0.7307217298983815, "grad_norm": 0.3676196038722992, "learning_rate": 3.36970430437922e-06, "loss": 0.192, "step": 40470 }, { "epoch": 0.7308120093312822, "grad_norm": 0.39222726225852966, "learning_rate": 3.3675813790243993e-06, "loss": 0.2017, "step": 40475 }, { "epoch": 0.730902288764183, "grad_norm": 0.3409358561038971, "learning_rate": 3.365458987199436e-06, "loss": 0.145, "step": 40480 }, { "epoch": 0.7309925681970836, "grad_norm": 0.4742923676967621, "learning_rate": 3.363337129075058e-06, "loss": 0.1871, "step": 40485 }, { "epoch": 0.7310828476299843, "grad_norm": 0.4943235516548157, "learning_rate": 3.361215804821958e-06, "loss": 0.2017, "step": 40490 }, { "epoch": 0.7311731270628851, "grad_norm": 0.5879418849945068, "learning_rate": 3.3590950146107748e-06, "loss": 0.2369, "step": 40495 }, { "epoch": 0.7312634064957858, "grad_norm": 0.3419423997402191, "learning_rate": 3.356974758612117e-06, "loss": 0.1675, "step": 40500 }, { "epoch": 0.7313536859286864, "grad_norm": 0.5356691479682922, "learning_rate": 3.3548550369965383e-06, "loss": 0.1861, "step": 40505 }, { "epoch": 0.7314439653615872, "grad_norm": 0.5443263649940491, "learning_rate": 3.352735849934555e-06, "loss": 0.2761, "step": 40510 }, { "epoch": 0.7315342447944879, "grad_norm": 0.36747556924819946, "learning_rate": 3.350617197596644e-06, "loss": 0.1448, "step": 40515 }, { "epoch": 0.7316245242273887, "grad_norm": 0.44932252168655396, "learning_rate": 3.3484990801532357e-06, "loss": 0.2697, "step": 40520 }, { "epoch": 0.7317148036602893, "grad_norm": 0.37425053119659424, "learning_rate": 3.3463814977747153e-06, "loss": 0.2599, "step": 40525 }, { "epoch": 0.73180508309319, "grad_norm": 0.4571848511695862, "learning_rate": 3.344264450631425e-06, "loss": 0.249, "step": 40530 }, { "epoch": 0.7318953625260908, "grad_norm": 0.3737792372703552, "learning_rate": 3.3421479388936708e-06, "loss": 0.2467, "step": 40535 }, { "epoch": 0.7319856419589915, "grad_norm": 0.3214986026287079, "learning_rate": 3.3400319627317034e-06, "loss": 0.1831, "step": 40540 }, { "epoch": 0.7320759213918921, "grad_norm": 0.35493019223213196, "learning_rate": 3.337916522315743e-06, "loss": 0.1996, "step": 40545 }, { "epoch": 0.7321662008247929, "grad_norm": 0.4135398864746094, "learning_rate": 3.3358016178159593e-06, "loss": 0.1702, "step": 40550 }, { "epoch": 0.7322564802576936, "grad_norm": 0.8642098307609558, "learning_rate": 3.3336872494024852e-06, "loss": 0.2168, "step": 40555 }, { "epoch": 0.7323467596905944, "grad_norm": 0.3693483769893646, "learning_rate": 3.3315734172454007e-06, "loss": 0.2346, "step": 40560 }, { "epoch": 0.732437039123495, "grad_norm": 0.3232543468475342, "learning_rate": 3.329460121514754e-06, "loss": 0.1765, "step": 40565 }, { "epoch": 0.7325273185563957, "grad_norm": 0.46592220664024353, "learning_rate": 3.3273473623805376e-06, "loss": 0.2903, "step": 40570 }, { "epoch": 0.7326175979892965, "grad_norm": 0.4236691892147064, "learning_rate": 3.3252351400127105e-06, "loss": 0.3021, "step": 40575 }, { "epoch": 0.7327078774221972, "grad_norm": 0.3750850260257721, "learning_rate": 3.3231234545811863e-06, "loss": 0.2136, "step": 40580 }, { "epoch": 0.7327981568550979, "grad_norm": 0.3952043950557709, "learning_rate": 3.3210123062558374e-06, "loss": 0.285, "step": 40585 }, { "epoch": 0.7328884362879986, "grad_norm": 0.25859534740448, "learning_rate": 3.3189016952064855e-06, "loss": 0.2055, "step": 40590 }, { "epoch": 0.7329787157208993, "grad_norm": 0.34520620107650757, "learning_rate": 3.316791621602918e-06, "loss": 0.2823, "step": 40595 }, { "epoch": 0.7330689951538001, "grad_norm": 0.5526063442230225, "learning_rate": 3.314682085614871e-06, "loss": 0.0921, "step": 40600 }, { "epoch": 0.7331592745867007, "grad_norm": 0.3728067874908447, "learning_rate": 3.3125730874120453e-06, "loss": 0.2365, "step": 40605 }, { "epoch": 0.7332495540196015, "grad_norm": 0.49789491295814514, "learning_rate": 3.3104646271640894e-06, "loss": 0.2342, "step": 40610 }, { "epoch": 0.7333398334525022, "grad_norm": 0.6995531916618347, "learning_rate": 3.3083567050406173e-06, "loss": 0.3228, "step": 40615 }, { "epoch": 0.7334301128854029, "grad_norm": 0.394559383392334, "learning_rate": 3.306249321211198e-06, "loss": 0.3184, "step": 40620 }, { "epoch": 0.7335203923183036, "grad_norm": 0.48034948110580444, "learning_rate": 3.304142475845349e-06, "loss": 0.1532, "step": 40625 }, { "epoch": 0.7336106717512043, "grad_norm": 0.5348523259162903, "learning_rate": 3.302036169112558e-06, "loss": 0.1852, "step": 40630 }, { "epoch": 0.733700951184105, "grad_norm": 0.4209555685520172, "learning_rate": 3.2999304011822552e-06, "loss": 0.2359, "step": 40635 }, { "epoch": 0.7337912306170058, "grad_norm": 1.5587431192398071, "learning_rate": 3.2978251722238376e-06, "loss": 0.2462, "step": 40640 }, { "epoch": 0.7338815100499064, "grad_norm": 0.3898302912712097, "learning_rate": 3.2957204824066546e-06, "loss": 0.2945, "step": 40645 }, { "epoch": 0.7339717894828072, "grad_norm": 0.42389562726020813, "learning_rate": 3.293616331900017e-06, "loss": 0.1962, "step": 40650 }, { "epoch": 0.7340620689157079, "grad_norm": 0.44758307933807373, "learning_rate": 3.2915127208731835e-06, "loss": 0.1647, "step": 40655 }, { "epoch": 0.7341523483486087, "grad_norm": 0.45123669505119324, "learning_rate": 3.2894096494953774e-06, "loss": 0.2992, "step": 40660 }, { "epoch": 0.7342426277815093, "grad_norm": 0.4321818947792053, "learning_rate": 3.287307117935773e-06, "loss": 0.1862, "step": 40665 }, { "epoch": 0.73433290721441, "grad_norm": 0.4173588454723358, "learning_rate": 3.2852051263635077e-06, "loss": 0.2836, "step": 40670 }, { "epoch": 0.7344231866473108, "grad_norm": 0.6080979704856873, "learning_rate": 3.283103674947665e-06, "loss": 0.2901, "step": 40675 }, { "epoch": 0.7345134660802115, "grad_norm": 0.6286442875862122, "learning_rate": 3.281002763857295e-06, "loss": 0.2525, "step": 40680 }, { "epoch": 0.7346037455131121, "grad_norm": 0.3144855797290802, "learning_rate": 3.2789023932614017e-06, "loss": 0.1769, "step": 40685 }, { "epoch": 0.7346940249460129, "grad_norm": 0.6229161620140076, "learning_rate": 3.276802563328946e-06, "loss": 0.2706, "step": 40690 }, { "epoch": 0.7347843043789136, "grad_norm": 0.6532561182975769, "learning_rate": 3.2747032742288386e-06, "loss": 0.2707, "step": 40695 }, { "epoch": 0.7348745838118144, "grad_norm": 0.264279305934906, "learning_rate": 3.272604526129959e-06, "loss": 0.2004, "step": 40700 }, { "epoch": 0.734964863244715, "grad_norm": 0.43015000224113464, "learning_rate": 3.270506319201129e-06, "loss": 0.1812, "step": 40705 }, { "epoch": 0.7350551426776157, "grad_norm": 0.5014654994010925, "learning_rate": 3.268408653611137e-06, "loss": 0.2445, "step": 40710 }, { "epoch": 0.7351454221105165, "grad_norm": 0.36967700719833374, "learning_rate": 3.2663115295287296e-06, "loss": 0.1601, "step": 40715 }, { "epoch": 0.7352357015434172, "grad_norm": 0.5202431678771973, "learning_rate": 3.264214947122597e-06, "loss": 0.1835, "step": 40720 }, { "epoch": 0.7353259809763179, "grad_norm": 0.6178275942802429, "learning_rate": 3.262118906561401e-06, "loss": 0.3235, "step": 40725 }, { "epoch": 0.7354162604092186, "grad_norm": 0.4469394385814667, "learning_rate": 3.2600234080137473e-06, "loss": 0.175, "step": 40730 }, { "epoch": 0.7355065398421193, "grad_norm": 0.3345348536968231, "learning_rate": 3.257928451648209e-06, "loss": 0.2191, "step": 40735 }, { "epoch": 0.7355968192750201, "grad_norm": 0.567681074142456, "learning_rate": 3.255834037633303e-06, "loss": 0.218, "step": 40740 }, { "epoch": 0.7356870987079207, "grad_norm": 0.3793160617351532, "learning_rate": 3.2537401661375155e-06, "loss": 0.2864, "step": 40745 }, { "epoch": 0.7357773781408214, "grad_norm": 0.3733515441417694, "learning_rate": 3.251646837329281e-06, "loss": 0.2865, "step": 40750 }, { "epoch": 0.7358676575737222, "grad_norm": 0.5380032658576965, "learning_rate": 3.2495540513769962e-06, "loss": 0.2304, "step": 40755 }, { "epoch": 0.7359579370066229, "grad_norm": 0.24043571949005127, "learning_rate": 3.247461808449004e-06, "loss": 0.214, "step": 40760 }, { "epoch": 0.7360482164395236, "grad_norm": 0.5552172064781189, "learning_rate": 3.245370108713617e-06, "loss": 0.2595, "step": 40765 }, { "epoch": 0.7361384958724243, "grad_norm": 0.5105094909667969, "learning_rate": 3.243278952339091e-06, "loss": 0.1932, "step": 40770 }, { "epoch": 0.736228775305325, "grad_norm": 0.3858696520328522, "learning_rate": 3.2411883394936463e-06, "loss": 0.2166, "step": 40775 }, { "epoch": 0.7363190547382258, "grad_norm": 0.6489410996437073, "learning_rate": 3.239098270345459e-06, "loss": 0.2125, "step": 40780 }, { "epoch": 0.7364093341711264, "grad_norm": 0.5374500751495361, "learning_rate": 3.237008745062663e-06, "loss": 0.2938, "step": 40785 }, { "epoch": 0.7364996136040272, "grad_norm": 0.8062470555305481, "learning_rate": 3.234919763813338e-06, "loss": 0.2541, "step": 40790 }, { "epoch": 0.7365898930369279, "grad_norm": 0.7905746698379517, "learning_rate": 3.232831326765534e-06, "loss": 0.3326, "step": 40795 }, { "epoch": 0.7366801724698286, "grad_norm": 0.812006413936615, "learning_rate": 3.230743434087249e-06, "loss": 0.1899, "step": 40800 }, { "epoch": 0.7367704519027293, "grad_norm": 0.46172842383384705, "learning_rate": 3.2286560859464335e-06, "loss": 0.2729, "step": 40805 }, { "epoch": 0.73686073133563, "grad_norm": 0.41269052028656006, "learning_rate": 3.226569282511003e-06, "loss": 0.2156, "step": 40810 }, { "epoch": 0.7369510107685308, "grad_norm": 0.4248383045196533, "learning_rate": 3.2244830239488265e-06, "loss": 0.2346, "step": 40815 }, { "epoch": 0.7370412902014315, "grad_norm": 0.39304444193840027, "learning_rate": 3.2223973104277305e-06, "loss": 0.2313, "step": 40820 }, { "epoch": 0.7371315696343322, "grad_norm": 0.3386858403682709, "learning_rate": 3.2203121421154905e-06, "loss": 0.198, "step": 40825 }, { "epoch": 0.7372218490672329, "grad_norm": 0.8953399062156677, "learning_rate": 3.2182275191798483e-06, "loss": 0.223, "step": 40830 }, { "epoch": 0.7373121285001336, "grad_norm": 0.3431897759437561, "learning_rate": 3.216143441788492e-06, "loss": 0.2881, "step": 40835 }, { "epoch": 0.7374024079330344, "grad_norm": 0.5578610301017761, "learning_rate": 3.2140599101090706e-06, "loss": 0.2263, "step": 40840 }, { "epoch": 0.7374926873659351, "grad_norm": 0.4074469804763794, "learning_rate": 3.2119769243091925e-06, "loss": 0.2254, "step": 40845 }, { "epoch": 0.7375829667988357, "grad_norm": 0.3962375521659851, "learning_rate": 3.2098944845564196e-06, "loss": 0.2721, "step": 40850 }, { "epoch": 0.7376732462317365, "grad_norm": 0.37833014130592346, "learning_rate": 3.2078125910182634e-06, "loss": 0.158, "step": 40855 }, { "epoch": 0.7377635256646372, "grad_norm": 0.5450956225395203, "learning_rate": 3.2057312438622033e-06, "loss": 0.1927, "step": 40860 }, { "epoch": 0.737853805097538, "grad_norm": 0.42087388038635254, "learning_rate": 3.2036504432556627e-06, "loss": 0.2228, "step": 40865 }, { "epoch": 0.7379440845304386, "grad_norm": 0.47205621004104614, "learning_rate": 3.2015701893660323e-06, "loss": 0.2313, "step": 40870 }, { "epoch": 0.7380343639633393, "grad_norm": 0.36260485649108887, "learning_rate": 3.199490482360644e-06, "loss": 0.2527, "step": 40875 }, { "epoch": 0.7381246433962401, "grad_norm": 0.5770822763442993, "learning_rate": 3.1974113224068095e-06, "loss": 0.25, "step": 40880 }, { "epoch": 0.7382149228291408, "grad_norm": 0.23731869459152222, "learning_rate": 3.1953327096717713e-06, "loss": 0.1618, "step": 40885 }, { "epoch": 0.7383052022620414, "grad_norm": 0.33724477887153625, "learning_rate": 3.1932546443227443e-06, "loss": 0.2892, "step": 40890 }, { "epoch": 0.7383954816949422, "grad_norm": 0.46661415696144104, "learning_rate": 3.191177126526892e-06, "loss": 0.2314, "step": 40895 }, { "epoch": 0.7384857611278429, "grad_norm": 0.4015856981277466, "learning_rate": 3.189100156451331e-06, "loss": 0.2721, "step": 40900 }, { "epoch": 0.7385760405607437, "grad_norm": 0.4695940911769867, "learning_rate": 3.187023734263144e-06, "loss": 0.231, "step": 40905 }, { "epoch": 0.7386663199936443, "grad_norm": 0.46987631916999817, "learning_rate": 3.1849478601293603e-06, "loss": 0.2431, "step": 40910 }, { "epoch": 0.738756599426545, "grad_norm": 0.2321973294019699, "learning_rate": 3.1828725342169753e-06, "loss": 0.1568, "step": 40915 }, { "epoch": 0.7388468788594458, "grad_norm": 0.37069231271743774, "learning_rate": 3.180797756692927e-06, "loss": 0.1882, "step": 40920 }, { "epoch": 0.7389371582923465, "grad_norm": 0.3208655118942261, "learning_rate": 3.1787235277241213e-06, "loss": 0.2385, "step": 40925 }, { "epoch": 0.7390274377252471, "grad_norm": 0.3854255676269531, "learning_rate": 3.176649847477409e-06, "loss": 0.186, "step": 40930 }, { "epoch": 0.7391177171581479, "grad_norm": 0.4488772749900818, "learning_rate": 3.1745767161196094e-06, "loss": 0.2726, "step": 40935 }, { "epoch": 0.7392079965910486, "grad_norm": 0.6385711431503296, "learning_rate": 3.1725041338174844e-06, "loss": 0.2031, "step": 40940 }, { "epoch": 0.7392982760239494, "grad_norm": 0.6290650367736816, "learning_rate": 3.1704321007377603e-06, "loss": 0.2538, "step": 40945 }, { "epoch": 0.73938855545685, "grad_norm": 0.3569958806037903, "learning_rate": 3.168360617047118e-06, "loss": 0.4083, "step": 40950 }, { "epoch": 0.7394788348897507, "grad_norm": 0.7097567319869995, "learning_rate": 3.166289682912197e-06, "loss": 0.2972, "step": 40955 }, { "epoch": 0.7395691143226515, "grad_norm": 0.35611993074417114, "learning_rate": 3.164219298499582e-06, "loss": 0.2326, "step": 40960 }, { "epoch": 0.7396593937555522, "grad_norm": 0.4563051462173462, "learning_rate": 3.1621494639758266e-06, "loss": 0.2146, "step": 40965 }, { "epoch": 0.7397496731884529, "grad_norm": 0.3569638729095459, "learning_rate": 3.1600801795074276e-06, "loss": 0.3027, "step": 40970 }, { "epoch": 0.7398399526213536, "grad_norm": 0.4573405683040619, "learning_rate": 3.1580114452608468e-06, "loss": 0.2431, "step": 40975 }, { "epoch": 0.7399302320542543, "grad_norm": 0.3699833154678345, "learning_rate": 3.1559432614024997e-06, "loss": 0.1661, "step": 40980 }, { "epoch": 0.7400205114871551, "grad_norm": 0.4052470922470093, "learning_rate": 3.153875628098759e-06, "loss": 0.2584, "step": 40985 }, { "epoch": 0.7401107909200557, "grad_norm": 0.4142111837863922, "learning_rate": 3.1518085455159485e-06, "loss": 0.2181, "step": 40990 }, { "epoch": 0.7402010703529565, "grad_norm": 0.46312415599823, "learning_rate": 3.149742013820345e-06, "loss": 0.1124, "step": 40995 }, { "epoch": 0.7402913497858572, "grad_norm": 0.346750944852829, "learning_rate": 3.1476760331781943e-06, "loss": 0.2567, "step": 41000 }, { "epoch": 0.7403816292187579, "grad_norm": 0.40103939175605774, "learning_rate": 3.145610603755682e-06, "loss": 0.1935, "step": 41005 }, { "epoch": 0.7404719086516586, "grad_norm": 0.36617350578308105, "learning_rate": 3.1435457257189605e-06, "loss": 0.2619, "step": 41010 }, { "epoch": 0.7405621880845593, "grad_norm": 0.4069492518901825, "learning_rate": 3.141481399234134e-06, "loss": 0.1913, "step": 41015 }, { "epoch": 0.7406524675174601, "grad_norm": 0.41923683881759644, "learning_rate": 3.139417624467267e-06, "loss": 0.2532, "step": 41020 }, { "epoch": 0.7407427469503608, "grad_norm": 0.4082649052143097, "learning_rate": 3.137354401584366e-06, "loss": 0.1974, "step": 41025 }, { "epoch": 0.7408330263832614, "grad_norm": 0.406902551651001, "learning_rate": 3.1352917307514107e-06, "loss": 0.1547, "step": 41030 }, { "epoch": 0.7409233058161622, "grad_norm": 0.3015364706516266, "learning_rate": 3.133229612134322e-06, "loss": 0.2613, "step": 41035 }, { "epoch": 0.7410135852490629, "grad_norm": 0.23433537781238556, "learning_rate": 3.1311680458989847e-06, "loss": 0.1593, "step": 41040 }, { "epoch": 0.7411038646819637, "grad_norm": 0.694824755191803, "learning_rate": 3.129107032211236e-06, "loss": 0.2223, "step": 41045 }, { "epoch": 0.7411941441148643, "grad_norm": 0.8749660849571228, "learning_rate": 3.127046571236875e-06, "loss": 0.3951, "step": 41050 }, { "epoch": 0.741284423547765, "grad_norm": 0.5870570540428162, "learning_rate": 3.1249866631416414e-06, "loss": 0.2568, "step": 41055 }, { "epoch": 0.7413747029806658, "grad_norm": 0.6222681999206543, "learning_rate": 3.122927308091248e-06, "loss": 0.2059, "step": 41060 }, { "epoch": 0.7414649824135665, "grad_norm": 0.38352474570274353, "learning_rate": 3.120868506251349e-06, "loss": 0.2695, "step": 41065 }, { "epoch": 0.7415552618464671, "grad_norm": 0.5242384672164917, "learning_rate": 3.1188102577875657e-06, "loss": 0.181, "step": 41070 }, { "epoch": 0.7416455412793679, "grad_norm": 0.44780153036117554, "learning_rate": 3.1167525628654615e-06, "loss": 0.2373, "step": 41075 }, { "epoch": 0.7417358207122686, "grad_norm": 0.6195186972618103, "learning_rate": 3.1146954216505685e-06, "loss": 0.3313, "step": 41080 }, { "epoch": 0.7418261001451694, "grad_norm": 0.37485525012016296, "learning_rate": 3.1126388343083714e-06, "loss": 0.2259, "step": 41085 }, { "epoch": 0.74191637957807, "grad_norm": 0.5623783469200134, "learning_rate": 3.1105828010043013e-06, "loss": 0.2085, "step": 41090 }, { "epoch": 0.7420066590109707, "grad_norm": 0.540580153465271, "learning_rate": 3.108527321903756e-06, "loss": 0.1922, "step": 41095 }, { "epoch": 0.7420969384438715, "grad_norm": 0.3516944944858551, "learning_rate": 3.10647239717208e-06, "loss": 0.2026, "step": 41100 }, { "epoch": 0.7421872178767722, "grad_norm": 0.37779971957206726, "learning_rate": 3.1044180269745773e-06, "loss": 0.2623, "step": 41105 }, { "epoch": 0.7422774973096729, "grad_norm": 0.4179331660270691, "learning_rate": 3.1023642114765094e-06, "loss": 0.2363, "step": 41110 }, { "epoch": 0.7423677767425736, "grad_norm": 0.35786205530166626, "learning_rate": 3.100310950843093e-06, "loss": 0.2671, "step": 41115 }, { "epoch": 0.7424580561754743, "grad_norm": 0.35624074935913086, "learning_rate": 3.0982582452394927e-06, "loss": 0.3233, "step": 41120 }, { "epoch": 0.7425483356083751, "grad_norm": 0.29139894247055054, "learning_rate": 3.096206094830838e-06, "loss": 0.1685, "step": 41125 }, { "epoch": 0.7426386150412757, "grad_norm": 0.547553300857544, "learning_rate": 3.094154499782205e-06, "loss": 0.1926, "step": 41130 }, { "epoch": 0.7427288944741764, "grad_norm": 0.4268980622291565, "learning_rate": 3.0921034602586354e-06, "loss": 0.1671, "step": 41135 }, { "epoch": 0.7428191739070772, "grad_norm": 0.23809778690338135, "learning_rate": 3.090052976425111e-06, "loss": 0.1866, "step": 41140 }, { "epoch": 0.7429094533399779, "grad_norm": 0.4401637613773346, "learning_rate": 3.0880030484465896e-06, "loss": 0.2484, "step": 41145 }, { "epoch": 0.7429997327728786, "grad_norm": 0.6036712527275085, "learning_rate": 3.0859536764879662e-06, "loss": 0.3163, "step": 41150 }, { "epoch": 0.7430900122057793, "grad_norm": 0.3984891176223755, "learning_rate": 3.083904860714102e-06, "loss": 0.1377, "step": 41155 }, { "epoch": 0.74318029163868, "grad_norm": 0.47382864356040955, "learning_rate": 3.081856601289803e-06, "loss": 0.2435, "step": 41160 }, { "epoch": 0.7432705710715808, "grad_norm": 0.5753681659698486, "learning_rate": 3.0798088983798447e-06, "loss": 0.2523, "step": 41165 }, { "epoch": 0.7433608505044814, "grad_norm": 0.37586915493011475, "learning_rate": 3.0777617521489424e-06, "loss": 0.1963, "step": 41170 }, { "epoch": 0.7434511299373822, "grad_norm": 0.1946060210466385, "learning_rate": 3.0757151627617776e-06, "loss": 0.1702, "step": 41175 }, { "epoch": 0.7435414093702829, "grad_norm": 0.4404691755771637, "learning_rate": 3.073669130382988e-06, "loss": 0.2525, "step": 41180 }, { "epoch": 0.7436316888031836, "grad_norm": 0.639215350151062, "learning_rate": 3.0716236551771526e-06, "loss": 0.2088, "step": 41185 }, { "epoch": 0.7437219682360843, "grad_norm": 0.5188442468643188, "learning_rate": 3.0695787373088236e-06, "loss": 0.2945, "step": 41190 }, { "epoch": 0.743812247668985, "grad_norm": 0.2617374360561371, "learning_rate": 3.0675343769424936e-06, "loss": 0.2652, "step": 41195 }, { "epoch": 0.7439025271018858, "grad_norm": 0.4658057689666748, "learning_rate": 3.065490574242622e-06, "loss": 0.2763, "step": 41200 }, { "epoch": 0.7439928065347865, "grad_norm": 0.3671187162399292, "learning_rate": 3.0634473293736087e-06, "loss": 0.2873, "step": 41205 }, { "epoch": 0.7440830859676871, "grad_norm": 0.43951717019081116, "learning_rate": 3.06140464249983e-06, "loss": 0.1686, "step": 41210 }, { "epoch": 0.7441733654005879, "grad_norm": 0.5031009912490845, "learning_rate": 3.0593625137855965e-06, "loss": 0.1683, "step": 41215 }, { "epoch": 0.7442636448334886, "grad_norm": 0.4004661738872528, "learning_rate": 3.057320943395189e-06, "loss": 0.2477, "step": 41220 }, { "epoch": 0.7443539242663894, "grad_norm": 0.568417489528656, "learning_rate": 3.0552799314928295e-06, "loss": 0.2861, "step": 41225 }, { "epoch": 0.7444442036992901, "grad_norm": 0.3609898090362549, "learning_rate": 3.0532394782427098e-06, "loss": 0.2234, "step": 41230 }, { "epoch": 0.7445344831321907, "grad_norm": 0.30836221575737, "learning_rate": 3.0511995838089634e-06, "loss": 0.2409, "step": 41235 }, { "epoch": 0.7446247625650915, "grad_norm": 0.5751001834869385, "learning_rate": 3.049160248355688e-06, "loss": 0.1996, "step": 41240 }, { "epoch": 0.7447150419979922, "grad_norm": 0.24405145645141602, "learning_rate": 3.0471214720469324e-06, "loss": 0.1719, "step": 41245 }, { "epoch": 0.744805321430893, "grad_norm": 0.3759881556034088, "learning_rate": 3.0450832550467056e-06, "loss": 0.1612, "step": 41250 }, { "epoch": 0.7448956008637936, "grad_norm": 0.5241115093231201, "learning_rate": 3.043045597518961e-06, "loss": 0.2724, "step": 41255 }, { "epoch": 0.7449858802966943, "grad_norm": 0.49353301525115967, "learning_rate": 3.041008499627619e-06, "loss": 0.215, "step": 41260 }, { "epoch": 0.7450761597295951, "grad_norm": 0.22127854824066162, "learning_rate": 3.0389719615365465e-06, "loss": 0.224, "step": 41265 }, { "epoch": 0.7451664391624958, "grad_norm": 0.6090020537376404, "learning_rate": 3.0369359834095615e-06, "loss": 0.218, "step": 41270 }, { "epoch": 0.7452567185953964, "grad_norm": 0.9602436423301697, "learning_rate": 3.0349005654104567e-06, "loss": 0.2295, "step": 41275 }, { "epoch": 0.7453469980282972, "grad_norm": 0.3734925091266632, "learning_rate": 3.0328657077029577e-06, "loss": 0.1783, "step": 41280 }, { "epoch": 0.7454372774611979, "grad_norm": 0.35964828729629517, "learning_rate": 3.0308314104507597e-06, "loss": 0.1745, "step": 41285 }, { "epoch": 0.7455275568940987, "grad_norm": 0.44735097885131836, "learning_rate": 3.0287976738174995e-06, "loss": 0.2827, "step": 41290 }, { "epoch": 0.7456178363269993, "grad_norm": 0.3094846308231354, "learning_rate": 3.0267644979667854e-06, "loss": 0.2236, "step": 41295 }, { "epoch": 0.7457081157599, "grad_norm": 0.2760784924030304, "learning_rate": 3.024731883062163e-06, "loss": 0.2225, "step": 41300 }, { "epoch": 0.7457983951928008, "grad_norm": 0.49946433305740356, "learning_rate": 3.022699829267146e-06, "loss": 0.1663, "step": 41305 }, { "epoch": 0.7458886746257015, "grad_norm": 0.19271667301654816, "learning_rate": 3.0206683367451985e-06, "loss": 0.0996, "step": 41310 }, { "epoch": 0.7459789540586022, "grad_norm": 0.41054147481918335, "learning_rate": 3.018637405659741e-06, "loss": 0.2102, "step": 41315 }, { "epoch": 0.7460692334915029, "grad_norm": 0.49323397874832153, "learning_rate": 3.0166070361741417e-06, "loss": 0.2042, "step": 41320 }, { "epoch": 0.7461595129244036, "grad_norm": 0.3955391049385071, "learning_rate": 3.014577228451736e-06, "loss": 0.2037, "step": 41325 }, { "epoch": 0.7462497923573044, "grad_norm": 1.6140589714050293, "learning_rate": 3.012547982655799e-06, "loss": 0.2341, "step": 41330 }, { "epoch": 0.746340071790205, "grad_norm": 0.285859078168869, "learning_rate": 3.0105192989495736e-06, "loss": 0.2124, "step": 41335 }, { "epoch": 0.7464303512231057, "grad_norm": 0.41897910833358765, "learning_rate": 3.008491177496252e-06, "loss": 0.2757, "step": 41340 }, { "epoch": 0.7465206306560065, "grad_norm": 0.4707546532154083, "learning_rate": 3.006463618458986e-06, "loss": 0.2367, "step": 41345 }, { "epoch": 0.7466109100889072, "grad_norm": 0.5632999539375305, "learning_rate": 3.0044366220008738e-06, "loss": 0.28, "step": 41350 }, { "epoch": 0.7467011895218079, "grad_norm": 0.2610125243663788, "learning_rate": 3.002410188284969e-06, "loss": 0.1549, "step": 41355 }, { "epoch": 0.7467914689547086, "grad_norm": 0.5858786702156067, "learning_rate": 3.000384317474291e-06, "loss": 0.2081, "step": 41360 }, { "epoch": 0.7468817483876093, "grad_norm": 0.24929028749465942, "learning_rate": 2.998359009731798e-06, "loss": 0.2276, "step": 41365 }, { "epoch": 0.7469720278205101, "grad_norm": 0.3487606346607208, "learning_rate": 2.9963342652204163e-06, "loss": 0.1951, "step": 41370 }, { "epoch": 0.7470623072534107, "grad_norm": 0.5782601237297058, "learning_rate": 2.9943100841030225e-06, "loss": 0.1691, "step": 41375 }, { "epoch": 0.7471525866863115, "grad_norm": 0.4717338979244232, "learning_rate": 2.9922864665424487e-06, "loss": 0.2257, "step": 41380 }, { "epoch": 0.7472428661192122, "grad_norm": 0.22838842868804932, "learning_rate": 2.990263412701474e-06, "loss": 0.2086, "step": 41385 }, { "epoch": 0.7473331455521129, "grad_norm": 0.35909873247146606, "learning_rate": 2.9882409227428454e-06, "loss": 0.1488, "step": 41390 }, { "epoch": 0.7474234249850136, "grad_norm": 0.7137178778648376, "learning_rate": 2.986218996829251e-06, "loss": 0.3026, "step": 41395 }, { "epoch": 0.7475137044179143, "grad_norm": 0.4564216732978821, "learning_rate": 2.984197635123344e-06, "loss": 0.2638, "step": 41400 }, { "epoch": 0.7476039838508151, "grad_norm": 0.43131279945373535, "learning_rate": 2.982176837787727e-06, "loss": 0.1893, "step": 41405 }, { "epoch": 0.7476942632837158, "grad_norm": 0.41147229075431824, "learning_rate": 2.9801566049849617e-06, "loss": 0.2538, "step": 41410 }, { "epoch": 0.7477845427166164, "grad_norm": 0.5008199214935303, "learning_rate": 2.9781369368775546e-06, "loss": 0.3361, "step": 41415 }, { "epoch": 0.7478748221495172, "grad_norm": 0.4060599207878113, "learning_rate": 2.9761178336279817e-06, "loss": 0.2744, "step": 41420 }, { "epoch": 0.7479651015824179, "grad_norm": 0.46722060441970825, "learning_rate": 2.9740992953986558e-06, "loss": 0.2239, "step": 41425 }, { "epoch": 0.7480553810153187, "grad_norm": 0.2274867445230484, "learning_rate": 2.972081322351963e-06, "loss": 0.2617, "step": 41430 }, { "epoch": 0.7481456604482193, "grad_norm": 0.5291921496391296, "learning_rate": 2.9700639146502264e-06, "loss": 0.2695, "step": 41435 }, { "epoch": 0.74823593988112, "grad_norm": 0.5454273819923401, "learning_rate": 2.968047072455734e-06, "loss": 0.1875, "step": 41440 }, { "epoch": 0.7483262193140208, "grad_norm": 0.3729475736618042, "learning_rate": 2.966030795930732e-06, "loss": 0.1642, "step": 41445 }, { "epoch": 0.7484164987469215, "grad_norm": 0.45739078521728516, "learning_rate": 2.964015085237407e-06, "loss": 0.2755, "step": 41450 }, { "epoch": 0.7485067781798221, "grad_norm": 0.2924373149871826, "learning_rate": 2.961999940537915e-06, "loss": 0.2015, "step": 41455 }, { "epoch": 0.7485970576127229, "grad_norm": 0.296959787607193, "learning_rate": 2.959985361994353e-06, "loss": 0.2333, "step": 41460 }, { "epoch": 0.7486873370456236, "grad_norm": 0.37825894355773926, "learning_rate": 2.9579713497687866e-06, "loss": 0.2877, "step": 41465 }, { "epoch": 0.7487776164785244, "grad_norm": 0.5115907788276672, "learning_rate": 2.955957904023218e-06, "loss": 0.3644, "step": 41470 }, { "epoch": 0.748867895911425, "grad_norm": 0.48065969347953796, "learning_rate": 2.9539450249196267e-06, "loss": 0.2066, "step": 41475 }, { "epoch": 0.7489581753443257, "grad_norm": 0.5366142392158508, "learning_rate": 2.9519327126199258e-06, "loss": 0.1929, "step": 41480 }, { "epoch": 0.7490484547772265, "grad_norm": 0.597087562084198, "learning_rate": 2.9499209672859963e-06, "loss": 0.2874, "step": 41485 }, { "epoch": 0.7491387342101272, "grad_norm": 0.35067883133888245, "learning_rate": 2.947909789079664e-06, "loss": 0.1681, "step": 41490 }, { "epoch": 0.7492290136430279, "grad_norm": 0.4782887101173401, "learning_rate": 2.945899178162718e-06, "loss": 0.269, "step": 41495 }, { "epoch": 0.7493192930759286, "grad_norm": 0.4069378674030304, "learning_rate": 2.9438891346968913e-06, "loss": 0.2355, "step": 41500 }, { "epoch": 0.7494095725088293, "grad_norm": 0.2894822955131531, "learning_rate": 2.9418796588438814e-06, "loss": 0.2093, "step": 41505 }, { "epoch": 0.7494998519417301, "grad_norm": 0.45250555872917175, "learning_rate": 2.9398707507653345e-06, "loss": 0.2883, "step": 41510 }, { "epoch": 0.7495901313746307, "grad_norm": 0.3528308868408203, "learning_rate": 2.937862410622858e-06, "loss": 0.1596, "step": 41515 }, { "epoch": 0.7496804108075314, "grad_norm": 0.3167407214641571, "learning_rate": 2.9358546385780006e-06, "loss": 0.2779, "step": 41520 }, { "epoch": 0.7497706902404322, "grad_norm": 0.5783354043960571, "learning_rate": 2.9338474347922787e-06, "loss": 0.1904, "step": 41525 }, { "epoch": 0.7498609696733329, "grad_norm": 0.35650262236595154, "learning_rate": 2.9318407994271514e-06, "loss": 0.2287, "step": 41530 }, { "epoch": 0.7499512491062336, "grad_norm": 0.5059142112731934, "learning_rate": 2.9298347326440424e-06, "loss": 0.2607, "step": 41535 }, { "epoch": 0.7500415285391343, "grad_norm": 0.4670112729072571, "learning_rate": 2.9278292346043268e-06, "loss": 0.2889, "step": 41540 }, { "epoch": 0.750131807972035, "grad_norm": 0.3478320837020874, "learning_rate": 2.9258243054693257e-06, "loss": 0.2606, "step": 41545 }, { "epoch": 0.7502220874049358, "grad_norm": 0.5915669798851013, "learning_rate": 2.923819945400329e-06, "loss": 0.2795, "step": 41550 }, { "epoch": 0.7503123668378364, "grad_norm": 0.37255313992500305, "learning_rate": 2.9218161545585654e-06, "loss": 0.1787, "step": 41555 }, { "epoch": 0.7504026462707372, "grad_norm": 0.3664399981498718, "learning_rate": 2.9198129331052318e-06, "loss": 0.1552, "step": 41560 }, { "epoch": 0.7504929257036379, "grad_norm": 0.32734349370002747, "learning_rate": 2.9178102812014674e-06, "loss": 0.1657, "step": 41565 }, { "epoch": 0.7505832051365386, "grad_norm": 0.3999686539173126, "learning_rate": 2.9158081990083732e-06, "loss": 0.1633, "step": 41570 }, { "epoch": 0.7506734845694393, "grad_norm": 0.4873049557209015, "learning_rate": 2.913806686687002e-06, "loss": 0.2261, "step": 41575 }, { "epoch": 0.75076376400234, "grad_norm": 0.4038124978542328, "learning_rate": 2.9118057443983637e-06, "loss": 0.2136, "step": 41580 }, { "epoch": 0.7508540434352408, "grad_norm": 0.5271873474121094, "learning_rate": 2.909805372303415e-06, "loss": 0.2043, "step": 41585 }, { "epoch": 0.7509443228681415, "grad_norm": 0.3286977708339691, "learning_rate": 2.9078055705630758e-06, "loss": 0.2283, "step": 41590 }, { "epoch": 0.7510346023010421, "grad_norm": 0.6366809010505676, "learning_rate": 2.905806339338211e-06, "loss": 0.2817, "step": 41595 }, { "epoch": 0.7511248817339429, "grad_norm": 0.24309015274047852, "learning_rate": 2.903807678789646e-06, "loss": 0.2005, "step": 41600 }, { "epoch": 0.7512151611668436, "grad_norm": 0.23062177002429962, "learning_rate": 2.9018095890781585e-06, "loss": 0.266, "step": 41605 }, { "epoch": 0.7513054405997444, "grad_norm": 0.533152163028717, "learning_rate": 2.899812070364484e-06, "loss": 0.2617, "step": 41610 }, { "epoch": 0.7513957200326451, "grad_norm": 0.4984569847583771, "learning_rate": 2.897815122809302e-06, "loss": 0.2927, "step": 41615 }, { "epoch": 0.7514859994655457, "grad_norm": 0.4147002398967743, "learning_rate": 2.895818746573259e-06, "loss": 0.2625, "step": 41620 }, { "epoch": 0.7515762788984465, "grad_norm": 0.515230655670166, "learning_rate": 2.893822941816945e-06, "loss": 0.168, "step": 41625 }, { "epoch": 0.7516665583313472, "grad_norm": 0.4705766439437866, "learning_rate": 2.891827708700906e-06, "loss": 0.2495, "step": 41630 }, { "epoch": 0.751756837764248, "grad_norm": 0.2987738847732544, "learning_rate": 2.889833047385646e-06, "loss": 0.2963, "step": 41635 }, { "epoch": 0.7518471171971486, "grad_norm": 0.6711522340774536, "learning_rate": 2.887838958031621e-06, "loss": 0.1937, "step": 41640 }, { "epoch": 0.7519373966300493, "grad_norm": 0.5347040891647339, "learning_rate": 2.8858454407992455e-06, "loss": 0.2265, "step": 41645 }, { "epoch": 0.7520276760629501, "grad_norm": 0.8567517399787903, "learning_rate": 2.8838524958488754e-06, "loss": 0.2047, "step": 41650 }, { "epoch": 0.7521179554958508, "grad_norm": 0.3282148838043213, "learning_rate": 2.881860123340836e-06, "loss": 0.1848, "step": 41655 }, { "epoch": 0.7522082349287514, "grad_norm": 0.42307010293006897, "learning_rate": 2.879868323435393e-06, "loss": 0.1674, "step": 41660 }, { "epoch": 0.7522985143616522, "grad_norm": 0.5557116270065308, "learning_rate": 2.8778770962927748e-06, "loss": 0.1023, "step": 41665 }, { "epoch": 0.7523887937945529, "grad_norm": 0.6449236273765564, "learning_rate": 2.8758864420731612e-06, "loss": 0.2737, "step": 41670 }, { "epoch": 0.7524790732274537, "grad_norm": 0.24184074997901917, "learning_rate": 2.873896360936689e-06, "loss": 0.1679, "step": 41675 }, { "epoch": 0.7525693526603543, "grad_norm": 0.7325660586357117, "learning_rate": 2.8719068530434403e-06, "loss": 0.2481, "step": 41680 }, { "epoch": 0.752659632093255, "grad_norm": 0.3087085485458374, "learning_rate": 2.869917918553462e-06, "loss": 0.1987, "step": 41685 }, { "epoch": 0.7527499115261558, "grad_norm": 0.5672470927238464, "learning_rate": 2.867929557626743e-06, "loss": 0.1882, "step": 41690 }, { "epoch": 0.7528401909590565, "grad_norm": 0.6781942248344421, "learning_rate": 2.8659417704232383e-06, "loss": 0.1773, "step": 41695 }, { "epoch": 0.7529304703919572, "grad_norm": 0.3354477882385254, "learning_rate": 2.863954557102847e-06, "loss": 0.1943, "step": 41700 }, { "epoch": 0.7530207498248579, "grad_norm": 0.34309548139572144, "learning_rate": 2.8619679178254267e-06, "loss": 0.208, "step": 41705 }, { "epoch": 0.7531110292577586, "grad_norm": 0.4000491201877594, "learning_rate": 2.8599818527507896e-06, "loss": 0.1707, "step": 41710 }, { "epoch": 0.7532013086906594, "grad_norm": 0.5786740183830261, "learning_rate": 2.8579963620387007e-06, "loss": 0.2568, "step": 41715 }, { "epoch": 0.75329158812356, "grad_norm": 0.4471411406993866, "learning_rate": 2.8560114458488784e-06, "loss": 0.181, "step": 41720 }, { "epoch": 0.7533818675564607, "grad_norm": 0.5158851742744446, "learning_rate": 2.8540271043409907e-06, "loss": 0.2814, "step": 41725 }, { "epoch": 0.7534721469893615, "grad_norm": 0.2408069670200348, "learning_rate": 2.852043337674666e-06, "loss": 0.2056, "step": 41730 }, { "epoch": 0.7535624264222622, "grad_norm": 0.33224543929100037, "learning_rate": 2.8500601460094847e-06, "loss": 0.2299, "step": 41735 }, { "epoch": 0.7536527058551629, "grad_norm": 1.0748296976089478, "learning_rate": 2.848077529504981e-06, "loss": 0.2426, "step": 41740 }, { "epoch": 0.7537429852880636, "grad_norm": 0.5358622670173645, "learning_rate": 2.8460954883206383e-06, "loss": 0.2196, "step": 41745 }, { "epoch": 0.7538332647209643, "grad_norm": 0.6004823446273804, "learning_rate": 2.8441140226159038e-06, "loss": 0.289, "step": 41750 }, { "epoch": 0.7539235441538651, "grad_norm": 0.4540747106075287, "learning_rate": 2.8421331325501633e-06, "loss": 0.2017, "step": 41755 }, { "epoch": 0.7540138235867657, "grad_norm": 0.2679804563522339, "learning_rate": 2.840152818282773e-06, "loss": 0.2331, "step": 41760 }, { "epoch": 0.7541041030196665, "grad_norm": 0.39254021644592285, "learning_rate": 2.8381730799730287e-06, "loss": 0.1945, "step": 41765 }, { "epoch": 0.7541943824525672, "grad_norm": 0.7232952117919922, "learning_rate": 2.836193917780188e-06, "loss": 0.2052, "step": 41770 }, { "epoch": 0.7542846618854679, "grad_norm": 0.43765270709991455, "learning_rate": 2.8342153318634613e-06, "loss": 0.2896, "step": 41775 }, { "epoch": 0.7543749413183686, "grad_norm": 0.88607257604599, "learning_rate": 2.8322373223820134e-06, "loss": 0.2851, "step": 41780 }, { "epoch": 0.7544652207512693, "grad_norm": 0.5815815329551697, "learning_rate": 2.830259889494955e-06, "loss": 0.2658, "step": 41785 }, { "epoch": 0.7545555001841701, "grad_norm": 0.666411280632019, "learning_rate": 2.828283033361363e-06, "loss": 0.1633, "step": 41790 }, { "epoch": 0.7546457796170708, "grad_norm": 0.5315019488334656, "learning_rate": 2.8263067541402543e-06, "loss": 0.1446, "step": 41795 }, { "epoch": 0.7547360590499714, "grad_norm": 0.44192075729370117, "learning_rate": 2.8243310519906096e-06, "loss": 0.1463, "step": 41800 }, { "epoch": 0.7548263384828722, "grad_norm": 0.3611176609992981, "learning_rate": 2.8223559270713597e-06, "loss": 0.2946, "step": 41805 }, { "epoch": 0.7549166179157729, "grad_norm": 0.6399714350700378, "learning_rate": 2.8203813795413926e-06, "loss": 0.1436, "step": 41810 }, { "epoch": 0.7550068973486737, "grad_norm": 0.25619006156921387, "learning_rate": 2.8184074095595416e-06, "loss": 0.2205, "step": 41815 }, { "epoch": 0.7550971767815743, "grad_norm": 0.4509105384349823, "learning_rate": 2.816434017284596e-06, "loss": 0.2073, "step": 41820 }, { "epoch": 0.755187456214475, "grad_norm": 0.6260651350021362, "learning_rate": 2.814461202875308e-06, "loss": 0.1968, "step": 41825 }, { "epoch": 0.7552777356473758, "grad_norm": 0.45519912242889404, "learning_rate": 2.81248896649037e-06, "loss": 0.2017, "step": 41830 }, { "epoch": 0.7553680150802765, "grad_norm": 0.5440272688865662, "learning_rate": 2.810517308288435e-06, "loss": 0.2817, "step": 41835 }, { "epoch": 0.7554582945131771, "grad_norm": 0.5860805511474609, "learning_rate": 2.8085462284281094e-06, "loss": 0.1735, "step": 41840 }, { "epoch": 0.7555485739460779, "grad_norm": 0.38434216380119324, "learning_rate": 2.806575727067957e-06, "loss": 0.2317, "step": 41845 }, { "epoch": 0.7556388533789786, "grad_norm": 0.345718115568161, "learning_rate": 2.8046058043664837e-06, "loss": 0.2065, "step": 41850 }, { "epoch": 0.7557291328118794, "grad_norm": 0.6720041632652283, "learning_rate": 2.8026364604821597e-06, "loss": 0.2969, "step": 41855 }, { "epoch": 0.75581941224478, "grad_norm": 0.36170029640197754, "learning_rate": 2.8006676955734e-06, "loss": 0.1878, "step": 41860 }, { "epoch": 0.7559096916776807, "grad_norm": 0.4976137578487396, "learning_rate": 2.798699509798579e-06, "loss": 0.2749, "step": 41865 }, { "epoch": 0.7559999711105815, "grad_norm": 0.23476167023181915, "learning_rate": 2.7967319033160255e-06, "loss": 0.278, "step": 41870 }, { "epoch": 0.7560902505434822, "grad_norm": 0.7942454218864441, "learning_rate": 2.7947648762840194e-06, "loss": 0.2802, "step": 41875 }, { "epoch": 0.7561805299763829, "grad_norm": 0.32001301646232605, "learning_rate": 2.79279842886079e-06, "loss": 0.2193, "step": 41880 }, { "epoch": 0.7562708094092836, "grad_norm": 0.44344019889831543, "learning_rate": 2.7908325612045283e-06, "loss": 0.1445, "step": 41885 }, { "epoch": 0.7563610888421843, "grad_norm": 0.4995500445365906, "learning_rate": 2.788867273473368e-06, "loss": 0.266, "step": 41890 }, { "epoch": 0.7564513682750851, "grad_norm": 0.4097002148628235, "learning_rate": 2.786902565825408e-06, "loss": 0.2314, "step": 41895 }, { "epoch": 0.7565416477079857, "grad_norm": 0.5643100142478943, "learning_rate": 2.7849384384186893e-06, "loss": 0.2094, "step": 41900 }, { "epoch": 0.7566319271408865, "grad_norm": 0.3898535668849945, "learning_rate": 2.7829748914112154e-06, "loss": 0.3218, "step": 41905 }, { "epoch": 0.7567222065737872, "grad_norm": 0.5345604419708252, "learning_rate": 2.7810119249609413e-06, "loss": 0.1929, "step": 41910 }, { "epoch": 0.7568124860066879, "grad_norm": 0.46164649724960327, "learning_rate": 2.7790495392257675e-06, "loss": 0.1711, "step": 41915 }, { "epoch": 0.7569027654395886, "grad_norm": 0.1710173636674881, "learning_rate": 2.7770877343635603e-06, "loss": 0.227, "step": 41920 }, { "epoch": 0.7569930448724893, "grad_norm": 0.7308053970336914, "learning_rate": 2.7751265105321266e-06, "loss": 0.1467, "step": 41925 }, { "epoch": 0.75708332430539, "grad_norm": 0.5215123295783997, "learning_rate": 2.7731658678892346e-06, "loss": 0.2196, "step": 41930 }, { "epoch": 0.7571736037382908, "grad_norm": 0.29790088534355164, "learning_rate": 2.771205806592605e-06, "loss": 0.1777, "step": 41935 }, { "epoch": 0.7572638831711914, "grad_norm": 0.5989565253257751, "learning_rate": 2.7692463267999127e-06, "loss": 0.2497, "step": 41940 }, { "epoch": 0.7573541626040922, "grad_norm": 0.5519168972969055, "learning_rate": 2.767287428668777e-06, "loss": 0.216, "step": 41945 }, { "epoch": 0.7574444420369929, "grad_norm": 0.7208122611045837, "learning_rate": 2.765329112356784e-06, "loss": 0.2529, "step": 41950 }, { "epoch": 0.7575347214698936, "grad_norm": 0.5940227508544922, "learning_rate": 2.76337137802146e-06, "loss": 0.2009, "step": 41955 }, { "epoch": 0.7576250009027943, "grad_norm": 0.49036723375320435, "learning_rate": 2.7614142258202957e-06, "loss": 0.1871, "step": 41960 }, { "epoch": 0.757715280335695, "grad_norm": 0.37940332293510437, "learning_rate": 2.7594576559107254e-06, "loss": 0.1681, "step": 41965 }, { "epoch": 0.7578055597685958, "grad_norm": 0.8369633555412292, "learning_rate": 2.757501668450142e-06, "loss": 0.1977, "step": 41970 }, { "epoch": 0.7578958392014965, "grad_norm": 0.5507857203483582, "learning_rate": 2.755546263595892e-06, "loss": 0.2498, "step": 41975 }, { "epoch": 0.7579861186343971, "grad_norm": 0.38975781202316284, "learning_rate": 2.7535914415052754e-06, "loss": 0.1758, "step": 41980 }, { "epoch": 0.7580763980672979, "grad_norm": 0.5902056694030762, "learning_rate": 2.7516372023355374e-06, "loss": 0.1492, "step": 41985 }, { "epoch": 0.7581666775001986, "grad_norm": 0.5487368702888489, "learning_rate": 2.74968354624389e-06, "loss": 0.1325, "step": 41990 }, { "epoch": 0.7582569569330994, "grad_norm": 0.41757887601852417, "learning_rate": 2.7477304733874823e-06, "loss": 0.0984, "step": 41995 }, { "epoch": 0.758347236366, "grad_norm": 1.0859988927841187, "learning_rate": 2.74577798392343e-06, "loss": 0.2213, "step": 42000 }, { "epoch": 0.7584375157989007, "grad_norm": 0.35026293992996216, "learning_rate": 2.7438260780087975e-06, "loss": 0.1343, "step": 42005 }, { "epoch": 0.7585277952318015, "grad_norm": 0.48994943499565125, "learning_rate": 2.7418747558005964e-06, "loss": 0.2131, "step": 42010 }, { "epoch": 0.7586180746647022, "grad_norm": 0.44541123509407043, "learning_rate": 2.739924017455804e-06, "loss": 0.2064, "step": 42015 }, { "epoch": 0.758708354097603, "grad_norm": 0.34418150782585144, "learning_rate": 2.7379738631313346e-06, "loss": 0.1252, "step": 42020 }, { "epoch": 0.7587986335305036, "grad_norm": 0.3524698317050934, "learning_rate": 2.7360242929840708e-06, "loss": 0.2012, "step": 42025 }, { "epoch": 0.7588889129634043, "grad_norm": 0.907588541507721, "learning_rate": 2.734075307170835e-06, "loss": 0.2177, "step": 42030 }, { "epoch": 0.7589791923963051, "grad_norm": 0.4300106167793274, "learning_rate": 2.7321269058484133e-06, "loss": 0.2384, "step": 42035 }, { "epoch": 0.7590694718292058, "grad_norm": 0.2557997405529022, "learning_rate": 2.7301790891735392e-06, "loss": 0.1753, "step": 42040 }, { "epoch": 0.7591597512621064, "grad_norm": 0.38408249616622925, "learning_rate": 2.728231857302903e-06, "loss": 0.1843, "step": 42045 }, { "epoch": 0.7592500306950072, "grad_norm": 0.5381377935409546, "learning_rate": 2.72628521039314e-06, "loss": 0.1846, "step": 42050 }, { "epoch": 0.7593403101279079, "grad_norm": 0.4469461739063263, "learning_rate": 2.724339148600851e-06, "loss": 0.2237, "step": 42055 }, { "epoch": 0.7594305895608087, "grad_norm": 0.5588510036468506, "learning_rate": 2.722393672082575e-06, "loss": 0.2207, "step": 42060 }, { "epoch": 0.7595208689937093, "grad_norm": 0.5202145576477051, "learning_rate": 2.7204487809948145e-06, "loss": 0.1477, "step": 42065 }, { "epoch": 0.75961114842661, "grad_norm": 1.5417118072509766, "learning_rate": 2.7185044754940215e-06, "loss": 0.2629, "step": 42070 }, { "epoch": 0.7597014278595108, "grad_norm": 0.6991029977798462, "learning_rate": 2.7165607557366057e-06, "loss": 0.2387, "step": 42075 }, { "epoch": 0.7597917072924115, "grad_norm": 0.650897204875946, "learning_rate": 2.714617621878918e-06, "loss": 0.195, "step": 42080 }, { "epoch": 0.7598819867253122, "grad_norm": 0.5020387768745422, "learning_rate": 2.7126750740772755e-06, "loss": 0.1731, "step": 42085 }, { "epoch": 0.7599722661582129, "grad_norm": 0.4681573510169983, "learning_rate": 2.71073311248794e-06, "loss": 0.1819, "step": 42090 }, { "epoch": 0.7600625455911136, "grad_norm": 0.42313507199287415, "learning_rate": 2.7087917372671248e-06, "loss": 0.2363, "step": 42095 }, { "epoch": 0.7601528250240144, "grad_norm": 0.2385631799697876, "learning_rate": 2.7068509485710004e-06, "loss": 0.2184, "step": 42100 }, { "epoch": 0.760243104456915, "grad_norm": 0.3899441957473755, "learning_rate": 2.704910746555692e-06, "loss": 0.2377, "step": 42105 }, { "epoch": 0.7603333838898158, "grad_norm": 0.5357471704483032, "learning_rate": 2.7029711313772767e-06, "loss": 0.2082, "step": 42110 }, { "epoch": 0.7604236633227165, "grad_norm": 0.5904387831687927, "learning_rate": 2.7010321031917765e-06, "loss": 0.2638, "step": 42115 }, { "epoch": 0.7605139427556172, "grad_norm": 0.6223477125167847, "learning_rate": 2.699093662155178e-06, "loss": 0.2409, "step": 42120 }, { "epoch": 0.7606042221885179, "grad_norm": 0.468454509973526, "learning_rate": 2.6971558084234085e-06, "loss": 0.1239, "step": 42125 }, { "epoch": 0.7606945016214186, "grad_norm": 0.5519012808799744, "learning_rate": 2.695218542152357e-06, "loss": 0.2196, "step": 42130 }, { "epoch": 0.7607847810543193, "grad_norm": 0.5590959787368774, "learning_rate": 2.693281863497863e-06, "loss": 0.218, "step": 42135 }, { "epoch": 0.7608750604872201, "grad_norm": 0.6511601805686951, "learning_rate": 2.691345772615721e-06, "loss": 0.263, "step": 42140 }, { "epoch": 0.7609653399201207, "grad_norm": 0.33294880390167236, "learning_rate": 2.6894102696616707e-06, "loss": 0.2152, "step": 42145 }, { "epoch": 0.7610556193530215, "grad_norm": 0.2821709215641022, "learning_rate": 2.6874753547914132e-06, "loss": 0.1989, "step": 42150 }, { "epoch": 0.7611458987859222, "grad_norm": 0.4125169515609741, "learning_rate": 2.6855410281605933e-06, "loss": 0.1377, "step": 42155 }, { "epoch": 0.761236178218823, "grad_norm": 0.7620353102684021, "learning_rate": 2.683607289924819e-06, "loss": 0.2553, "step": 42160 }, { "epoch": 0.7613264576517236, "grad_norm": 0.40569189190864563, "learning_rate": 2.6816741402396407e-06, "loss": 0.2181, "step": 42165 }, { "epoch": 0.7614167370846243, "grad_norm": 0.4498937427997589, "learning_rate": 2.6797415792605673e-06, "loss": 0.3055, "step": 42170 }, { "epoch": 0.7615070165175251, "grad_norm": 0.4452691674232483, "learning_rate": 2.6778096071430616e-06, "loss": 0.349, "step": 42175 }, { "epoch": 0.7615972959504258, "grad_norm": 0.4897069036960602, "learning_rate": 2.6758782240425374e-06, "loss": 0.1811, "step": 42180 }, { "epoch": 0.7616875753833264, "grad_norm": 0.7254247665405273, "learning_rate": 2.67394743011436e-06, "loss": 0.2535, "step": 42185 }, { "epoch": 0.7617778548162272, "grad_norm": 0.5436791181564331, "learning_rate": 2.672017225513842e-06, "loss": 0.1872, "step": 42190 }, { "epoch": 0.7618681342491279, "grad_norm": 0.5768334269523621, "learning_rate": 2.670087610396259e-06, "loss": 0.3027, "step": 42195 }, { "epoch": 0.7619584136820287, "grad_norm": 0.5182393193244934, "learning_rate": 2.6681585849168346e-06, "loss": 0.2969, "step": 42200 }, { "epoch": 0.7620486931149293, "grad_norm": 0.3409898281097412, "learning_rate": 2.6662301492307474e-06, "loss": 0.2224, "step": 42205 }, { "epoch": 0.76213897254783, "grad_norm": 0.39048269391059875, "learning_rate": 2.6643023034931193e-06, "loss": 0.262, "step": 42210 }, { "epoch": 0.7622292519807308, "grad_norm": 0.28336530923843384, "learning_rate": 2.662375047859039e-06, "loss": 0.2134, "step": 42215 }, { "epoch": 0.7623195314136315, "grad_norm": 0.663398265838623, "learning_rate": 2.6604483824835325e-06, "loss": 0.2351, "step": 42220 }, { "epoch": 0.7624098108465321, "grad_norm": 0.13834026455879211, "learning_rate": 2.6585223075215937e-06, "loss": 0.1265, "step": 42225 }, { "epoch": 0.7625000902794329, "grad_norm": 0.4230329692363739, "learning_rate": 2.6565968231281545e-06, "loss": 0.203, "step": 42230 }, { "epoch": 0.7625903697123336, "grad_norm": 0.3502117395401001, "learning_rate": 2.654671929458109e-06, "loss": 0.1546, "step": 42235 }, { "epoch": 0.7626806491452344, "grad_norm": 0.5848438739776611, "learning_rate": 2.652747626666301e-06, "loss": 0.2122, "step": 42240 }, { "epoch": 0.762770928578135, "grad_norm": 0.2812301516532898, "learning_rate": 2.65082391490753e-06, "loss": 0.2429, "step": 42245 }, { "epoch": 0.7628612080110357, "grad_norm": 0.6477394700050354, "learning_rate": 2.6489007943365388e-06, "loss": 0.2137, "step": 42250 }, { "epoch": 0.7629514874439365, "grad_norm": 0.09040392935276031, "learning_rate": 2.6469782651080323e-06, "loss": 0.1736, "step": 42255 }, { "epoch": 0.7630417668768372, "grad_norm": 0.4947648048400879, "learning_rate": 2.6450563273766615e-06, "loss": 0.2682, "step": 42260 }, { "epoch": 0.7631320463097379, "grad_norm": 0.48899325728416443, "learning_rate": 2.643134981297033e-06, "loss": 0.2245, "step": 42265 }, { "epoch": 0.7632223257426386, "grad_norm": 0.3385583162307739, "learning_rate": 2.6412142270237083e-06, "loss": 0.1647, "step": 42270 }, { "epoch": 0.7633126051755393, "grad_norm": 0.4164818525314331, "learning_rate": 2.639294064711193e-06, "loss": 0.2203, "step": 42275 }, { "epoch": 0.7634028846084401, "grad_norm": 0.7585594058036804, "learning_rate": 2.637374494513956e-06, "loss": 0.3093, "step": 42280 }, { "epoch": 0.7634931640413407, "grad_norm": 0.6199455261230469, "learning_rate": 2.6354555165864047e-06, "loss": 0.1823, "step": 42285 }, { "epoch": 0.7635834434742415, "grad_norm": 0.626918613910675, "learning_rate": 2.633537131082916e-06, "loss": 0.1591, "step": 42290 }, { "epoch": 0.7636737229071422, "grad_norm": 0.6108911037445068, "learning_rate": 2.6316193381578035e-06, "loss": 0.3038, "step": 42295 }, { "epoch": 0.7637640023400429, "grad_norm": 0.5488325953483582, "learning_rate": 2.629702137965341e-06, "loss": 0.1507, "step": 42300 }, { "epoch": 0.7638542817729436, "grad_norm": 0.25596383213996887, "learning_rate": 2.6277855306597543e-06, "loss": 0.1498, "step": 42305 }, { "epoch": 0.7639445612058443, "grad_norm": 0.7998791933059692, "learning_rate": 2.625869516395223e-06, "loss": 0.2152, "step": 42310 }, { "epoch": 0.764034840638745, "grad_norm": 0.4003254473209381, "learning_rate": 2.6239540953258724e-06, "loss": 0.2383, "step": 42315 }, { "epoch": 0.7641251200716458, "grad_norm": 0.38832858204841614, "learning_rate": 2.6220392676057873e-06, "loss": 0.1456, "step": 42320 }, { "epoch": 0.7642153995045464, "grad_norm": 0.5294615626335144, "learning_rate": 2.6201250333889984e-06, "loss": 0.2623, "step": 42325 }, { "epoch": 0.7643056789374472, "grad_norm": 0.38336092233657837, "learning_rate": 2.6182113928294926e-06, "loss": 0.2714, "step": 42330 }, { "epoch": 0.7643959583703479, "grad_norm": 0.44061025977134705, "learning_rate": 2.6162983460812097e-06, "loss": 0.1364, "step": 42335 }, { "epoch": 0.7644862378032486, "grad_norm": 0.4322599768638611, "learning_rate": 2.6143858932980437e-06, "loss": 0.1758, "step": 42340 }, { "epoch": 0.7645765172361493, "grad_norm": 0.5041878819465637, "learning_rate": 2.6124740346338305e-06, "loss": 0.1915, "step": 42345 }, { "epoch": 0.76466679666905, "grad_norm": 0.36872994899749756, "learning_rate": 2.610562770242372e-06, "loss": 0.24, "step": 42350 }, { "epoch": 0.7647570761019508, "grad_norm": 0.4666585326194763, "learning_rate": 2.6086521002774092e-06, "loss": 0.2747, "step": 42355 }, { "epoch": 0.7648473555348515, "grad_norm": 0.48667874932289124, "learning_rate": 2.6067420248926477e-06, "loss": 0.2137, "step": 42360 }, { "epoch": 0.7649376349677521, "grad_norm": 0.34638234972953796, "learning_rate": 2.6048325442417334e-06, "loss": 0.2217, "step": 42365 }, { "epoch": 0.7650279144006529, "grad_norm": 0.5076397657394409, "learning_rate": 2.6029236584782727e-06, "loss": 0.1999, "step": 42370 }, { "epoch": 0.7651181938335536, "grad_norm": 0.5190479159355164, "learning_rate": 2.6010153677558237e-06, "loss": 0.1606, "step": 42375 }, { "epoch": 0.7652084732664544, "grad_norm": 0.2692440152168274, "learning_rate": 2.599107672227891e-06, "loss": 0.182, "step": 42380 }, { "epoch": 0.765298752699355, "grad_norm": 0.4179719090461731, "learning_rate": 2.5972005720479377e-06, "loss": 0.2905, "step": 42385 }, { "epoch": 0.7653890321322557, "grad_norm": 0.493459016084671, "learning_rate": 2.5952940673693726e-06, "loss": 0.1793, "step": 42390 }, { "epoch": 0.7654793115651565, "grad_norm": 0.45662614703178406, "learning_rate": 2.5933881583455632e-06, "loss": 0.2157, "step": 42395 }, { "epoch": 0.7655695909980572, "grad_norm": 0.8813372254371643, "learning_rate": 2.5914828451298248e-06, "loss": 0.2523, "step": 42400 }, { "epoch": 0.765659870430958, "grad_norm": 0.6023253798484802, "learning_rate": 2.5895781278754286e-06, "loss": 0.253, "step": 42405 }, { "epoch": 0.7657501498638586, "grad_norm": 0.546947717666626, "learning_rate": 2.587674006735591e-06, "loss": 0.2306, "step": 42410 }, { "epoch": 0.7658404292967593, "grad_norm": 0.48942071199417114, "learning_rate": 2.585770481863489e-06, "loss": 0.3523, "step": 42415 }, { "epoch": 0.7659307087296601, "grad_norm": 0.20931553840637207, "learning_rate": 2.5838675534122428e-06, "loss": 0.2231, "step": 42420 }, { "epoch": 0.7660209881625608, "grad_norm": 1.144301414489746, "learning_rate": 2.5819652215349334e-06, "loss": 0.303, "step": 42425 }, { "epoch": 0.7661112675954614, "grad_norm": 0.4387619197368622, "learning_rate": 2.580063486384585e-06, "loss": 0.2749, "step": 42430 }, { "epoch": 0.7662015470283622, "grad_norm": 0.5536942481994629, "learning_rate": 2.5781623481141817e-06, "loss": 0.2277, "step": 42435 }, { "epoch": 0.7662918264612629, "grad_norm": 0.32391244173049927, "learning_rate": 2.5762618068766553e-06, "loss": 0.1775, "step": 42440 }, { "epoch": 0.7663821058941637, "grad_norm": 0.2672235369682312, "learning_rate": 2.5743618628248934e-06, "loss": 0.2203, "step": 42445 }, { "epoch": 0.7664723853270643, "grad_norm": 0.4057731628417969, "learning_rate": 2.5724625161117265e-06, "loss": 0.2012, "step": 42450 }, { "epoch": 0.766562664759965, "grad_norm": 0.410159170627594, "learning_rate": 2.570563766889951e-06, "loss": 0.254, "step": 42455 }, { "epoch": 0.7666529441928658, "grad_norm": 0.5925469398498535, "learning_rate": 2.568665615312299e-06, "loss": 0.2442, "step": 42460 }, { "epoch": 0.7667432236257665, "grad_norm": 1.049332857131958, "learning_rate": 2.5667680615314685e-06, "loss": 0.2323, "step": 42465 }, { "epoch": 0.7668335030586672, "grad_norm": 0.5431271195411682, "learning_rate": 2.5648711057001052e-06, "loss": 0.1807, "step": 42470 }, { "epoch": 0.7669237824915679, "grad_norm": 0.33623602986335754, "learning_rate": 2.562974747970799e-06, "loss": 0.248, "step": 42475 }, { "epoch": 0.7670140619244686, "grad_norm": 0.5575045347213745, "learning_rate": 2.5610789884961063e-06, "loss": 0.1661, "step": 42480 }, { "epoch": 0.7671043413573694, "grad_norm": 0.46629127860069275, "learning_rate": 2.5591838274285185e-06, "loss": 0.2999, "step": 42485 }, { "epoch": 0.76719462079027, "grad_norm": 0.3633790612220764, "learning_rate": 2.557289264920495e-06, "loss": 0.2391, "step": 42490 }, { "epoch": 0.7672849002231708, "grad_norm": 0.2716864049434662, "learning_rate": 2.5553953011244337e-06, "loss": 0.2262, "step": 42495 }, { "epoch": 0.7673751796560715, "grad_norm": 0.5711254477500916, "learning_rate": 2.5535019361926937e-06, "loss": 0.1842, "step": 42500 }, { "epoch": 0.7674654590889722, "grad_norm": 0.5874897241592407, "learning_rate": 2.55160917027758e-06, "loss": 0.2661, "step": 42505 }, { "epoch": 0.7675557385218729, "grad_norm": 0.30902114510536194, "learning_rate": 2.549717003531358e-06, "loss": 0.2317, "step": 42510 }, { "epoch": 0.7676460179547736, "grad_norm": 0.47627681493759155, "learning_rate": 2.5478254361062306e-06, "loss": 0.1844, "step": 42515 }, { "epoch": 0.7677362973876743, "grad_norm": 0.8980509638786316, "learning_rate": 2.5459344681543672e-06, "loss": 0.2205, "step": 42520 }, { "epoch": 0.7678265768205751, "grad_norm": 0.516670286655426, "learning_rate": 2.544044099827877e-06, "loss": 0.2158, "step": 42525 }, { "epoch": 0.7679168562534757, "grad_norm": 0.4446510970592499, "learning_rate": 2.542154331278829e-06, "loss": 0.2529, "step": 42530 }, { "epoch": 0.7680071356863765, "grad_norm": 0.17722104489803314, "learning_rate": 2.540265162659241e-06, "loss": 0.1482, "step": 42535 }, { "epoch": 0.7680974151192772, "grad_norm": 0.4235355257987976, "learning_rate": 2.538376594121086e-06, "loss": 0.211, "step": 42540 }, { "epoch": 0.768187694552178, "grad_norm": 0.38903820514678955, "learning_rate": 2.53648862581628e-06, "loss": 0.2324, "step": 42545 }, { "epoch": 0.7682779739850786, "grad_norm": 0.2501767873764038, "learning_rate": 2.5346012578967017e-06, "loss": 0.3024, "step": 42550 }, { "epoch": 0.7683682534179793, "grad_norm": 0.5181525945663452, "learning_rate": 2.5327144905141733e-06, "loss": 0.272, "step": 42555 }, { "epoch": 0.7684585328508801, "grad_norm": 0.5097023844718933, "learning_rate": 2.530828323820469e-06, "loss": 0.1868, "step": 42560 }, { "epoch": 0.7685488122837808, "grad_norm": 0.8557437658309937, "learning_rate": 2.528942757967321e-06, "loss": 0.225, "step": 42565 }, { "epoch": 0.7686390917166814, "grad_norm": 0.49657967686653137, "learning_rate": 2.527057793106407e-06, "loss": 0.1764, "step": 42570 }, { "epoch": 0.7687293711495822, "grad_norm": 0.6104001998901367, "learning_rate": 2.5251734293893637e-06, "loss": 0.2147, "step": 42575 }, { "epoch": 0.7688196505824829, "grad_norm": 0.5121384859085083, "learning_rate": 2.5232896669677674e-06, "loss": 0.2092, "step": 42580 }, { "epoch": 0.7689099300153837, "grad_norm": 0.276767760515213, "learning_rate": 2.521406505993159e-06, "loss": 0.2346, "step": 42585 }, { "epoch": 0.7690002094482843, "grad_norm": 0.20188720524311066, "learning_rate": 2.5195239466170207e-06, "loss": 0.1299, "step": 42590 }, { "epoch": 0.769090488881185, "grad_norm": 0.5935357213020325, "learning_rate": 2.517641988990791e-06, "loss": 0.2915, "step": 42595 }, { "epoch": 0.7691807683140858, "grad_norm": 0.4150652587413788, "learning_rate": 2.5157606332658624e-06, "loss": 0.232, "step": 42600 }, { "epoch": 0.7692710477469865, "grad_norm": 0.5176143050193787, "learning_rate": 2.513879879593578e-06, "loss": 0.2345, "step": 42605 }, { "epoch": 0.7693613271798871, "grad_norm": 0.4309931695461273, "learning_rate": 2.511999728125225e-06, "loss": 0.1915, "step": 42610 }, { "epoch": 0.7694516066127879, "grad_norm": 0.45065101981163025, "learning_rate": 2.5101201790120545e-06, "loss": 0.2002, "step": 42615 }, { "epoch": 0.7695418860456886, "grad_norm": 0.49554944038391113, "learning_rate": 2.5082412324052553e-06, "loss": 0.1543, "step": 42620 }, { "epoch": 0.7696321654785894, "grad_norm": 0.48984622955322266, "learning_rate": 2.506362888455981e-06, "loss": 0.2093, "step": 42625 }, { "epoch": 0.76972244491149, "grad_norm": 0.5073429942131042, "learning_rate": 2.504485147315323e-06, "loss": 0.2177, "step": 42630 }, { "epoch": 0.7698127243443907, "grad_norm": 0.4508723020553589, "learning_rate": 2.502608009134343e-06, "loss": 0.1841, "step": 42635 }, { "epoch": 0.7699030037772915, "grad_norm": 0.2888708710670471, "learning_rate": 2.5007314740640375e-06, "loss": 0.2321, "step": 42640 }, { "epoch": 0.7699932832101922, "grad_norm": 0.5804747343063354, "learning_rate": 2.498855542255356e-06, "loss": 0.2339, "step": 42645 }, { "epoch": 0.7700835626430929, "grad_norm": 0.742763102054596, "learning_rate": 2.4969802138592125e-06, "loss": 0.1643, "step": 42650 }, { "epoch": 0.7701738420759936, "grad_norm": 0.4516551196575165, "learning_rate": 2.4951054890264536e-06, "loss": 0.2385, "step": 42655 }, { "epoch": 0.7702641215088943, "grad_norm": 0.30727094411849976, "learning_rate": 2.4932313679078936e-06, "loss": 0.2728, "step": 42660 }, { "epoch": 0.7703544009417951, "grad_norm": 0.5855993032455444, "learning_rate": 2.49135785065429e-06, "loss": 0.2481, "step": 42665 }, { "epoch": 0.7704446803746957, "grad_norm": 0.23302792012691498, "learning_rate": 2.4894849374163577e-06, "loss": 0.2134, "step": 42670 }, { "epoch": 0.7705349598075965, "grad_norm": 0.5293843150138855, "learning_rate": 2.4876126283447523e-06, "loss": 0.2389, "step": 42675 }, { "epoch": 0.7706252392404972, "grad_norm": 0.49191197752952576, "learning_rate": 2.4857409235900932e-06, "loss": 0.2098, "step": 42680 }, { "epoch": 0.7707155186733979, "grad_norm": 0.7379592657089233, "learning_rate": 2.48386982330294e-06, "loss": 0.2333, "step": 42685 }, { "epoch": 0.7708057981062986, "grad_norm": 0.483277827501297, "learning_rate": 2.481999327633815e-06, "loss": 0.1944, "step": 42690 }, { "epoch": 0.7708960775391993, "grad_norm": 0.425761342048645, "learning_rate": 2.4801294367331763e-06, "loss": 0.2939, "step": 42695 }, { "epoch": 0.7709863569721, "grad_norm": 0.3765316307544708, "learning_rate": 2.478260150751457e-06, "loss": 0.2718, "step": 42700 }, { "epoch": 0.7710766364050008, "grad_norm": 0.3427625894546509, "learning_rate": 2.4763914698390166e-06, "loss": 0.2891, "step": 42705 }, { "epoch": 0.7711669158379014, "grad_norm": 0.5614849925041199, "learning_rate": 2.474523394146183e-06, "loss": 0.2864, "step": 42710 }, { "epoch": 0.7712571952708022, "grad_norm": 0.5848512649536133, "learning_rate": 2.472655923823225e-06, "loss": 0.2946, "step": 42715 }, { "epoch": 0.7713474747037029, "grad_norm": 1.3261486291885376, "learning_rate": 2.470789059020371e-06, "loss": 0.2106, "step": 42720 }, { "epoch": 0.7714377541366036, "grad_norm": 0.36234983801841736, "learning_rate": 2.4689227998877917e-06, "loss": 0.1517, "step": 42725 }, { "epoch": 0.7715280335695043, "grad_norm": 0.3489014804363251, "learning_rate": 2.4670571465756167e-06, "loss": 0.2127, "step": 42730 }, { "epoch": 0.771618313002405, "grad_norm": 0.49692797660827637, "learning_rate": 2.465192099233927e-06, "loss": 0.2127, "step": 42735 }, { "epoch": 0.7717085924353058, "grad_norm": 1.3462014198303223, "learning_rate": 2.4633276580127474e-06, "loss": 0.3143, "step": 42740 }, { "epoch": 0.7717988718682065, "grad_norm": 0.47123292088508606, "learning_rate": 2.4614638230620645e-06, "loss": 0.2834, "step": 42745 }, { "epoch": 0.7718891513011071, "grad_norm": 0.6388853788375854, "learning_rate": 2.4596005945318024e-06, "loss": 0.1828, "step": 42750 }, { "epoch": 0.7719794307340079, "grad_norm": 0.3754936456680298, "learning_rate": 2.457737972571852e-06, "loss": 0.1666, "step": 42755 }, { "epoch": 0.7720697101669086, "grad_norm": 0.5376711487770081, "learning_rate": 2.455875957332041e-06, "loss": 0.2344, "step": 42760 }, { "epoch": 0.7721599895998094, "grad_norm": 0.2916709780693054, "learning_rate": 2.4540145489621582e-06, "loss": 0.2351, "step": 42765 }, { "epoch": 0.77225026903271, "grad_norm": 0.28260645270347595, "learning_rate": 2.4521537476119407e-06, "loss": 0.2395, "step": 42770 }, { "epoch": 0.7723405484656107, "grad_norm": 0.5174070596694946, "learning_rate": 2.450293553431078e-06, "loss": 0.2205, "step": 42775 }, { "epoch": 0.7724308278985115, "grad_norm": 0.46815547347068787, "learning_rate": 2.4484339665692048e-06, "loss": 0.1975, "step": 42780 }, { "epoch": 0.7725211073314122, "grad_norm": 0.5334517359733582, "learning_rate": 2.446574987175917e-06, "loss": 0.2686, "step": 42785 }, { "epoch": 0.7726113867643128, "grad_norm": 0.4952988028526306, "learning_rate": 2.4447166154007494e-06, "loss": 0.3388, "step": 42790 }, { "epoch": 0.7727016661972136, "grad_norm": 0.4773997366428375, "learning_rate": 2.4428588513931985e-06, "loss": 0.314, "step": 42795 }, { "epoch": 0.7727919456301143, "grad_norm": 0.40349653363227844, "learning_rate": 2.441001695302707e-06, "loss": 0.2495, "step": 42800 }, { "epoch": 0.7728822250630151, "grad_norm": 0.4642648994922638, "learning_rate": 2.439145147278673e-06, "loss": 0.2607, "step": 42805 }, { "epoch": 0.7729725044959158, "grad_norm": 0.5170674324035645, "learning_rate": 2.4372892074704356e-06, "loss": 0.1919, "step": 42810 }, { "epoch": 0.7730627839288164, "grad_norm": 0.46825897693634033, "learning_rate": 2.4354338760272987e-06, "loss": 0.1711, "step": 42815 }, { "epoch": 0.7731530633617172, "grad_norm": 0.6066874861717224, "learning_rate": 2.4335791530985044e-06, "loss": 0.2502, "step": 42820 }, { "epoch": 0.7732433427946179, "grad_norm": 0.37474513053894043, "learning_rate": 2.431725038833256e-06, "loss": 0.3085, "step": 42825 }, { "epoch": 0.7733336222275187, "grad_norm": 0.27002474665641785, "learning_rate": 2.4298715333806987e-06, "loss": 0.2874, "step": 42830 }, { "epoch": 0.7734239016604193, "grad_norm": 0.2877873182296753, "learning_rate": 2.428018636889936e-06, "loss": 0.1283, "step": 42835 }, { "epoch": 0.77351418109332, "grad_norm": 0.5184369683265686, "learning_rate": 2.426166349510024e-06, "loss": 0.2457, "step": 42840 }, { "epoch": 0.7736044605262208, "grad_norm": 0.706448495388031, "learning_rate": 2.424314671389959e-06, "loss": 0.239, "step": 42845 }, { "epoch": 0.7736947399591215, "grad_norm": 0.27848494052886963, "learning_rate": 2.422463602678701e-06, "loss": 0.188, "step": 42850 }, { "epoch": 0.7737850193920222, "grad_norm": 0.711946964263916, "learning_rate": 2.420613143525149e-06, "loss": 0.2974, "step": 42855 }, { "epoch": 0.7738752988249229, "grad_norm": 0.28272512555122375, "learning_rate": 2.4187632940781626e-06, "loss": 0.1606, "step": 42860 }, { "epoch": 0.7739655782578236, "grad_norm": 0.5454052090644836, "learning_rate": 2.416914054486549e-06, "loss": 0.2318, "step": 42865 }, { "epoch": 0.7740558576907244, "grad_norm": 0.4498244524002075, "learning_rate": 2.415065424899067e-06, "loss": 0.2162, "step": 42870 }, { "epoch": 0.774146137123625, "grad_norm": 0.4151298999786377, "learning_rate": 2.413217405464421e-06, "loss": 0.2594, "step": 42875 }, { "epoch": 0.7742364165565258, "grad_norm": 0.40834593772888184, "learning_rate": 2.4113699963312766e-06, "loss": 0.221, "step": 42880 }, { "epoch": 0.7743266959894265, "grad_norm": 0.7377216815948486, "learning_rate": 2.409523197648238e-06, "loss": 0.204, "step": 42885 }, { "epoch": 0.7744169754223272, "grad_norm": 0.7599356770515442, "learning_rate": 2.4076770095638734e-06, "loss": 0.2129, "step": 42890 }, { "epoch": 0.7745072548552279, "grad_norm": 0.5863339900970459, "learning_rate": 2.4058314322266863e-06, "loss": 0.1681, "step": 42895 }, { "epoch": 0.7745975342881286, "grad_norm": 0.4008416533470154, "learning_rate": 2.403986465785151e-06, "loss": 0.3034, "step": 42900 }, { "epoch": 0.7746878137210294, "grad_norm": 0.3215121626853943, "learning_rate": 2.402142110387673e-06, "loss": 0.249, "step": 42905 }, { "epoch": 0.7747780931539301, "grad_norm": 0.4487439692020416, "learning_rate": 2.400298366182624e-06, "loss": 0.2336, "step": 42910 }, { "epoch": 0.7748683725868307, "grad_norm": 0.7740156054496765, "learning_rate": 2.3984552333183164e-06, "loss": 0.3046, "step": 42915 }, { "epoch": 0.7749586520197315, "grad_norm": 0.3982470631599426, "learning_rate": 2.396612711943013e-06, "loss": 0.2324, "step": 42920 }, { "epoch": 0.7750489314526322, "grad_norm": 0.9876501560211182, "learning_rate": 2.3947708022049355e-06, "loss": 0.2852, "step": 42925 }, { "epoch": 0.775139210885533, "grad_norm": 0.3493773639202118, "learning_rate": 2.3929295042522514e-06, "loss": 0.1799, "step": 42930 }, { "epoch": 0.7752294903184336, "grad_norm": 0.7048976421356201, "learning_rate": 2.391088818233083e-06, "loss": 0.2382, "step": 42935 }, { "epoch": 0.7753197697513343, "grad_norm": 1.0359954833984375, "learning_rate": 2.3892487442954935e-06, "loss": 0.192, "step": 42940 }, { "epoch": 0.7754100491842351, "grad_norm": 0.30253368616104126, "learning_rate": 2.3874092825875107e-06, "loss": 0.2552, "step": 42945 }, { "epoch": 0.7755003286171358, "grad_norm": 0.3940982222557068, "learning_rate": 2.3855704332570994e-06, "loss": 0.2129, "step": 42950 }, { "epoch": 0.7755906080500364, "grad_norm": 0.3559393286705017, "learning_rate": 2.3837321964521865e-06, "loss": 0.2135, "step": 42955 }, { "epoch": 0.7756808874829372, "grad_norm": 0.730930745601654, "learning_rate": 2.3818945723206378e-06, "loss": 0.1775, "step": 42960 }, { "epoch": 0.7757711669158379, "grad_norm": 0.4467575252056122, "learning_rate": 2.380057561010287e-06, "loss": 0.3191, "step": 42965 }, { "epoch": 0.7758614463487387, "grad_norm": 0.36840131878852844, "learning_rate": 2.3782211626689013e-06, "loss": 0.3227, "step": 42970 }, { "epoch": 0.7759517257816393, "grad_norm": 0.47732090950012207, "learning_rate": 2.376385377444209e-06, "loss": 0.2419, "step": 42975 }, { "epoch": 0.77604200521454, "grad_norm": 0.32633882761001587, "learning_rate": 2.374550205483881e-06, "loss": 0.2945, "step": 42980 }, { "epoch": 0.7761322846474408, "grad_norm": 0.6281416416168213, "learning_rate": 2.3727156469355504e-06, "loss": 0.304, "step": 42985 }, { "epoch": 0.7762225640803415, "grad_norm": 0.38120004534721375, "learning_rate": 2.3708817019467868e-06, "loss": 0.1746, "step": 42990 }, { "epoch": 0.7763128435132421, "grad_norm": 0.39160090684890747, "learning_rate": 2.3690483706651225e-06, "loss": 0.2276, "step": 42995 }, { "epoch": 0.7764031229461429, "grad_norm": 0.40250253677368164, "learning_rate": 2.367215653238033e-06, "loss": 0.2698, "step": 43000 }, { "epoch": 0.7764934023790436, "grad_norm": 0.3191860318183899, "learning_rate": 2.365383549812952e-06, "loss": 0.2482, "step": 43005 }, { "epoch": 0.7765836818119444, "grad_norm": 0.5613829493522644, "learning_rate": 2.363552060537255e-06, "loss": 0.1981, "step": 43010 }, { "epoch": 0.776673961244845, "grad_norm": 0.38162267208099365, "learning_rate": 2.36172118555827e-06, "loss": 0.2371, "step": 43015 }, { "epoch": 0.7767642406777457, "grad_norm": 0.3718111515045166, "learning_rate": 2.3598909250232816e-06, "loss": 0.1705, "step": 43020 }, { "epoch": 0.7768545201106465, "grad_norm": 0.7747634649276733, "learning_rate": 2.3580612790795154e-06, "loss": 0.2347, "step": 43025 }, { "epoch": 0.7769447995435472, "grad_norm": 0.41002294421195984, "learning_rate": 2.356232247874162e-06, "loss": 0.2013, "step": 43030 }, { "epoch": 0.7770350789764479, "grad_norm": 0.4790644347667694, "learning_rate": 2.354403831554345e-06, "loss": 0.1949, "step": 43035 }, { "epoch": 0.7771253584093486, "grad_norm": 0.23889146745204926, "learning_rate": 2.352576030267154e-06, "loss": 0.2301, "step": 43040 }, { "epoch": 0.7772156378422493, "grad_norm": 0.3431209325790405, "learning_rate": 2.350748844159617e-06, "loss": 0.227, "step": 43045 }, { "epoch": 0.7773059172751501, "grad_norm": 0.34881192445755005, "learning_rate": 2.348922273378723e-06, "loss": 0.2195, "step": 43050 }, { "epoch": 0.7773961967080507, "grad_norm": 0.4788563549518585, "learning_rate": 2.3470963180714e-06, "loss": 0.1821, "step": 43055 }, { "epoch": 0.7774864761409515, "grad_norm": 0.488158255815506, "learning_rate": 2.3452709783845375e-06, "loss": 0.1377, "step": 43060 }, { "epoch": 0.7775767555738522, "grad_norm": 0.5387958884239197, "learning_rate": 2.3434462544649683e-06, "loss": 0.255, "step": 43065 }, { "epoch": 0.7776670350067529, "grad_norm": 0.4991535246372223, "learning_rate": 2.3416221464594826e-06, "loss": 0.3212, "step": 43070 }, { "epoch": 0.7777573144396536, "grad_norm": 0.4338509738445282, "learning_rate": 2.33979865451481e-06, "loss": 0.166, "step": 43075 }, { "epoch": 0.7778475938725543, "grad_norm": 0.4558165669441223, "learning_rate": 2.3379757787776435e-06, "loss": 0.2285, "step": 43080 }, { "epoch": 0.777937873305455, "grad_norm": 0.42054474353790283, "learning_rate": 2.336153519394615e-06, "loss": 0.2826, "step": 43085 }, { "epoch": 0.7780281527383558, "grad_norm": 0.4690871834754944, "learning_rate": 2.3343318765123146e-06, "loss": 0.1927, "step": 43090 }, { "epoch": 0.7781184321712564, "grad_norm": 0.22620423138141632, "learning_rate": 2.33251085027728e-06, "loss": 0.2258, "step": 43095 }, { "epoch": 0.7782087116041572, "grad_norm": 0.343610554933548, "learning_rate": 2.330690440836001e-06, "loss": 0.2006, "step": 43100 }, { "epoch": 0.7782989910370579, "grad_norm": 0.2786353528499603, "learning_rate": 2.3288706483349157e-06, "loss": 0.2464, "step": 43105 }, { "epoch": 0.7783892704699586, "grad_norm": 0.22044533491134644, "learning_rate": 2.3270514729204096e-06, "loss": 0.2396, "step": 43110 }, { "epoch": 0.7784795499028593, "grad_norm": 0.46998295187950134, "learning_rate": 2.3252329147388274e-06, "loss": 0.2202, "step": 43115 }, { "epoch": 0.77856982933576, "grad_norm": 0.4731232821941376, "learning_rate": 2.323414973936453e-06, "loss": 0.2087, "step": 43120 }, { "epoch": 0.7786601087686608, "grad_norm": 0.45158910751342773, "learning_rate": 2.3215976506595296e-06, "loss": 0.2806, "step": 43125 }, { "epoch": 0.7787503882015615, "grad_norm": 0.4839245676994324, "learning_rate": 2.3197809450542487e-06, "loss": 0.1251, "step": 43130 }, { "epoch": 0.7788406676344621, "grad_norm": 1.5681735277175903, "learning_rate": 2.3179648572667522e-06, "loss": 0.2691, "step": 43135 }, { "epoch": 0.7789309470673629, "grad_norm": 0.4585834741592407, "learning_rate": 2.3161493874431272e-06, "loss": 0.1773, "step": 43140 }, { "epoch": 0.7790212265002636, "grad_norm": 0.29608702659606934, "learning_rate": 2.314334535729419e-06, "loss": 0.1347, "step": 43145 }, { "epoch": 0.7791115059331644, "grad_norm": 0.5196985602378845, "learning_rate": 2.3125203022716147e-06, "loss": 0.1933, "step": 43150 }, { "epoch": 0.779201785366065, "grad_norm": 0.4159768223762512, "learning_rate": 2.3107066872156592e-06, "loss": 0.2573, "step": 43155 }, { "epoch": 0.7792920647989657, "grad_norm": 0.6043077707290649, "learning_rate": 2.3088936907074434e-06, "loss": 0.3016, "step": 43160 }, { "epoch": 0.7793823442318665, "grad_norm": 0.3892230689525604, "learning_rate": 2.3070813128928137e-06, "loss": 0.179, "step": 43165 }, { "epoch": 0.7794726236647672, "grad_norm": 0.35871732234954834, "learning_rate": 2.3052695539175562e-06, "loss": 0.2789, "step": 43170 }, { "epoch": 0.7795629030976678, "grad_norm": 0.5863191485404968, "learning_rate": 2.303458413927421e-06, "loss": 0.2052, "step": 43175 }, { "epoch": 0.7796531825305686, "grad_norm": 0.6840908527374268, "learning_rate": 2.301647893068093e-06, "loss": 0.1901, "step": 43180 }, { "epoch": 0.7797434619634693, "grad_norm": 0.6739901304244995, "learning_rate": 2.299837991485223e-06, "loss": 0.3192, "step": 43185 }, { "epoch": 0.7798337413963701, "grad_norm": 0.6026039719581604, "learning_rate": 2.298028709324398e-06, "loss": 0.1811, "step": 43190 }, { "epoch": 0.7799240208292708, "grad_norm": 0.7208412885665894, "learning_rate": 2.2962200467311646e-06, "loss": 0.2616, "step": 43195 }, { "epoch": 0.7800143002621714, "grad_norm": 0.7069392204284668, "learning_rate": 2.2944120038510187e-06, "loss": 0.1668, "step": 43200 }, { "epoch": 0.7801045796950722, "grad_norm": 0.94843989610672, "learning_rate": 2.2926045808294007e-06, "loss": 0.2501, "step": 43205 }, { "epoch": 0.7801948591279729, "grad_norm": 0.47515806555747986, "learning_rate": 2.2907977778117075e-06, "loss": 0.3316, "step": 43210 }, { "epoch": 0.7802851385608737, "grad_norm": 0.11298256367444992, "learning_rate": 2.2889915949432794e-06, "loss": 0.2556, "step": 43215 }, { "epoch": 0.7803754179937743, "grad_norm": 0.2375980019569397, "learning_rate": 2.287186032369416e-06, "loss": 0.2101, "step": 43220 }, { "epoch": 0.780465697426675, "grad_norm": 0.4574558138847351, "learning_rate": 2.2853810902353523e-06, "loss": 0.234, "step": 43225 }, { "epoch": 0.7805559768595758, "grad_norm": 0.4236000180244446, "learning_rate": 2.283576768686296e-06, "loss": 0.2537, "step": 43230 }, { "epoch": 0.7806462562924765, "grad_norm": 0.26832252740859985, "learning_rate": 2.281773067867381e-06, "loss": 0.1987, "step": 43235 }, { "epoch": 0.7807365357253772, "grad_norm": 0.7041072249412537, "learning_rate": 2.279969987923709e-06, "loss": 0.208, "step": 43240 }, { "epoch": 0.7808268151582779, "grad_norm": 0.41026878356933594, "learning_rate": 2.2781675290003192e-06, "loss": 0.176, "step": 43245 }, { "epoch": 0.7809170945911786, "grad_norm": 0.22822080552577972, "learning_rate": 2.2763656912422104e-06, "loss": 0.2001, "step": 43250 }, { "epoch": 0.7810073740240794, "grad_norm": 0.5400646328926086, "learning_rate": 2.274564474794324e-06, "loss": 0.2427, "step": 43255 }, { "epoch": 0.78109765345698, "grad_norm": 0.5550546646118164, "learning_rate": 2.272763879801555e-06, "loss": 0.194, "step": 43260 }, { "epoch": 0.7811879328898808, "grad_norm": 0.6339682340621948, "learning_rate": 2.27096390640875e-06, "loss": 0.3327, "step": 43265 }, { "epoch": 0.7812782123227815, "grad_norm": 0.2583569586277008, "learning_rate": 2.2691645547607057e-06, "loss": 0.2188, "step": 43270 }, { "epoch": 0.7813684917556822, "grad_norm": 0.6766750812530518, "learning_rate": 2.2673658250021613e-06, "loss": 0.215, "step": 43275 }, { "epoch": 0.7814587711885829, "grad_norm": 0.5420390367507935, "learning_rate": 2.265567717277818e-06, "loss": 0.3132, "step": 43280 }, { "epoch": 0.7815490506214836, "grad_norm": 0.6376538872718811, "learning_rate": 2.2637702317323175e-06, "loss": 0.1995, "step": 43285 }, { "epoch": 0.7816393300543844, "grad_norm": 0.5242908596992493, "learning_rate": 2.261973368510247e-06, "loss": 0.2295, "step": 43290 }, { "epoch": 0.7817296094872851, "grad_norm": 0.3954088091850281, "learning_rate": 2.2601771277561646e-06, "loss": 0.246, "step": 43295 }, { "epoch": 0.7818198889201857, "grad_norm": 0.5970197319984436, "learning_rate": 2.2583815096145554e-06, "loss": 0.1949, "step": 43300 }, { "epoch": 0.7819101683530865, "grad_norm": 0.4973948001861572, "learning_rate": 2.2565865142298705e-06, "loss": 0.2085, "step": 43305 }, { "epoch": 0.7820004477859872, "grad_norm": 0.40069374442100525, "learning_rate": 2.254792141746496e-06, "loss": 0.1581, "step": 43310 }, { "epoch": 0.782090727218888, "grad_norm": 0.4839843213558197, "learning_rate": 2.252998392308784e-06, "loss": 0.2285, "step": 43315 }, { "epoch": 0.7821810066517886, "grad_norm": 0.3791256546974182, "learning_rate": 2.2512052660610228e-06, "loss": 0.198, "step": 43320 }, { "epoch": 0.7822712860846893, "grad_norm": 0.17784009873867035, "learning_rate": 2.249412763147458e-06, "loss": 0.2166, "step": 43325 }, { "epoch": 0.7823615655175901, "grad_norm": 0.8475596308708191, "learning_rate": 2.2476208837122848e-06, "loss": 0.2128, "step": 43330 }, { "epoch": 0.7824518449504908, "grad_norm": 0.27115878462791443, "learning_rate": 2.2458296278996495e-06, "loss": 0.2006, "step": 43335 }, { "epoch": 0.7825421243833914, "grad_norm": 0.7656043767929077, "learning_rate": 2.244038995853639e-06, "loss": 0.2685, "step": 43340 }, { "epoch": 0.7826324038162922, "grad_norm": 0.6903694868087769, "learning_rate": 2.242248987718303e-06, "loss": 0.2675, "step": 43345 }, { "epoch": 0.7827226832491929, "grad_norm": 0.5449568033218384, "learning_rate": 2.240459603637628e-06, "loss": 0.241, "step": 43350 }, { "epoch": 0.7828129626820937, "grad_norm": 0.3075452148914337, "learning_rate": 2.2386708437555615e-06, "loss": 0.2468, "step": 43355 }, { "epoch": 0.7829032421149943, "grad_norm": 0.6332199573516846, "learning_rate": 2.2368827082159948e-06, "loss": 0.2719, "step": 43360 }, { "epoch": 0.782993521547895, "grad_norm": 0.7227048277854919, "learning_rate": 2.235095197162774e-06, "loss": 0.2386, "step": 43365 }, { "epoch": 0.7830838009807958, "grad_norm": 0.4199582040309906, "learning_rate": 2.2333083107396856e-06, "loss": 0.2172, "step": 43370 }, { "epoch": 0.7831740804136965, "grad_norm": 0.5018370747566223, "learning_rate": 2.2315220490904775e-06, "loss": 0.2905, "step": 43375 }, { "epoch": 0.7832643598465971, "grad_norm": 0.5064613223075867, "learning_rate": 2.229736412358837e-06, "loss": 0.1765, "step": 43380 }, { "epoch": 0.7833546392794979, "grad_norm": 0.2941718101501465, "learning_rate": 2.2279514006884053e-06, "loss": 0.1625, "step": 43385 }, { "epoch": 0.7834449187123986, "grad_norm": 0.7269048690795898, "learning_rate": 2.2261670142227743e-06, "loss": 0.2063, "step": 43390 }, { "epoch": 0.7835351981452994, "grad_norm": 0.3319731056690216, "learning_rate": 2.2243832531054865e-06, "loss": 0.1869, "step": 43395 }, { "epoch": 0.7836254775782, "grad_norm": 0.7761351466178894, "learning_rate": 2.222600117480034e-06, "loss": 0.2278, "step": 43400 }, { "epoch": 0.7837157570111007, "grad_norm": 0.1965448260307312, "learning_rate": 2.2208176074898523e-06, "loss": 0.1648, "step": 43405 }, { "epoch": 0.7838060364440015, "grad_norm": 0.26565489172935486, "learning_rate": 2.2190357232783367e-06, "loss": 0.2174, "step": 43410 }, { "epoch": 0.7838963158769022, "grad_norm": 0.537438690662384, "learning_rate": 2.2172544649888207e-06, "loss": 0.2583, "step": 43415 }, { "epoch": 0.7839865953098029, "grad_norm": 0.49586981534957886, "learning_rate": 2.2154738327645973e-06, "loss": 0.2747, "step": 43420 }, { "epoch": 0.7840768747427036, "grad_norm": 0.38649171590805054, "learning_rate": 2.2136938267489062e-06, "loss": 0.2154, "step": 43425 }, { "epoch": 0.7841671541756043, "grad_norm": 0.6896102428436279, "learning_rate": 2.211914447084936e-06, "loss": 0.2633, "step": 43430 }, { "epoch": 0.7842574336085051, "grad_norm": 0.4301951825618744, "learning_rate": 2.2101356939158226e-06, "loss": 0.2399, "step": 43435 }, { "epoch": 0.7843477130414057, "grad_norm": 0.18921710550785065, "learning_rate": 2.2083575673846557e-06, "loss": 0.1989, "step": 43440 }, { "epoch": 0.7844379924743065, "grad_norm": 0.2111215591430664, "learning_rate": 2.2065800676344704e-06, "loss": 0.1991, "step": 43445 }, { "epoch": 0.7845282719072072, "grad_norm": 0.5096626281738281, "learning_rate": 2.204803194808257e-06, "loss": 0.2655, "step": 43450 }, { "epoch": 0.7846185513401079, "grad_norm": 0.6938595175743103, "learning_rate": 2.2030269490489476e-06, "loss": 0.2023, "step": 43455 }, { "epoch": 0.7847088307730086, "grad_norm": 0.26685193181037903, "learning_rate": 2.201251330499431e-06, "loss": 0.2811, "step": 43460 }, { "epoch": 0.7847991102059093, "grad_norm": 0.47773346304893494, "learning_rate": 2.199476339302542e-06, "loss": 0.2015, "step": 43465 }, { "epoch": 0.78488938963881, "grad_norm": 0.3084557354450226, "learning_rate": 2.1977019756010688e-06, "loss": 0.169, "step": 43470 }, { "epoch": 0.7849796690717108, "grad_norm": 0.5808871984481812, "learning_rate": 2.1959282395377444e-06, "loss": 0.2921, "step": 43475 }, { "epoch": 0.7850699485046114, "grad_norm": 0.400627076625824, "learning_rate": 2.194155131255249e-06, "loss": 0.1515, "step": 43480 }, { "epoch": 0.7851602279375122, "grad_norm": 0.489260733127594, "learning_rate": 2.192382650896219e-06, "loss": 0.224, "step": 43485 }, { "epoch": 0.7852505073704129, "grad_norm": 0.36692380905151367, "learning_rate": 2.1906107986032376e-06, "loss": 0.2647, "step": 43490 }, { "epoch": 0.7853407868033137, "grad_norm": 0.3548275828361511, "learning_rate": 2.1888395745188406e-06, "loss": 0.1723, "step": 43495 }, { "epoch": 0.7854310662362143, "grad_norm": 0.6279739141464233, "learning_rate": 2.187068978785505e-06, "loss": 0.1765, "step": 43500 }, { "epoch": 0.785521345669115, "grad_norm": 0.3587413728237152, "learning_rate": 2.1852990115456662e-06, "loss": 0.1683, "step": 43505 }, { "epoch": 0.7856116251020158, "grad_norm": 0.5421385169029236, "learning_rate": 2.183529672941701e-06, "loss": 0.2902, "step": 43510 }, { "epoch": 0.7857019045349165, "grad_norm": 0.5938059091567993, "learning_rate": 2.1817609631159465e-06, "loss": 0.2155, "step": 43515 }, { "epoch": 0.7857921839678171, "grad_norm": 0.6020984053611755, "learning_rate": 2.179992882210674e-06, "loss": 0.3028, "step": 43520 }, { "epoch": 0.7858824634007179, "grad_norm": 0.26961055397987366, "learning_rate": 2.1782254303681183e-06, "loss": 0.278, "step": 43525 }, { "epoch": 0.7859727428336186, "grad_norm": 0.6371787190437317, "learning_rate": 2.176458607730457e-06, "loss": 0.2311, "step": 43530 }, { "epoch": 0.7860630222665194, "grad_norm": 0.25425082445144653, "learning_rate": 2.1746924144398205e-06, "loss": 0.1772, "step": 43535 }, { "epoch": 0.78615330169942, "grad_norm": 0.23963913321495056, "learning_rate": 2.1729268506382806e-06, "loss": 0.2656, "step": 43540 }, { "epoch": 0.7862435811323207, "grad_norm": 0.7477115988731384, "learning_rate": 2.17116191646787e-06, "loss": 0.2777, "step": 43545 }, { "epoch": 0.7863338605652215, "grad_norm": 0.4997061789035797, "learning_rate": 2.1693976120705597e-06, "loss": 0.2512, "step": 43550 }, { "epoch": 0.7864241399981222, "grad_norm": 0.7205522656440735, "learning_rate": 2.1676339375882783e-06, "loss": 0.1962, "step": 43555 }, { "epoch": 0.7865144194310228, "grad_norm": 0.529256284236908, "learning_rate": 2.1658708931629014e-06, "loss": 0.1789, "step": 43560 }, { "epoch": 0.7866046988639236, "grad_norm": 0.5814422965049744, "learning_rate": 2.1641084789362486e-06, "loss": 0.1929, "step": 43565 }, { "epoch": 0.7866949782968243, "grad_norm": 0.6365538835525513, "learning_rate": 2.1623466950501004e-06, "loss": 0.2114, "step": 43570 }, { "epoch": 0.7867852577297251, "grad_norm": 0.8101227879524231, "learning_rate": 2.1605855416461706e-06, "loss": 0.2193, "step": 43575 }, { "epoch": 0.7868755371626257, "grad_norm": 0.7182210087776184, "learning_rate": 2.1588250188661406e-06, "loss": 0.26, "step": 43580 }, { "epoch": 0.7869658165955264, "grad_norm": 0.6568745374679565, "learning_rate": 2.157065126851623e-06, "loss": 0.2076, "step": 43585 }, { "epoch": 0.7870560960284272, "grad_norm": 0.402112752199173, "learning_rate": 2.155305865744193e-06, "loss": 0.3074, "step": 43590 }, { "epoch": 0.7871463754613279, "grad_norm": 0.2906830608844757, "learning_rate": 2.1535472356853692e-06, "loss": 0.1138, "step": 43595 }, { "epoch": 0.7872366548942287, "grad_norm": 0.4955861568450928, "learning_rate": 2.151789236816624e-06, "loss": 0.1751, "step": 43600 }, { "epoch": 0.7873269343271293, "grad_norm": 0.4004211723804474, "learning_rate": 2.1500318692793697e-06, "loss": 0.1865, "step": 43605 }, { "epoch": 0.78741721376003, "grad_norm": 0.5741631984710693, "learning_rate": 2.1482751332149797e-06, "loss": 0.2453, "step": 43610 }, { "epoch": 0.7875074931929308, "grad_norm": 0.3867337703704834, "learning_rate": 2.1465190287647654e-06, "loss": 0.2174, "step": 43615 }, { "epoch": 0.7875977726258315, "grad_norm": 0.4493885636329651, "learning_rate": 2.1447635560699943e-06, "loss": 0.1979, "step": 43620 }, { "epoch": 0.7876880520587322, "grad_norm": 0.41886889934539795, "learning_rate": 2.1430087152718815e-06, "loss": 0.2179, "step": 43625 }, { "epoch": 0.7877783314916329, "grad_norm": 0.2565055787563324, "learning_rate": 2.1412545065115953e-06, "loss": 0.2582, "step": 43630 }, { "epoch": 0.7878686109245336, "grad_norm": 0.4966196119785309, "learning_rate": 2.139500929930243e-06, "loss": 0.2175, "step": 43635 }, { "epoch": 0.7879588903574344, "grad_norm": 0.2613748610019684, "learning_rate": 2.137747985668892e-06, "loss": 0.1366, "step": 43640 }, { "epoch": 0.788049169790335, "grad_norm": 0.5003235936164856, "learning_rate": 2.1359956738685483e-06, "loss": 0.2165, "step": 43645 }, { "epoch": 0.7881394492232358, "grad_norm": 0.5621291399002075, "learning_rate": 2.1342439946701788e-06, "loss": 0.2396, "step": 43650 }, { "epoch": 0.7882297286561365, "grad_norm": 0.24266287684440613, "learning_rate": 2.132492948214687e-06, "loss": 0.2551, "step": 43655 }, { "epoch": 0.7883200080890372, "grad_norm": 0.46053916215896606, "learning_rate": 2.1307425346429354e-06, "loss": 0.1598, "step": 43660 }, { "epoch": 0.7884102875219379, "grad_norm": 0.32211849093437195, "learning_rate": 2.1289927540957355e-06, "loss": 0.2583, "step": 43665 }, { "epoch": 0.7885005669548386, "grad_norm": 0.19784227013587952, "learning_rate": 2.127243606713838e-06, "loss": 0.1961, "step": 43670 }, { "epoch": 0.7885908463877394, "grad_norm": 0.2373937964439392, "learning_rate": 2.1254950926379537e-06, "loss": 0.1871, "step": 43675 }, { "epoch": 0.7886811258206401, "grad_norm": 0.5469317436218262, "learning_rate": 2.123747212008733e-06, "loss": 0.2107, "step": 43680 }, { "epoch": 0.7887714052535407, "grad_norm": 1.6750993728637695, "learning_rate": 2.1219999649667844e-06, "loss": 0.2939, "step": 43685 }, { "epoch": 0.7888616846864415, "grad_norm": 0.6998043656349182, "learning_rate": 2.12025335165266e-06, "loss": 0.2774, "step": 43690 }, { "epoch": 0.7889519641193422, "grad_norm": 0.37849679589271545, "learning_rate": 2.118507372206864e-06, "loss": 0.1752, "step": 43695 }, { "epoch": 0.789042243552243, "grad_norm": 0.48230743408203125, "learning_rate": 2.116762026769844e-06, "loss": 0.2855, "step": 43700 }, { "epoch": 0.7891325229851436, "grad_norm": 0.4847571849822998, "learning_rate": 2.115017315482004e-06, "loss": 0.1631, "step": 43705 }, { "epoch": 0.7892228024180443, "grad_norm": 0.6108053922653198, "learning_rate": 2.1132732384836896e-06, "loss": 0.2345, "step": 43710 }, { "epoch": 0.7893130818509451, "grad_norm": 0.46877530217170715, "learning_rate": 2.1115297959152036e-06, "loss": 0.1766, "step": 43715 }, { "epoch": 0.7894033612838458, "grad_norm": 0.24572497606277466, "learning_rate": 2.1097869879167887e-06, "loss": 0.2355, "step": 43720 }, { "epoch": 0.7894936407167464, "grad_norm": 0.35887739062309265, "learning_rate": 2.1080448146286426e-06, "loss": 0.2242, "step": 43725 }, { "epoch": 0.7895839201496472, "grad_norm": 0.5892414450645447, "learning_rate": 2.1063032761909107e-06, "loss": 0.2766, "step": 43730 }, { "epoch": 0.7896741995825479, "grad_norm": 0.2584325969219208, "learning_rate": 2.1045623727436916e-06, "loss": 0.1576, "step": 43735 }, { "epoch": 0.7897644790154487, "grad_norm": 0.5412663817405701, "learning_rate": 2.1028221044270204e-06, "loss": 0.2048, "step": 43740 }, { "epoch": 0.7898547584483493, "grad_norm": 0.567855715751648, "learning_rate": 2.101082471380895e-06, "loss": 0.248, "step": 43745 }, { "epoch": 0.78994503788125, "grad_norm": 0.33024871349334717, "learning_rate": 2.0993434737452535e-06, "loss": 0.2433, "step": 43750 }, { "epoch": 0.7900353173141508, "grad_norm": 0.1798364520072937, "learning_rate": 2.097605111659985e-06, "loss": 0.2805, "step": 43755 }, { "epoch": 0.7901255967470515, "grad_norm": 0.7021406292915344, "learning_rate": 2.0958673852649314e-06, "loss": 0.1943, "step": 43760 }, { "epoch": 0.7902158761799521, "grad_norm": 0.5602862238883972, "learning_rate": 2.0941302946998754e-06, "loss": 0.2384, "step": 43765 }, { "epoch": 0.7903061556128529, "grad_norm": 0.43217143416404724, "learning_rate": 2.0923938401045596e-06, "loss": 0.2101, "step": 43770 }, { "epoch": 0.7903964350457536, "grad_norm": 0.30548378825187683, "learning_rate": 2.090658021618662e-06, "loss": 0.2774, "step": 43775 }, { "epoch": 0.7904867144786544, "grad_norm": 0.5456411242485046, "learning_rate": 2.0889228393818238e-06, "loss": 0.3424, "step": 43780 }, { "epoch": 0.790576993911555, "grad_norm": 0.636239230632782, "learning_rate": 2.087188293533621e-06, "loss": 0.206, "step": 43785 }, { "epoch": 0.7906672733444557, "grad_norm": 0.3845883011817932, "learning_rate": 2.0854543842135877e-06, "loss": 0.1681, "step": 43790 }, { "epoch": 0.7907575527773565, "grad_norm": 0.5687711834907532, "learning_rate": 2.0837211115612056e-06, "loss": 0.24, "step": 43795 }, { "epoch": 0.7908478322102572, "grad_norm": 0.38964512944221497, "learning_rate": 2.081988475715905e-06, "loss": 0.2319, "step": 43800 }, { "epoch": 0.7909381116431579, "grad_norm": 0.6546294093132019, "learning_rate": 2.0802564768170597e-06, "loss": 0.2245, "step": 43805 }, { "epoch": 0.7910283910760586, "grad_norm": 1.599808931350708, "learning_rate": 2.078525115004002e-06, "loss": 0.2973, "step": 43810 }, { "epoch": 0.7911186705089593, "grad_norm": 0.3674757182598114, "learning_rate": 2.0767943904160003e-06, "loss": 0.162, "step": 43815 }, { "epoch": 0.7912089499418601, "grad_norm": 0.43936610221862793, "learning_rate": 2.0750643031922833e-06, "loss": 0.2228, "step": 43820 }, { "epoch": 0.7912992293747607, "grad_norm": 0.3835139572620392, "learning_rate": 2.0733348534720233e-06, "loss": 0.1925, "step": 43825 }, { "epoch": 0.7913895088076615, "grad_norm": 0.5503572821617126, "learning_rate": 2.071606041394345e-06, "loss": 0.1709, "step": 43830 }, { "epoch": 0.7914797882405622, "grad_norm": 0.4535362422466278, "learning_rate": 2.0698778670983144e-06, "loss": 0.3186, "step": 43835 }, { "epoch": 0.7915700676734629, "grad_norm": 0.2782318592071533, "learning_rate": 2.0681503307229502e-06, "loss": 0.2229, "step": 43840 }, { "epoch": 0.7916603471063636, "grad_norm": 0.5089423656463623, "learning_rate": 2.066423432407224e-06, "loss": 0.1557, "step": 43845 }, { "epoch": 0.7917506265392643, "grad_norm": 0.3967224657535553, "learning_rate": 2.0646971722900476e-06, "loss": 0.2635, "step": 43850 }, { "epoch": 0.791840905972165, "grad_norm": 0.20601853728294373, "learning_rate": 2.0629715505102886e-06, "loss": 0.2717, "step": 43855 }, { "epoch": 0.7919311854050658, "grad_norm": 0.5291382670402527, "learning_rate": 2.0612465672067606e-06, "loss": 0.373, "step": 43860 }, { "epoch": 0.7920214648379664, "grad_norm": 0.42321300506591797, "learning_rate": 2.0595222225182286e-06, "loss": 0.2309, "step": 43865 }, { "epoch": 0.7921117442708672, "grad_norm": 0.24983927607536316, "learning_rate": 2.057798516583398e-06, "loss": 0.1551, "step": 43870 }, { "epoch": 0.7922020237037679, "grad_norm": 0.4658087491989136, "learning_rate": 2.056075449540934e-06, "loss": 0.2469, "step": 43875 }, { "epoch": 0.7922923031366687, "grad_norm": 0.501194179058075, "learning_rate": 2.0543530215294396e-06, "loss": 0.2513, "step": 43880 }, { "epoch": 0.7923825825695693, "grad_norm": 0.21056383848190308, "learning_rate": 2.0526312326874742e-06, "loss": 0.161, "step": 43885 }, { "epoch": 0.79247286200247, "grad_norm": 0.3605361580848694, "learning_rate": 2.0509100831535433e-06, "loss": 0.1909, "step": 43890 }, { "epoch": 0.7925631414353708, "grad_norm": 0.8153850436210632, "learning_rate": 2.049189573066103e-06, "loss": 0.1772, "step": 43895 }, { "epoch": 0.7926534208682715, "grad_norm": 0.7240114212036133, "learning_rate": 2.0474697025635516e-06, "loss": 0.3147, "step": 43900 }, { "epoch": 0.7927437003011721, "grad_norm": 0.7593914270401001, "learning_rate": 2.0457504717842436e-06, "loss": 0.2647, "step": 43905 }, { "epoch": 0.7928339797340729, "grad_norm": 0.5896070599555969, "learning_rate": 2.044031880866475e-06, "loss": 0.2271, "step": 43910 }, { "epoch": 0.7929242591669736, "grad_norm": 0.7276526689529419, "learning_rate": 2.0423139299484994e-06, "loss": 0.1991, "step": 43915 }, { "epoch": 0.7930145385998744, "grad_norm": 0.7303504943847656, "learning_rate": 2.0405966191685067e-06, "loss": 0.1968, "step": 43920 }, { "epoch": 0.793104818032775, "grad_norm": 0.27771618962287903, "learning_rate": 2.038879948664646e-06, "loss": 0.2311, "step": 43925 }, { "epoch": 0.7931950974656757, "grad_norm": 0.6631217002868652, "learning_rate": 2.0371639185750138e-06, "loss": 0.1537, "step": 43930 }, { "epoch": 0.7932853768985765, "grad_norm": 0.2898246943950653, "learning_rate": 2.0354485290376457e-06, "loss": 0.2196, "step": 43935 }, { "epoch": 0.7933756563314772, "grad_norm": 0.22012820839881897, "learning_rate": 2.0337337801905378e-06, "loss": 0.1732, "step": 43940 }, { "epoch": 0.7934659357643778, "grad_norm": 0.4568951725959778, "learning_rate": 2.0320196721716245e-06, "loss": 0.2369, "step": 43945 }, { "epoch": 0.7935562151972786, "grad_norm": 0.9336597919464111, "learning_rate": 2.030306205118796e-06, "loss": 0.2489, "step": 43950 }, { "epoch": 0.7936464946301793, "grad_norm": 0.2505965828895569, "learning_rate": 2.0285933791698874e-06, "loss": 0.206, "step": 43955 }, { "epoch": 0.7937367740630801, "grad_norm": 0.4175946116447449, "learning_rate": 2.0268811944626877e-06, "loss": 0.17, "step": 43960 }, { "epoch": 0.7938270534959807, "grad_norm": 0.47757190465927124, "learning_rate": 2.0251696511349216e-06, "loss": 0.1638, "step": 43965 }, { "epoch": 0.7939173329288814, "grad_norm": 0.37691453099250793, "learning_rate": 2.0234587493242776e-06, "loss": 0.2345, "step": 43970 }, { "epoch": 0.7940076123617822, "grad_norm": 0.2829607129096985, "learning_rate": 2.0217484891683805e-06, "loss": 0.2027, "step": 43975 }, { "epoch": 0.7940978917946829, "grad_norm": 0.6008973121643066, "learning_rate": 2.0200388708048112e-06, "loss": 0.1772, "step": 43980 }, { "epoch": 0.7941881712275836, "grad_norm": 0.4292893707752228, "learning_rate": 2.018329894371093e-06, "loss": 0.2053, "step": 43985 }, { "epoch": 0.7942784506604843, "grad_norm": 0.47939789295196533, "learning_rate": 2.016621560004701e-06, "loss": 0.2613, "step": 43990 }, { "epoch": 0.794368730093385, "grad_norm": 0.4499230980873108, "learning_rate": 2.014913867843061e-06, "loss": 0.2316, "step": 43995 }, { "epoch": 0.7944590095262858, "grad_norm": 0.36144039034843445, "learning_rate": 2.0132068180235463e-06, "loss": 0.1648, "step": 44000 }, { "epoch": 0.7945492889591865, "grad_norm": 0.46915683150291443, "learning_rate": 2.01150041068347e-06, "loss": 0.2406, "step": 44005 }, { "epoch": 0.7946395683920872, "grad_norm": 0.3485051095485687, "learning_rate": 2.009794645960107e-06, "loss": 0.2579, "step": 44010 }, { "epoch": 0.7947298478249879, "grad_norm": 0.46306657791137695, "learning_rate": 2.0080895239906674e-06, "loss": 0.2315, "step": 44015 }, { "epoch": 0.7948201272578886, "grad_norm": 0.46812009811401367, "learning_rate": 2.006385044912318e-06, "loss": 0.198, "step": 44020 }, { "epoch": 0.7949104066907894, "grad_norm": 0.6879592537879944, "learning_rate": 2.004681208862176e-06, "loss": 0.3155, "step": 44025 }, { "epoch": 0.79500068612369, "grad_norm": 0.26458317041397095, "learning_rate": 2.002978015977296e-06, "loss": 0.1753, "step": 44030 }, { "epoch": 0.7950909655565908, "grad_norm": 0.5082259774208069, "learning_rate": 2.001275466394693e-06, "loss": 0.2284, "step": 44035 }, { "epoch": 0.7951812449894915, "grad_norm": 0.593024492263794, "learning_rate": 1.999573560251321e-06, "loss": 0.2442, "step": 44040 }, { "epoch": 0.7952715244223922, "grad_norm": 0.4718942940235138, "learning_rate": 1.9978722976840894e-06, "loss": 0.2392, "step": 44045 }, { "epoch": 0.7953618038552929, "grad_norm": 0.3856017291545868, "learning_rate": 1.9961716788298467e-06, "loss": 0.2105, "step": 44050 }, { "epoch": 0.7954520832881936, "grad_norm": 0.3091108500957489, "learning_rate": 1.9944717038254002e-06, "loss": 0.2276, "step": 44055 }, { "epoch": 0.7955423627210944, "grad_norm": 0.6325206160545349, "learning_rate": 1.9927723728074997e-06, "loss": 0.2424, "step": 44060 }, { "epoch": 0.7956326421539951, "grad_norm": 0.39874640107154846, "learning_rate": 1.9910736859128453e-06, "loss": 0.2064, "step": 44065 }, { "epoch": 0.7957229215868957, "grad_norm": 0.3896929919719696, "learning_rate": 1.98937564327808e-06, "loss": 0.2513, "step": 44070 }, { "epoch": 0.7958132010197965, "grad_norm": 0.5646119117736816, "learning_rate": 1.9876782450398036e-06, "loss": 0.2181, "step": 44075 }, { "epoch": 0.7959034804526972, "grad_norm": 0.3404809534549713, "learning_rate": 1.9859814913345556e-06, "loss": 0.1505, "step": 44080 }, { "epoch": 0.795993759885598, "grad_norm": 0.23280753195285797, "learning_rate": 1.9842853822988285e-06, "loss": 0.2858, "step": 44085 }, { "epoch": 0.7960840393184986, "grad_norm": 0.4387173354625702, "learning_rate": 1.9825899180690623e-06, "loss": 0.2395, "step": 44090 }, { "epoch": 0.7961743187513993, "grad_norm": 0.5827388167381287, "learning_rate": 1.9808950987816477e-06, "loss": 0.1909, "step": 44095 }, { "epoch": 0.7962645981843001, "grad_norm": 0.28081467747688293, "learning_rate": 1.979200924572915e-06, "loss": 0.2074, "step": 44100 }, { "epoch": 0.7963548776172008, "grad_norm": 0.36692309379577637, "learning_rate": 1.9775073955791536e-06, "loss": 0.2335, "step": 44105 }, { "epoch": 0.7964451570501014, "grad_norm": 0.5759391188621521, "learning_rate": 1.9758145119365914e-06, "loss": 0.175, "step": 44110 }, { "epoch": 0.7965354364830022, "grad_norm": 0.6587239503860474, "learning_rate": 1.9741222737814113e-06, "loss": 0.1967, "step": 44115 }, { "epoch": 0.7966257159159029, "grad_norm": 0.5736746788024902, "learning_rate": 1.9724306812497394e-06, "loss": 0.3098, "step": 44120 }, { "epoch": 0.7967159953488037, "grad_norm": 0.3512267768383026, "learning_rate": 1.9707397344776524e-06, "loss": 0.1847, "step": 44125 }, { "epoch": 0.7968062747817043, "grad_norm": 0.4105289876461029, "learning_rate": 1.969049433601178e-06, "loss": 0.1587, "step": 44130 }, { "epoch": 0.796896554214605, "grad_norm": 0.3709127604961395, "learning_rate": 1.9673597787562835e-06, "loss": 0.1709, "step": 44135 }, { "epoch": 0.7969868336475058, "grad_norm": 0.24260620772838593, "learning_rate": 1.9656707700788945e-06, "loss": 0.2637, "step": 44140 }, { "epoch": 0.7970771130804065, "grad_norm": 0.3361005485057831, "learning_rate": 1.9639824077048754e-06, "loss": 0.1844, "step": 44145 }, { "epoch": 0.7971673925133071, "grad_norm": 0.6023269295692444, "learning_rate": 1.9622946917700425e-06, "loss": 0.2392, "step": 44150 }, { "epoch": 0.7972576719462079, "grad_norm": 0.4582315981388092, "learning_rate": 1.9606076224101624e-06, "loss": 0.2976, "step": 44155 }, { "epoch": 0.7973479513791086, "grad_norm": 0.1931309700012207, "learning_rate": 1.9589211997609493e-06, "loss": 0.1326, "step": 44160 }, { "epoch": 0.7974382308120094, "grad_norm": 0.5437281131744385, "learning_rate": 1.9572354239580594e-06, "loss": 0.21, "step": 44165 }, { "epoch": 0.79752851024491, "grad_norm": 0.31165415048599243, "learning_rate": 1.955550295137105e-06, "loss": 0.2583, "step": 44170 }, { "epoch": 0.7976187896778107, "grad_norm": 0.3427805006504059, "learning_rate": 1.9538658134336385e-06, "loss": 0.2355, "step": 44175 }, { "epoch": 0.7977090691107115, "grad_norm": 0.30942103266716003, "learning_rate": 1.9521819789831687e-06, "loss": 0.1942, "step": 44180 }, { "epoch": 0.7977993485436122, "grad_norm": 0.4148818552494049, "learning_rate": 1.9504987919211426e-06, "loss": 0.299, "step": 44185 }, { "epoch": 0.7978896279765129, "grad_norm": 0.34355810284614563, "learning_rate": 1.9488162523829635e-06, "loss": 0.246, "step": 44190 }, { "epoch": 0.7979799074094136, "grad_norm": 0.5479265451431274, "learning_rate": 1.9471343605039796e-06, "loss": 0.1632, "step": 44195 }, { "epoch": 0.7980701868423143, "grad_norm": 0.43104642629623413, "learning_rate": 1.945453116419488e-06, "loss": 0.2268, "step": 44200 }, { "epoch": 0.7981604662752151, "grad_norm": 0.6481384038925171, "learning_rate": 1.943772520264732e-06, "loss": 0.231, "step": 44205 }, { "epoch": 0.7982507457081157, "grad_norm": 0.3647168278694153, "learning_rate": 1.9420925721748985e-06, "loss": 0.2154, "step": 44210 }, { "epoch": 0.7983410251410165, "grad_norm": 0.5041742920875549, "learning_rate": 1.940413272285131e-06, "loss": 0.1393, "step": 44215 }, { "epoch": 0.7984313045739172, "grad_norm": 0.16152489185333252, "learning_rate": 1.938734620730518e-06, "loss": 0.2195, "step": 44220 }, { "epoch": 0.7985215840068179, "grad_norm": 0.21917890012264252, "learning_rate": 1.9370566176460957e-06, "loss": 0.1921, "step": 44225 }, { "epoch": 0.7986118634397186, "grad_norm": 1.227859616279602, "learning_rate": 1.935379263166842e-06, "loss": 0.294, "step": 44230 }, { "epoch": 0.7987021428726193, "grad_norm": 0.507227897644043, "learning_rate": 1.9337025574276945e-06, "loss": 0.2109, "step": 44235 }, { "epoch": 0.7987924223055201, "grad_norm": 0.521886944770813, "learning_rate": 1.9320265005635262e-06, "loss": 0.2597, "step": 44240 }, { "epoch": 0.7988827017384208, "grad_norm": 0.1763850450515747, "learning_rate": 1.9303510927091684e-06, "loss": 0.2288, "step": 44245 }, { "epoch": 0.7989729811713214, "grad_norm": 0.7960894703865051, "learning_rate": 1.9286763339993907e-06, "loss": 0.3042, "step": 44250 }, { "epoch": 0.7990632606042222, "grad_norm": 0.39358705282211304, "learning_rate": 1.927002224568917e-06, "loss": 0.2801, "step": 44255 }, { "epoch": 0.7991535400371229, "grad_norm": 0.8040003776550293, "learning_rate": 1.925328764552419e-06, "loss": 0.2096, "step": 44260 }, { "epoch": 0.7992438194700237, "grad_norm": 0.48055535554885864, "learning_rate": 1.9236559540845157e-06, "loss": 0.3142, "step": 44265 }, { "epoch": 0.7993340989029243, "grad_norm": 0.38093963265419006, "learning_rate": 1.9219837932997666e-06, "loss": 0.2982, "step": 44270 }, { "epoch": 0.799424378335825, "grad_norm": 0.5091568827629089, "learning_rate": 1.9203122823326925e-06, "loss": 0.1926, "step": 44275 }, { "epoch": 0.7995146577687258, "grad_norm": 0.4233086109161377, "learning_rate": 1.918641421317746e-06, "loss": 0.2658, "step": 44280 }, { "epoch": 0.7996049372016265, "grad_norm": 0.3393450379371643, "learning_rate": 1.9169712103893412e-06, "loss": 0.2316, "step": 44285 }, { "epoch": 0.7996952166345271, "grad_norm": 0.47289741039276123, "learning_rate": 1.915301649681833e-06, "loss": 0.3163, "step": 44290 }, { "epoch": 0.7997854960674279, "grad_norm": 0.8122720122337341, "learning_rate": 1.9136327393295285e-06, "loss": 0.2344, "step": 44295 }, { "epoch": 0.7998757755003286, "grad_norm": 0.40111979842185974, "learning_rate": 1.911964479466677e-06, "loss": 0.2124, "step": 44300 }, { "epoch": 0.7999660549332294, "grad_norm": 0.3363553583621979, "learning_rate": 1.9102968702274737e-06, "loss": 0.1898, "step": 44305 }, { "epoch": 0.80005633436613, "grad_norm": 0.5055487751960754, "learning_rate": 1.9086299117460726e-06, "loss": 0.2962, "step": 44310 }, { "epoch": 0.8001466137990307, "grad_norm": 0.22720535099506378, "learning_rate": 1.906963604156562e-06, "loss": 0.1968, "step": 44315 }, { "epoch": 0.8002368932319315, "grad_norm": 0.4407348930835724, "learning_rate": 1.9052979475929877e-06, "loss": 0.2451, "step": 44320 }, { "epoch": 0.8003271726648322, "grad_norm": 0.40608954429626465, "learning_rate": 1.9036329421893395e-06, "loss": 0.1808, "step": 44325 }, { "epoch": 0.8004174520977329, "grad_norm": 0.34735071659088135, "learning_rate": 1.901968588079558e-06, "loss": 0.1944, "step": 44330 }, { "epoch": 0.8005077315306336, "grad_norm": 0.7078969478607178, "learning_rate": 1.9003048853975215e-06, "loss": 0.2149, "step": 44335 }, { "epoch": 0.8005980109635343, "grad_norm": 0.5943984389305115, "learning_rate": 1.8986418342770696e-06, "loss": 0.226, "step": 44340 }, { "epoch": 0.8006882903964351, "grad_norm": 0.5430403351783752, "learning_rate": 1.8969794348519777e-06, "loss": 0.2443, "step": 44345 }, { "epoch": 0.8007785698293357, "grad_norm": 0.38101640343666077, "learning_rate": 1.8953176872559742e-06, "loss": 0.2337, "step": 44350 }, { "epoch": 0.8008688492622364, "grad_norm": 0.30152231454849243, "learning_rate": 1.8936565916227367e-06, "loss": 0.2549, "step": 44355 }, { "epoch": 0.8009591286951372, "grad_norm": 0.6075379252433777, "learning_rate": 1.8919961480858907e-06, "loss": 0.1433, "step": 44360 }, { "epoch": 0.8010494081280379, "grad_norm": 0.4083125591278076, "learning_rate": 1.890336356779e-06, "loss": 0.2292, "step": 44365 }, { "epoch": 0.8011396875609386, "grad_norm": 0.44115856289863586, "learning_rate": 1.8886772178355894e-06, "loss": 0.1646, "step": 44370 }, { "epoch": 0.8012299669938393, "grad_norm": 0.4118017554283142, "learning_rate": 1.8870187313891197e-06, "loss": 0.1987, "step": 44375 }, { "epoch": 0.80132024642674, "grad_norm": 0.3881694972515106, "learning_rate": 1.8853608975730075e-06, "loss": 0.2035, "step": 44380 }, { "epoch": 0.8014105258596408, "grad_norm": 0.5062962770462036, "learning_rate": 1.8837037165206074e-06, "loss": 0.3047, "step": 44385 }, { "epoch": 0.8015008052925415, "grad_norm": 0.37755340337753296, "learning_rate": 1.882047188365238e-06, "loss": 0.1709, "step": 44390 }, { "epoch": 0.8015910847254422, "grad_norm": 0.19946175813674927, "learning_rate": 1.8803913132401474e-06, "loss": 0.2803, "step": 44395 }, { "epoch": 0.8016813641583429, "grad_norm": 0.5771229267120361, "learning_rate": 1.878736091278539e-06, "loss": 0.2037, "step": 44400 }, { "epoch": 0.8017716435912436, "grad_norm": 0.33102309703826904, "learning_rate": 1.8770815226135675e-06, "loss": 0.2233, "step": 44405 }, { "epoch": 0.8018619230241444, "grad_norm": 0.5204035043716431, "learning_rate": 1.875427607378325e-06, "loss": 0.3369, "step": 44410 }, { "epoch": 0.801952202457045, "grad_norm": 0.5146638751029968, "learning_rate": 1.8737743457058598e-06, "loss": 0.1338, "step": 44415 }, { "epoch": 0.8020424818899458, "grad_norm": 0.393114298582077, "learning_rate": 1.8721217377291656e-06, "loss": 0.1774, "step": 44420 }, { "epoch": 0.8021327613228465, "grad_norm": 0.5393953919410706, "learning_rate": 1.8704697835811858e-06, "loss": 0.2302, "step": 44425 }, { "epoch": 0.8022230407557472, "grad_norm": 0.33049318194389343, "learning_rate": 1.8688184833948008e-06, "loss": 0.2776, "step": 44430 }, { "epoch": 0.8023133201886479, "grad_norm": 0.4153202772140503, "learning_rate": 1.8671678373028524e-06, "loss": 0.1873, "step": 44435 }, { "epoch": 0.8024035996215486, "grad_norm": 0.5525389313697815, "learning_rate": 1.8655178454381162e-06, "loss": 0.1835, "step": 44440 }, { "epoch": 0.8024938790544494, "grad_norm": 0.4064835011959076, "learning_rate": 1.8638685079333296e-06, "loss": 0.1795, "step": 44445 }, { "epoch": 0.8025841584873501, "grad_norm": 0.39771416783332825, "learning_rate": 1.8622198249211609e-06, "loss": 0.1714, "step": 44450 }, { "epoch": 0.8026744379202507, "grad_norm": 0.9161157608032227, "learning_rate": 1.860571796534245e-06, "loss": 0.2378, "step": 44455 }, { "epoch": 0.8027647173531515, "grad_norm": 0.472883015871048, "learning_rate": 1.8589244229051472e-06, "loss": 0.2975, "step": 44460 }, { "epoch": 0.8028549967860522, "grad_norm": 0.3426239490509033, "learning_rate": 1.8572777041663892e-06, "loss": 0.1242, "step": 44465 }, { "epoch": 0.802945276218953, "grad_norm": 0.29185158014297485, "learning_rate": 1.8556316404504348e-06, "loss": 0.193, "step": 44470 }, { "epoch": 0.8030355556518536, "grad_norm": 0.40711694955825806, "learning_rate": 1.8539862318897027e-06, "loss": 0.2212, "step": 44475 }, { "epoch": 0.8031258350847543, "grad_norm": 0.8984672427177429, "learning_rate": 1.8523414786165472e-06, "loss": 0.2174, "step": 44480 }, { "epoch": 0.8032161145176551, "grad_norm": 0.47199028730392456, "learning_rate": 1.850697380763281e-06, "loss": 0.2688, "step": 44485 }, { "epoch": 0.8033063939505558, "grad_norm": 0.33139854669570923, "learning_rate": 1.849053938462163e-06, "loss": 0.2384, "step": 44490 }, { "epoch": 0.8033966733834564, "grad_norm": 0.39386871457099915, "learning_rate": 1.8474111518453886e-06, "loss": 0.2262, "step": 44495 }, { "epoch": 0.8034869528163572, "grad_norm": 0.5428234934806824, "learning_rate": 1.845769021045115e-06, "loss": 0.1837, "step": 44500 }, { "epoch": 0.8035772322492579, "grad_norm": 0.4081260561943054, "learning_rate": 1.8441275461934338e-06, "loss": 0.1902, "step": 44505 }, { "epoch": 0.8036675116821587, "grad_norm": 0.6265204548835754, "learning_rate": 1.8424867274223956e-06, "loss": 0.2034, "step": 44510 }, { "epoch": 0.8037577911150593, "grad_norm": 0.31426239013671875, "learning_rate": 1.8408465648639873e-06, "loss": 0.2762, "step": 44515 }, { "epoch": 0.80384807054796, "grad_norm": 0.5084913969039917, "learning_rate": 1.8392070586501487e-06, "loss": 0.2807, "step": 44520 }, { "epoch": 0.8039383499808608, "grad_norm": 0.5164384841918945, "learning_rate": 1.837568208912769e-06, "loss": 0.1994, "step": 44525 }, { "epoch": 0.8040286294137615, "grad_norm": 0.5218554139137268, "learning_rate": 1.8359300157836812e-06, "loss": 0.3358, "step": 44530 }, { "epoch": 0.8041189088466622, "grad_norm": 0.4245232343673706, "learning_rate": 1.8342924793946637e-06, "loss": 0.1924, "step": 44535 }, { "epoch": 0.8042091882795629, "grad_norm": 0.7326409220695496, "learning_rate": 1.8326555998774487e-06, "loss": 0.2482, "step": 44540 }, { "epoch": 0.8042994677124636, "grad_norm": 0.5915966033935547, "learning_rate": 1.831019377363704e-06, "loss": 0.2157, "step": 44545 }, { "epoch": 0.8043897471453644, "grad_norm": 0.4375337064266205, "learning_rate": 1.8293838119850583e-06, "loss": 0.192, "step": 44550 }, { "epoch": 0.804480026578265, "grad_norm": 1.0574471950531006, "learning_rate": 1.8277489038730779e-06, "loss": 0.3344, "step": 44555 }, { "epoch": 0.8045703060111657, "grad_norm": 0.3823387026786804, "learning_rate": 1.826114653159282e-06, "loss": 0.2604, "step": 44560 }, { "epoch": 0.8046605854440665, "grad_norm": 0.5871850252151489, "learning_rate": 1.8244810599751307e-06, "loss": 0.259, "step": 44565 }, { "epoch": 0.8047508648769672, "grad_norm": 0.3595975339412689, "learning_rate": 1.822848124452039e-06, "loss": 0.1577, "step": 44570 }, { "epoch": 0.8048411443098679, "grad_norm": 0.3500572144985199, "learning_rate": 1.8212158467213613e-06, "loss": 0.278, "step": 44575 }, { "epoch": 0.8049314237427686, "grad_norm": 2.607898235321045, "learning_rate": 1.8195842269144016e-06, "loss": 0.21, "step": 44580 }, { "epoch": 0.8050217031756693, "grad_norm": 0.2642761170864105, "learning_rate": 1.8179532651624133e-06, "loss": 0.1544, "step": 44585 }, { "epoch": 0.8051119826085701, "grad_norm": 0.7221905589103699, "learning_rate": 1.816322961596595e-06, "loss": 0.3092, "step": 44590 }, { "epoch": 0.8052022620414707, "grad_norm": 0.4336141049861908, "learning_rate": 1.8146933163480963e-06, "loss": 0.2354, "step": 44595 }, { "epoch": 0.8052925414743715, "grad_norm": 0.3331955075263977, "learning_rate": 1.8130643295480044e-06, "loss": 0.2931, "step": 44600 }, { "epoch": 0.8053828209072722, "grad_norm": 0.4939133822917938, "learning_rate": 1.8114360013273657e-06, "loss": 0.4071, "step": 44605 }, { "epoch": 0.8054731003401729, "grad_norm": 1.1969037055969238, "learning_rate": 1.8098083318171611e-06, "loss": 0.2334, "step": 44610 }, { "epoch": 0.8055633797730736, "grad_norm": 0.6354053020477295, "learning_rate": 1.808181321148328e-06, "loss": 0.2467, "step": 44615 }, { "epoch": 0.8056536592059743, "grad_norm": 0.8038125038146973, "learning_rate": 1.806554969451747e-06, "loss": 0.2662, "step": 44620 }, { "epoch": 0.8057439386388751, "grad_norm": 0.4923882484436035, "learning_rate": 1.8049292768582483e-06, "loss": 0.2144, "step": 44625 }, { "epoch": 0.8058342180717758, "grad_norm": 0.4922740161418915, "learning_rate": 1.8033042434986049e-06, "loss": 0.1656, "step": 44630 }, { "epoch": 0.8059244975046764, "grad_norm": 0.5181177854537964, "learning_rate": 1.8016798695035399e-06, "loss": 0.1638, "step": 44635 }, { "epoch": 0.8060147769375772, "grad_norm": 0.428590327501297, "learning_rate": 1.80005615500372e-06, "loss": 0.1832, "step": 44640 }, { "epoch": 0.8061050563704779, "grad_norm": 0.47034353017807007, "learning_rate": 1.798433100129766e-06, "loss": 0.2203, "step": 44645 }, { "epoch": 0.8061953358033787, "grad_norm": 0.33631256222724915, "learning_rate": 1.796810705012233e-06, "loss": 0.1783, "step": 44650 }, { "epoch": 0.8062856152362793, "grad_norm": 0.5317744612693787, "learning_rate": 1.7951889697816404e-06, "loss": 0.2507, "step": 44655 }, { "epoch": 0.80637589466918, "grad_norm": 0.3411064147949219, "learning_rate": 1.7935678945684375e-06, "loss": 0.1693, "step": 44660 }, { "epoch": 0.8064661741020808, "grad_norm": 0.42006516456604004, "learning_rate": 1.7919474795030346e-06, "loss": 0.283, "step": 44665 }, { "epoch": 0.8065564535349815, "grad_norm": 0.49459028244018555, "learning_rate": 1.790327724715778e-06, "loss": 0.1784, "step": 44670 }, { "epoch": 0.8066467329678821, "grad_norm": 0.3617062568664551, "learning_rate": 1.7887086303369638e-06, "loss": 0.1276, "step": 44675 }, { "epoch": 0.8067370124007829, "grad_norm": 0.4747965633869171, "learning_rate": 1.787090196496838e-06, "loss": 0.205, "step": 44680 }, { "epoch": 0.8068272918336836, "grad_norm": 0.25721991062164307, "learning_rate": 1.7854724233255926e-06, "loss": 0.1509, "step": 44685 }, { "epoch": 0.8069175712665844, "grad_norm": 0.997877299785614, "learning_rate": 1.7838553109533674e-06, "loss": 0.1909, "step": 44690 }, { "epoch": 0.807007850699485, "grad_norm": 0.38840773701667786, "learning_rate": 1.7822388595102424e-06, "loss": 0.2013, "step": 44695 }, { "epoch": 0.8070981301323857, "grad_norm": 0.5792832374572754, "learning_rate": 1.780623069126256e-06, "loss": 0.098, "step": 44700 }, { "epoch": 0.8071884095652865, "grad_norm": 0.22275789082050323, "learning_rate": 1.7790079399313798e-06, "loss": 0.195, "step": 44705 }, { "epoch": 0.8072786889981872, "grad_norm": 0.28965526819229126, "learning_rate": 1.7773934720555452e-06, "loss": 0.1749, "step": 44710 }, { "epoch": 0.8073689684310879, "grad_norm": 0.4989018440246582, "learning_rate": 1.7757796656286164e-06, "loss": 0.2092, "step": 44715 }, { "epoch": 0.8074592478639886, "grad_norm": 0.3674529194831848, "learning_rate": 1.7741665207804227e-06, "loss": 0.2806, "step": 44720 }, { "epoch": 0.8075495272968893, "grad_norm": 0.7431139945983887, "learning_rate": 1.7725540376407224e-06, "loss": 0.1866, "step": 44725 }, { "epoch": 0.8076398067297901, "grad_norm": 0.5370224714279175, "learning_rate": 1.7709422163392332e-06, "loss": 0.1992, "step": 44730 }, { "epoch": 0.8077300861626907, "grad_norm": 0.5872454047203064, "learning_rate": 1.7693310570056089e-06, "loss": 0.259, "step": 44735 }, { "epoch": 0.8078203655955914, "grad_norm": 0.42964139580726624, "learning_rate": 1.7677205597694602e-06, "loss": 0.1762, "step": 44740 }, { "epoch": 0.8079106450284922, "grad_norm": 0.3020368218421936, "learning_rate": 1.766110724760336e-06, "loss": 0.2001, "step": 44745 }, { "epoch": 0.8080009244613929, "grad_norm": 0.5135726928710938, "learning_rate": 1.764501552107738e-06, "loss": 0.2018, "step": 44750 }, { "epoch": 0.8080912038942936, "grad_norm": 0.4926067888736725, "learning_rate": 1.7628930419411128e-06, "loss": 0.2562, "step": 44755 }, { "epoch": 0.8081814833271943, "grad_norm": 0.37829169631004333, "learning_rate": 1.7612851943898546e-06, "loss": 0.1305, "step": 44760 }, { "epoch": 0.808271762760095, "grad_norm": 0.5007221698760986, "learning_rate": 1.759678009583301e-06, "loss": 0.2188, "step": 44765 }, { "epoch": 0.8083620421929958, "grad_norm": 0.34714531898498535, "learning_rate": 1.7580714876507366e-06, "loss": 0.2671, "step": 44770 }, { "epoch": 0.8084523216258964, "grad_norm": 0.3378044664859772, "learning_rate": 1.7564656287213988e-06, "loss": 0.2097, "step": 44775 }, { "epoch": 0.8085426010587972, "grad_norm": 0.5682040452957153, "learning_rate": 1.75486043292446e-06, "loss": 0.1759, "step": 44780 }, { "epoch": 0.8086328804916979, "grad_norm": 0.5743517279624939, "learning_rate": 1.753255900389056e-06, "loss": 0.237, "step": 44785 }, { "epoch": 0.8087231599245986, "grad_norm": 0.47140514850616455, "learning_rate": 1.751652031244253e-06, "loss": 0.2202, "step": 44790 }, { "epoch": 0.8088134393574994, "grad_norm": 0.628346860408783, "learning_rate": 1.7500488256190752e-06, "loss": 0.2611, "step": 44795 }, { "epoch": 0.8089037187904, "grad_norm": 0.2804016172885895, "learning_rate": 1.7484462836424842e-06, "loss": 0.2269, "step": 44800 }, { "epoch": 0.8089939982233008, "grad_norm": 0.4037613868713379, "learning_rate": 1.7468444054433976e-06, "loss": 0.1999, "step": 44805 }, { "epoch": 0.8090842776562015, "grad_norm": 0.7222532033920288, "learning_rate": 1.7452431911506695e-06, "loss": 0.3218, "step": 44810 }, { "epoch": 0.8091745570891022, "grad_norm": 0.23788173496723175, "learning_rate": 1.7436426408931084e-06, "loss": 0.1793, "step": 44815 }, { "epoch": 0.8092648365220029, "grad_norm": 0.47189345955848694, "learning_rate": 1.742042754799468e-06, "loss": 0.2618, "step": 44820 }, { "epoch": 0.8093551159549036, "grad_norm": 0.3974069654941559, "learning_rate": 1.7404435329984492e-06, "loss": 0.2938, "step": 44825 }, { "epoch": 0.8094453953878044, "grad_norm": 0.3743002116680145, "learning_rate": 1.7388449756186932e-06, "loss": 0.2527, "step": 44830 }, { "epoch": 0.8095356748207051, "grad_norm": 0.3871017396450043, "learning_rate": 1.7372470827887967e-06, "loss": 0.2407, "step": 44835 }, { "epoch": 0.8096259542536057, "grad_norm": 0.7488775849342346, "learning_rate": 1.7356498546372936e-06, "loss": 0.3174, "step": 44840 }, { "epoch": 0.8097162336865065, "grad_norm": 0.20142963528633118, "learning_rate": 1.7340532912926722e-06, "loss": 0.1753, "step": 44845 }, { "epoch": 0.8098065131194072, "grad_norm": 0.9276465177536011, "learning_rate": 1.7324573928833666e-06, "loss": 0.2644, "step": 44850 }, { "epoch": 0.809896792552308, "grad_norm": 0.43048661947250366, "learning_rate": 1.7308621595377517e-06, "loss": 0.2023, "step": 44855 }, { "epoch": 0.8099870719852086, "grad_norm": 0.4367915987968445, "learning_rate": 1.729267591384155e-06, "loss": 0.1995, "step": 44860 }, { "epoch": 0.8100773514181093, "grad_norm": 0.45970258116722107, "learning_rate": 1.7276736885508438e-06, "loss": 0.1826, "step": 44865 }, { "epoch": 0.8101676308510101, "grad_norm": 0.4545137286186218, "learning_rate": 1.7260804511660411e-06, "loss": 0.275, "step": 44870 }, { "epoch": 0.8102579102839108, "grad_norm": 0.6485731601715088, "learning_rate": 1.7244878793579067e-06, "loss": 0.1917, "step": 44875 }, { "epoch": 0.8103481897168114, "grad_norm": 0.3482241630554199, "learning_rate": 1.7228959732545547e-06, "loss": 0.1651, "step": 44880 }, { "epoch": 0.8104384691497122, "grad_norm": 0.1493377387523651, "learning_rate": 1.7213047329840405e-06, "loss": 0.191, "step": 44885 }, { "epoch": 0.8105287485826129, "grad_norm": 0.5001938939094543, "learning_rate": 1.7197141586743705e-06, "loss": 0.1329, "step": 44890 }, { "epoch": 0.8106190280155137, "grad_norm": 0.6227378249168396, "learning_rate": 1.7181242504534913e-06, "loss": 0.2342, "step": 44895 }, { "epoch": 0.8107093074484143, "grad_norm": 0.5441110730171204, "learning_rate": 1.7165350084493026e-06, "loss": 0.278, "step": 44900 }, { "epoch": 0.810799586881315, "grad_norm": 0.4558870196342468, "learning_rate": 1.7149464327896437e-06, "loss": 0.1451, "step": 44905 }, { "epoch": 0.8108898663142158, "grad_norm": 0.37264585494995117, "learning_rate": 1.7133585236023065e-06, "loss": 0.3025, "step": 44910 }, { "epoch": 0.8109801457471165, "grad_norm": 0.41475045680999756, "learning_rate": 1.7117712810150268e-06, "loss": 0.1962, "step": 44915 }, { "epoch": 0.8110704251800172, "grad_norm": 0.7094918489456177, "learning_rate": 1.7101847051554876e-06, "loss": 0.2787, "step": 44920 }, { "epoch": 0.8111607046129179, "grad_norm": 0.6133314967155457, "learning_rate": 1.7085987961513151e-06, "loss": 0.2263, "step": 44925 }, { "epoch": 0.8112509840458186, "grad_norm": 0.5395045280456543, "learning_rate": 1.7070135541300858e-06, "loss": 0.2635, "step": 44930 }, { "epoch": 0.8113412634787194, "grad_norm": 0.7307504415512085, "learning_rate": 1.705428979219319e-06, "loss": 0.2148, "step": 44935 }, { "epoch": 0.81143154291162, "grad_norm": 0.448566198348999, "learning_rate": 1.703845071546486e-06, "loss": 0.2212, "step": 44940 }, { "epoch": 0.8115218223445207, "grad_norm": 0.400714635848999, "learning_rate": 1.702261831238995e-06, "loss": 0.3076, "step": 44945 }, { "epoch": 0.8116121017774215, "grad_norm": 0.368564248085022, "learning_rate": 1.7006792584242094e-06, "loss": 0.1411, "step": 44950 }, { "epoch": 0.8117023812103222, "grad_norm": 0.3286515772342682, "learning_rate": 1.6990973532294386e-06, "loss": 0.2919, "step": 44955 }, { "epoch": 0.8117926606432229, "grad_norm": 0.6517858505249023, "learning_rate": 1.6975161157819297e-06, "loss": 0.2365, "step": 44960 }, { "epoch": 0.8118829400761236, "grad_norm": 0.4448356330394745, "learning_rate": 1.6959355462088867e-06, "loss": 0.2446, "step": 44965 }, { "epoch": 0.8119732195090243, "grad_norm": 0.35378003120422363, "learning_rate": 1.6943556446374497e-06, "loss": 0.2291, "step": 44970 }, { "epoch": 0.8120634989419251, "grad_norm": 0.8098388910293579, "learning_rate": 1.6927764111947142e-06, "loss": 0.2064, "step": 44975 }, { "epoch": 0.8121537783748257, "grad_norm": 0.6267945766448975, "learning_rate": 1.691197846007716e-06, "loss": 0.1499, "step": 44980 }, { "epoch": 0.8122440578077265, "grad_norm": 0.6006688475608826, "learning_rate": 1.6896199492034437e-06, "loss": 0.2227, "step": 44985 }, { "epoch": 0.8123343372406272, "grad_norm": 0.34231728315353394, "learning_rate": 1.6880427209088212e-06, "loss": 0.2521, "step": 44990 }, { "epoch": 0.8124246166735279, "grad_norm": 0.6057373285293579, "learning_rate": 1.6864661612507305e-06, "loss": 0.1788, "step": 44995 }, { "epoch": 0.8125148961064286, "grad_norm": 0.7820007801055908, "learning_rate": 1.684890270355989e-06, "loss": 0.1882, "step": 45000 }, { "epoch": 0.8126051755393293, "grad_norm": 0.6362524628639221, "learning_rate": 1.6833150483513717e-06, "loss": 0.2114, "step": 45005 }, { "epoch": 0.8126954549722301, "grad_norm": 0.4829091429710388, "learning_rate": 1.6817404953635874e-06, "loss": 0.2289, "step": 45010 }, { "epoch": 0.8127857344051308, "grad_norm": 0.486634224653244, "learning_rate": 1.6801666115193016e-06, "loss": 0.2444, "step": 45015 }, { "epoch": 0.8128760138380314, "grad_norm": 0.4345434308052063, "learning_rate": 1.6785933969451207e-06, "loss": 0.1903, "step": 45020 }, { "epoch": 0.8129662932709322, "grad_norm": 0.3699716031551361, "learning_rate": 1.6770208517676e-06, "loss": 0.2499, "step": 45025 }, { "epoch": 0.8130565727038329, "grad_norm": 0.15359029173851013, "learning_rate": 1.6754489761132365e-06, "loss": 0.1945, "step": 45030 }, { "epoch": 0.8131468521367337, "grad_norm": 0.3875373601913452, "learning_rate": 1.6738777701084795e-06, "loss": 0.2114, "step": 45035 }, { "epoch": 0.8132371315696343, "grad_norm": 0.42671942710876465, "learning_rate": 1.6723072338797197e-06, "loss": 0.2397, "step": 45040 }, { "epoch": 0.813327411002535, "grad_norm": 0.8030031323432922, "learning_rate": 1.670737367553289e-06, "loss": 0.2518, "step": 45045 }, { "epoch": 0.8134176904354358, "grad_norm": 0.8285560011863708, "learning_rate": 1.669168171255483e-06, "loss": 0.2263, "step": 45050 }, { "epoch": 0.8135079698683365, "grad_norm": 0.880510151386261, "learning_rate": 1.6675996451125243e-06, "loss": 0.2712, "step": 45055 }, { "epoch": 0.8135982493012371, "grad_norm": 0.4474562108516693, "learning_rate": 1.6660317892505939e-06, "loss": 0.2186, "step": 45060 }, { "epoch": 0.8136885287341379, "grad_norm": 0.5012331008911133, "learning_rate": 1.66446460379581e-06, "loss": 0.211, "step": 45065 }, { "epoch": 0.8137788081670386, "grad_norm": 0.5450423359870911, "learning_rate": 1.6628980888742452e-06, "loss": 0.2358, "step": 45070 }, { "epoch": 0.8138690875999394, "grad_norm": 0.704733669757843, "learning_rate": 1.6613322446119107e-06, "loss": 0.1618, "step": 45075 }, { "epoch": 0.81395936703284, "grad_norm": 0.22256499528884888, "learning_rate": 1.6597670711347691e-06, "loss": 0.2362, "step": 45080 }, { "epoch": 0.8140496464657407, "grad_norm": 0.3464551270008087, "learning_rate": 1.6582025685687274e-06, "loss": 0.2208, "step": 45085 }, { "epoch": 0.8141399258986415, "grad_norm": 0.35018548369407654, "learning_rate": 1.6566387370396409e-06, "loss": 0.1883, "step": 45090 }, { "epoch": 0.8142302053315422, "grad_norm": 0.4291853904724121, "learning_rate": 1.655075576673304e-06, "loss": 0.2775, "step": 45095 }, { "epoch": 0.8143204847644429, "grad_norm": 0.30712375044822693, "learning_rate": 1.6535130875954642e-06, "loss": 0.1741, "step": 45100 }, { "epoch": 0.8144107641973436, "grad_norm": 0.559040904045105, "learning_rate": 1.6519512699318107e-06, "loss": 0.2643, "step": 45105 }, { "epoch": 0.8145010436302443, "grad_norm": 0.48954933881759644, "learning_rate": 1.650390123807981e-06, "loss": 0.2021, "step": 45110 }, { "epoch": 0.8145913230631451, "grad_norm": 0.8701210618019104, "learning_rate": 1.648829649349558e-06, "loss": 0.131, "step": 45115 }, { "epoch": 0.8146816024960457, "grad_norm": 0.3505600094795227, "learning_rate": 1.6472698466820735e-06, "loss": 0.201, "step": 45120 }, { "epoch": 0.8147718819289465, "grad_norm": 0.2789531350135803, "learning_rate": 1.6457107159309993e-06, "loss": 0.2203, "step": 45125 }, { "epoch": 0.8148621613618472, "grad_norm": 1.8841131925582886, "learning_rate": 1.644152257221755e-06, "loss": 0.1857, "step": 45130 }, { "epoch": 0.8149524407947479, "grad_norm": 0.5127928256988525, "learning_rate": 1.6425944706797103e-06, "loss": 0.1711, "step": 45135 }, { "epoch": 0.8150427202276486, "grad_norm": 0.6695572733879089, "learning_rate": 1.641037356430174e-06, "loss": 0.1877, "step": 45140 }, { "epoch": 0.8151329996605493, "grad_norm": 0.6078423857688904, "learning_rate": 1.639480914598407e-06, "loss": 0.2369, "step": 45145 }, { "epoch": 0.81522327909345, "grad_norm": 0.4141404330730438, "learning_rate": 1.6379251453096124e-06, "loss": 0.3065, "step": 45150 }, { "epoch": 0.8153135585263508, "grad_norm": 0.4831113815307617, "learning_rate": 1.6363700486889456e-06, "loss": 0.1821, "step": 45155 }, { "epoch": 0.8154038379592514, "grad_norm": 0.5786094069480896, "learning_rate": 1.6348156248614955e-06, "loss": 0.23, "step": 45160 }, { "epoch": 0.8154941173921522, "grad_norm": 0.45996254682540894, "learning_rate": 1.6332618739523109e-06, "loss": 0.1924, "step": 45165 }, { "epoch": 0.8155843968250529, "grad_norm": 0.2889226973056793, "learning_rate": 1.6317087960863731e-06, "loss": 0.1009, "step": 45170 }, { "epoch": 0.8156746762579536, "grad_norm": 0.211906298995018, "learning_rate": 1.630156391388621e-06, "loss": 0.2564, "step": 45175 }, { "epoch": 0.8157649556908544, "grad_norm": 0.38543757796287537, "learning_rate": 1.628604659983931e-06, "loss": 0.2133, "step": 45180 }, { "epoch": 0.815855235123755, "grad_norm": 0.36902737617492676, "learning_rate": 1.6270536019971329e-06, "loss": 0.1993, "step": 45185 }, { "epoch": 0.8159455145566558, "grad_norm": 0.6300240755081177, "learning_rate": 1.6255032175529927e-06, "loss": 0.1647, "step": 45190 }, { "epoch": 0.8160357939895565, "grad_norm": 0.6084157228469849, "learning_rate": 1.6239535067762336e-06, "loss": 0.1406, "step": 45195 }, { "epoch": 0.8161260734224572, "grad_norm": 0.7221276760101318, "learning_rate": 1.622404469791512e-06, "loss": 0.1899, "step": 45200 }, { "epoch": 0.8162163528553579, "grad_norm": 0.44410908222198486, "learning_rate": 1.6208561067234418e-06, "loss": 0.2237, "step": 45205 }, { "epoch": 0.8163066322882586, "grad_norm": 0.6417168974876404, "learning_rate": 1.6193084176965735e-06, "loss": 0.2775, "step": 45210 }, { "epoch": 0.8163969117211594, "grad_norm": 0.5598950386047363, "learning_rate": 1.6177614028354095e-06, "loss": 0.2089, "step": 45215 }, { "epoch": 0.8164871911540601, "grad_norm": 0.5878446102142334, "learning_rate": 1.616215062264399e-06, "loss": 0.1731, "step": 45220 }, { "epoch": 0.8165774705869607, "grad_norm": 0.490138441324234, "learning_rate": 1.6146693961079274e-06, "loss": 0.2354, "step": 45225 }, { "epoch": 0.8166677500198615, "grad_norm": 0.36699575185775757, "learning_rate": 1.6131244044903383e-06, "loss": 0.2205, "step": 45230 }, { "epoch": 0.8167580294527622, "grad_norm": 0.4489833116531372, "learning_rate": 1.61158008753591e-06, "loss": 0.1959, "step": 45235 }, { "epoch": 0.816848308885663, "grad_norm": 0.47021374106407166, "learning_rate": 1.6100364453688733e-06, "loss": 0.2349, "step": 45240 }, { "epoch": 0.8169385883185636, "grad_norm": 0.3278280794620514, "learning_rate": 1.6084934781134044e-06, "loss": 0.2608, "step": 45245 }, { "epoch": 0.8170288677514643, "grad_norm": 0.3986305892467499, "learning_rate": 1.606951185893626e-06, "loss": 0.234, "step": 45250 }, { "epoch": 0.8171191471843651, "grad_norm": 0.364401251077652, "learning_rate": 1.605409568833598e-06, "loss": 0.2502, "step": 45255 }, { "epoch": 0.8172094266172658, "grad_norm": 0.5744989514350891, "learning_rate": 1.6038686270573378e-06, "loss": 0.1782, "step": 45260 }, { "epoch": 0.8172997060501664, "grad_norm": 0.25071555376052856, "learning_rate": 1.6023283606887996e-06, "loss": 0.2248, "step": 45265 }, { "epoch": 0.8173899854830672, "grad_norm": 0.21699373424053192, "learning_rate": 1.6007887698518888e-06, "loss": 0.1588, "step": 45270 }, { "epoch": 0.8174802649159679, "grad_norm": 0.3030523955821991, "learning_rate": 1.5992498546704516e-06, "loss": 0.2139, "step": 45275 }, { "epoch": 0.8175705443488687, "grad_norm": 0.3498544692993164, "learning_rate": 1.5977116152682848e-06, "loss": 0.2147, "step": 45280 }, { "epoch": 0.8176608237817693, "grad_norm": 0.4606175422668457, "learning_rate": 1.5961740517691272e-06, "loss": 0.1715, "step": 45285 }, { "epoch": 0.81775110321467, "grad_norm": 0.3159838914871216, "learning_rate": 1.5946371642966674e-06, "loss": 0.2202, "step": 45290 }, { "epoch": 0.8178413826475708, "grad_norm": 0.3965045213699341, "learning_rate": 1.5931009529745333e-06, "loss": 0.1971, "step": 45295 }, { "epoch": 0.8179316620804715, "grad_norm": 0.6597896218299866, "learning_rate": 1.5915654179263063e-06, "loss": 0.1405, "step": 45300 }, { "epoch": 0.8180219415133722, "grad_norm": 0.904430627822876, "learning_rate": 1.5900305592755028e-06, "loss": 0.2466, "step": 45305 }, { "epoch": 0.8181122209462729, "grad_norm": 0.4741705358028412, "learning_rate": 1.5884963771455942e-06, "loss": 0.2244, "step": 45310 }, { "epoch": 0.8182025003791736, "grad_norm": 0.44903695583343506, "learning_rate": 1.5869628716599982e-06, "loss": 0.2194, "step": 45315 }, { "epoch": 0.8182927798120744, "grad_norm": 0.4540101885795593, "learning_rate": 1.5854300429420667e-06, "loss": 0.3024, "step": 45320 }, { "epoch": 0.818383059244975, "grad_norm": 0.2935616374015808, "learning_rate": 1.5838978911151114e-06, "loss": 0.1556, "step": 45325 }, { "epoch": 0.8184733386778758, "grad_norm": 0.3232356011867523, "learning_rate": 1.5823664163023777e-06, "loss": 0.2741, "step": 45330 }, { "epoch": 0.8185636181107765, "grad_norm": 0.24531584978103638, "learning_rate": 1.5808356186270657e-06, "loss": 0.2053, "step": 45335 }, { "epoch": 0.8186538975436772, "grad_norm": 0.725897490978241, "learning_rate": 1.5793054982123123e-06, "loss": 0.2228, "step": 45340 }, { "epoch": 0.8187441769765779, "grad_norm": 0.49496546387672424, "learning_rate": 1.5777760551812083e-06, "loss": 0.1896, "step": 45345 }, { "epoch": 0.8188344564094786, "grad_norm": 0.5691260695457458, "learning_rate": 1.5762472896567838e-06, "loss": 0.1977, "step": 45350 }, { "epoch": 0.8189247358423793, "grad_norm": 0.3409838378429413, "learning_rate": 1.5747192017620216e-06, "loss": 0.2193, "step": 45355 }, { "epoch": 0.8190150152752801, "grad_norm": 0.6356446146965027, "learning_rate": 1.5731917916198392e-06, "loss": 0.3169, "step": 45360 }, { "epoch": 0.8191052947081807, "grad_norm": 0.30235686898231506, "learning_rate": 1.5716650593531112e-06, "loss": 0.2241, "step": 45365 }, { "epoch": 0.8191955741410815, "grad_norm": 0.4847787022590637, "learning_rate": 1.5701390050846465e-06, "loss": 0.292, "step": 45370 }, { "epoch": 0.8192858535739822, "grad_norm": 0.18954876065254211, "learning_rate": 1.5686136289372079e-06, "loss": 0.2705, "step": 45375 }, { "epoch": 0.819376133006883, "grad_norm": 0.5880841016769409, "learning_rate": 1.5670889310335003e-06, "loss": 0.1502, "step": 45380 }, { "epoch": 0.8194664124397836, "grad_norm": 0.3620661199092865, "learning_rate": 1.5655649114961768e-06, "loss": 0.2292, "step": 45385 }, { "epoch": 0.8195566918726843, "grad_norm": 0.7227039337158203, "learning_rate": 1.5640415704478308e-06, "loss": 0.252, "step": 45390 }, { "epoch": 0.8196469713055851, "grad_norm": 0.5891028046607971, "learning_rate": 1.562518908011007e-06, "loss": 0.1781, "step": 45395 }, { "epoch": 0.8197372507384858, "grad_norm": 0.7310418486595154, "learning_rate": 1.5609969243081892e-06, "loss": 0.3571, "step": 45400 }, { "epoch": 0.8198275301713864, "grad_norm": 0.7425979375839233, "learning_rate": 1.5594756194618121e-06, "loss": 0.2302, "step": 45405 }, { "epoch": 0.8199178096042872, "grad_norm": 0.49799680709838867, "learning_rate": 1.5579549935942517e-06, "loss": 0.1718, "step": 45410 }, { "epoch": 0.8200080890371879, "grad_norm": 0.41933315992355347, "learning_rate": 1.556435046827832e-06, "loss": 0.2294, "step": 45415 }, { "epoch": 0.8200983684700887, "grad_norm": 0.33986660838127136, "learning_rate": 1.5549157792848247e-06, "loss": 0.2222, "step": 45420 }, { "epoch": 0.8201886479029893, "grad_norm": 0.4626463055610657, "learning_rate": 1.5533971910874378e-06, "loss": 0.1882, "step": 45425 }, { "epoch": 0.82027892733589, "grad_norm": 0.4492209851741791, "learning_rate": 1.5518792823578377e-06, "loss": 0.2006, "step": 45430 }, { "epoch": 0.8203692067687908, "grad_norm": 0.33428868651390076, "learning_rate": 1.550362053218123e-06, "loss": 0.1741, "step": 45435 }, { "epoch": 0.8204594862016915, "grad_norm": 0.29296696186065674, "learning_rate": 1.548845503790346e-06, "loss": 0.1906, "step": 45440 }, { "epoch": 0.8205497656345921, "grad_norm": 0.5941862463951111, "learning_rate": 1.5473296341965027e-06, "loss": 0.2781, "step": 45445 }, { "epoch": 0.8206400450674929, "grad_norm": 0.4935421943664551, "learning_rate": 1.5458144445585367e-06, "loss": 0.2923, "step": 45450 }, { "epoch": 0.8207303245003936, "grad_norm": 0.5380502343177795, "learning_rate": 1.5442999349983267e-06, "loss": 0.3464, "step": 45455 }, { "epoch": 0.8208206039332944, "grad_norm": 0.4661569595336914, "learning_rate": 1.5427861056377114e-06, "loss": 0.1701, "step": 45460 }, { "epoch": 0.820910883366195, "grad_norm": 0.662245512008667, "learning_rate": 1.541272956598462e-06, "loss": 0.2531, "step": 45465 }, { "epoch": 0.8210011627990957, "grad_norm": 0.5433655977249146, "learning_rate": 1.5397604880023032e-06, "loss": 0.1601, "step": 45470 }, { "epoch": 0.8210914422319965, "grad_norm": 2.0113091468811035, "learning_rate": 1.5382486999709002e-06, "loss": 0.1519, "step": 45475 }, { "epoch": 0.8211817216648972, "grad_norm": 0.4511041045188904, "learning_rate": 1.5367375926258665e-06, "loss": 0.2001, "step": 45480 }, { "epoch": 0.8212720010977979, "grad_norm": 0.29495155811309814, "learning_rate": 1.5352271660887585e-06, "loss": 0.1836, "step": 45485 }, { "epoch": 0.8213622805306986, "grad_norm": 0.554208517074585, "learning_rate": 1.5337174204810835e-06, "loss": 0.2228, "step": 45490 }, { "epoch": 0.8214525599635993, "grad_norm": 0.5604204535484314, "learning_rate": 1.5322083559242862e-06, "loss": 0.191, "step": 45495 }, { "epoch": 0.8215428393965001, "grad_norm": 0.24129433929920197, "learning_rate": 1.5306999725397564e-06, "loss": 0.1646, "step": 45500 }, { "epoch": 0.8216331188294007, "grad_norm": 0.45954737067222595, "learning_rate": 1.529192270448837e-06, "loss": 0.1976, "step": 45505 }, { "epoch": 0.8217233982623015, "grad_norm": 0.27473676204681396, "learning_rate": 1.5276852497728112e-06, "loss": 0.2805, "step": 45510 }, { "epoch": 0.8218136776952022, "grad_norm": 0.3894435465335846, "learning_rate": 1.5261789106329084e-06, "loss": 0.253, "step": 45515 }, { "epoch": 0.8219039571281029, "grad_norm": 0.4440051019191742, "learning_rate": 1.5246732531503005e-06, "loss": 0.2896, "step": 45520 }, { "epoch": 0.8219942365610036, "grad_norm": 0.36047643423080444, "learning_rate": 1.52316827744611e-06, "loss": 0.2452, "step": 45525 }, { "epoch": 0.8220845159939043, "grad_norm": 0.6247464418411255, "learning_rate": 1.521663983641396e-06, "loss": 0.3099, "step": 45530 }, { "epoch": 0.822174795426805, "grad_norm": 0.866725742816925, "learning_rate": 1.5201603718571734e-06, "loss": 0.2416, "step": 45535 }, { "epoch": 0.8222650748597058, "grad_norm": 0.44059160351753235, "learning_rate": 1.5186574422143918e-06, "loss": 0.2011, "step": 45540 }, { "epoch": 0.8223553542926064, "grad_norm": 0.40573906898498535, "learning_rate": 1.5171551948339547e-06, "loss": 0.2119, "step": 45545 }, { "epoch": 0.8224456337255072, "grad_norm": 0.3320337235927582, "learning_rate": 1.5156536298367053e-06, "loss": 0.2905, "step": 45550 }, { "epoch": 0.8225359131584079, "grad_norm": 0.4631249010562897, "learning_rate": 1.514152747343437e-06, "loss": 0.2341, "step": 45555 }, { "epoch": 0.8226261925913086, "grad_norm": 0.6496574282646179, "learning_rate": 1.51265254747488e-06, "loss": 0.2163, "step": 45560 }, { "epoch": 0.8227164720242093, "grad_norm": 0.38601595163345337, "learning_rate": 1.5111530303517185e-06, "loss": 0.2474, "step": 45565 }, { "epoch": 0.82280675145711, "grad_norm": 0.7684838175773621, "learning_rate": 1.5096541960945731e-06, "loss": 0.2907, "step": 45570 }, { "epoch": 0.8228970308900108, "grad_norm": 0.29702773690223694, "learning_rate": 1.5081560448240174e-06, "loss": 0.2478, "step": 45575 }, { "epoch": 0.8229873103229115, "grad_norm": 0.36197206377983093, "learning_rate": 1.5066585766605656e-06, "loss": 0.1844, "step": 45580 }, { "epoch": 0.8230775897558122, "grad_norm": 0.37863409519195557, "learning_rate": 1.505161791724682e-06, "loss": 0.2684, "step": 45585 }, { "epoch": 0.8231678691887129, "grad_norm": 0.5644698143005371, "learning_rate": 1.5036656901367685e-06, "loss": 0.2234, "step": 45590 }, { "epoch": 0.8232581486216136, "grad_norm": 0.4333361089229584, "learning_rate": 1.5021702720171727e-06, "loss": 0.1902, "step": 45595 }, { "epoch": 0.8233484280545144, "grad_norm": 0.6529067158699036, "learning_rate": 1.5006755374861969e-06, "loss": 0.2258, "step": 45600 }, { "epoch": 0.8234387074874151, "grad_norm": 0.46586236357688904, "learning_rate": 1.4991814866640753e-06, "loss": 0.2391, "step": 45605 }, { "epoch": 0.8235289869203157, "grad_norm": 0.32549789547920227, "learning_rate": 1.4976881196709959e-06, "loss": 0.2115, "step": 45610 }, { "epoch": 0.8236192663532165, "grad_norm": 0.3597162961959839, "learning_rate": 1.4961954366270891e-06, "loss": 0.2647, "step": 45615 }, { "epoch": 0.8237095457861172, "grad_norm": 0.48388177156448364, "learning_rate": 1.4947034376524338e-06, "loss": 0.2034, "step": 45620 }, { "epoch": 0.823799825219018, "grad_norm": 0.4890066683292389, "learning_rate": 1.4932121228670448e-06, "loss": 0.2268, "step": 45625 }, { "epoch": 0.8238901046519186, "grad_norm": 0.5106359124183655, "learning_rate": 1.4917214923908929e-06, "loss": 0.1494, "step": 45630 }, { "epoch": 0.8239803840848193, "grad_norm": 0.5168476104736328, "learning_rate": 1.4902315463438833e-06, "loss": 0.2053, "step": 45635 }, { "epoch": 0.8240706635177201, "grad_norm": 0.36747848987579346, "learning_rate": 1.4887422848458733e-06, "loss": 0.2252, "step": 45640 }, { "epoch": 0.8241609429506208, "grad_norm": 0.6707513332366943, "learning_rate": 1.4872537080166649e-06, "loss": 0.2243, "step": 45645 }, { "epoch": 0.8242512223835214, "grad_norm": 0.7406870722770691, "learning_rate": 1.4857658159760035e-06, "loss": 0.2212, "step": 45650 }, { "epoch": 0.8243415018164222, "grad_norm": 0.1467631310224533, "learning_rate": 1.4842786088435745e-06, "loss": 0.1931, "step": 45655 }, { "epoch": 0.8244317812493229, "grad_norm": 0.31390658020973206, "learning_rate": 1.48279208673902e-06, "loss": 0.2757, "step": 45660 }, { "epoch": 0.8245220606822237, "grad_norm": 0.5410983562469482, "learning_rate": 1.481306249781913e-06, "loss": 0.2389, "step": 45665 }, { "epoch": 0.8246123401151243, "grad_norm": 0.48541054129600525, "learning_rate": 1.4798210980917837e-06, "loss": 0.2438, "step": 45670 }, { "epoch": 0.824702619548025, "grad_norm": 0.46386122703552246, "learning_rate": 1.4783366317880976e-06, "loss": 0.2263, "step": 45675 }, { "epoch": 0.8247928989809258, "grad_norm": 0.35350045561790466, "learning_rate": 1.4768528509902703e-06, "loss": 0.2448, "step": 45680 }, { "epoch": 0.8248831784138265, "grad_norm": 0.5350896120071411, "learning_rate": 1.4753697558176637e-06, "loss": 0.1816, "step": 45685 }, { "epoch": 0.8249734578467272, "grad_norm": 0.5602902770042419, "learning_rate": 1.4738873463895797e-06, "loss": 0.2954, "step": 45690 }, { "epoch": 0.8250637372796279, "grad_norm": 0.6952036619186401, "learning_rate": 1.4724056228252682e-06, "loss": 0.272, "step": 45695 }, { "epoch": 0.8251540167125286, "grad_norm": 0.55024653673172, "learning_rate": 1.4709245852439212e-06, "loss": 0.146, "step": 45700 }, { "epoch": 0.8252442961454294, "grad_norm": 0.26396316289901733, "learning_rate": 1.4694442337646797e-06, "loss": 0.1623, "step": 45705 }, { "epoch": 0.82533457557833, "grad_norm": 0.37291499972343445, "learning_rate": 1.4679645685066256e-06, "loss": 0.1808, "step": 45710 }, { "epoch": 0.8254248550112308, "grad_norm": 0.47473058104515076, "learning_rate": 1.4664855895887908e-06, "loss": 0.2854, "step": 45715 }, { "epoch": 0.8255151344441315, "grad_norm": 0.17294996976852417, "learning_rate": 1.4650072971301433e-06, "loss": 0.2381, "step": 45720 }, { "epoch": 0.8256054138770322, "grad_norm": 0.6652577519416809, "learning_rate": 1.4635296912496054e-06, "loss": 0.2251, "step": 45725 }, { "epoch": 0.8256956933099329, "grad_norm": 0.42790499329566956, "learning_rate": 1.4620527720660348e-06, "loss": 0.2164, "step": 45730 }, { "epoch": 0.8257859727428336, "grad_norm": 0.7221193313598633, "learning_rate": 1.4605765396982452e-06, "loss": 0.1605, "step": 45735 }, { "epoch": 0.8258762521757343, "grad_norm": 0.4172767698764801, "learning_rate": 1.4591009942649825e-06, "loss": 0.2616, "step": 45740 }, { "epoch": 0.8259665316086351, "grad_norm": 0.5771311521530151, "learning_rate": 1.4576261358849464e-06, "loss": 0.1407, "step": 45745 }, { "epoch": 0.8260568110415357, "grad_norm": 0.41090327501296997, "learning_rate": 1.4561519646767797e-06, "loss": 0.2271, "step": 45750 }, { "epoch": 0.8261470904744365, "grad_norm": 0.8600770831108093, "learning_rate": 1.4546784807590687e-06, "loss": 0.143, "step": 45755 }, { "epoch": 0.8262373699073372, "grad_norm": 0.4484533667564392, "learning_rate": 1.4532056842503416e-06, "loss": 0.2129, "step": 45760 }, { "epoch": 0.826327649340238, "grad_norm": 0.418782114982605, "learning_rate": 1.4517335752690786e-06, "loss": 0.1961, "step": 45765 }, { "epoch": 0.8264179287731386, "grad_norm": 0.3176212012767792, "learning_rate": 1.4502621539336947e-06, "loss": 0.2542, "step": 45770 }, { "epoch": 0.8265082082060393, "grad_norm": 0.5707917809486389, "learning_rate": 1.4487914203625574e-06, "loss": 0.2532, "step": 45775 }, { "epoch": 0.8265984876389401, "grad_norm": 0.8464087843894958, "learning_rate": 1.447321374673981e-06, "loss": 0.3759, "step": 45780 }, { "epoch": 0.8266887670718408, "grad_norm": 0.38225147128105164, "learning_rate": 1.4458520169862123e-06, "loss": 0.1762, "step": 45785 }, { "epoch": 0.8267790465047414, "grad_norm": 0.49505189061164856, "learning_rate": 1.4443833474174574e-06, "loss": 0.2672, "step": 45790 }, { "epoch": 0.8268693259376422, "grad_norm": 0.20402777194976807, "learning_rate": 1.4429153660858542e-06, "loss": 0.1732, "step": 45795 }, { "epoch": 0.8269596053705429, "grad_norm": 0.6084776520729065, "learning_rate": 1.441448073109496e-06, "loss": 0.2127, "step": 45800 }, { "epoch": 0.8270498848034437, "grad_norm": 1.3320783376693726, "learning_rate": 1.4399814686064107e-06, "loss": 0.228, "step": 45805 }, { "epoch": 0.8271401642363443, "grad_norm": 0.4035283029079437, "learning_rate": 1.4385155526945793e-06, "loss": 0.184, "step": 45810 }, { "epoch": 0.827230443669245, "grad_norm": 0.4012914001941681, "learning_rate": 1.4370503254919244e-06, "loss": 0.2614, "step": 45815 }, { "epoch": 0.8273207231021458, "grad_norm": 0.47161754965782166, "learning_rate": 1.4355857871163138e-06, "loss": 0.1704, "step": 45820 }, { "epoch": 0.8274110025350465, "grad_norm": 0.4330509305000305, "learning_rate": 1.4341219376855552e-06, "loss": 0.1205, "step": 45825 }, { "epoch": 0.8275012819679471, "grad_norm": 0.9258213043212891, "learning_rate": 1.4326587773174094e-06, "loss": 0.2305, "step": 45830 }, { "epoch": 0.8275915614008479, "grad_norm": 0.668340265750885, "learning_rate": 1.431196306129572e-06, "loss": 0.2348, "step": 45835 }, { "epoch": 0.8276818408337486, "grad_norm": 0.579482913017273, "learning_rate": 1.42973452423969e-06, "loss": 0.2361, "step": 45840 }, { "epoch": 0.8277721202666494, "grad_norm": 0.4980883002281189, "learning_rate": 1.4282734317653536e-06, "loss": 0.1864, "step": 45845 }, { "epoch": 0.82786239969955, "grad_norm": 0.3318484425544739, "learning_rate": 1.4268130288241e-06, "loss": 0.2001, "step": 45850 }, { "epoch": 0.8279526791324507, "grad_norm": 0.6138582825660706, "learning_rate": 1.4253533155334022e-06, "loss": 0.1656, "step": 45855 }, { "epoch": 0.8280429585653515, "grad_norm": 0.5384577512741089, "learning_rate": 1.423894292010689e-06, "loss": 0.1881, "step": 45860 }, { "epoch": 0.8281332379982522, "grad_norm": 0.47394129633903503, "learning_rate": 1.422435958373326e-06, "loss": 0.2215, "step": 45865 }, { "epoch": 0.8282235174311529, "grad_norm": 0.32517707347869873, "learning_rate": 1.4209783147386237e-06, "loss": 0.2363, "step": 45870 }, { "epoch": 0.8283137968640536, "grad_norm": 0.4851417541503906, "learning_rate": 1.419521361223839e-06, "loss": 0.2226, "step": 45875 }, { "epoch": 0.8284040762969543, "grad_norm": 0.411033570766449, "learning_rate": 1.4180650979461751e-06, "loss": 0.2029, "step": 45880 }, { "epoch": 0.8284943557298551, "grad_norm": 0.28907907009124756, "learning_rate": 1.4166095250227795e-06, "loss": 0.2287, "step": 45885 }, { "epoch": 0.8285846351627557, "grad_norm": 0.6083971858024597, "learning_rate": 1.4151546425707384e-06, "loss": 0.2267, "step": 45890 }, { "epoch": 0.8286749145956565, "grad_norm": 0.3592473566532135, "learning_rate": 1.4137004507070907e-06, "loss": 0.2908, "step": 45895 }, { "epoch": 0.8287651940285572, "grad_norm": 0.7764474749565125, "learning_rate": 1.4122469495488112e-06, "loss": 0.2336, "step": 45900 }, { "epoch": 0.8288554734614579, "grad_norm": 0.7354249954223633, "learning_rate": 1.4107941392128254e-06, "loss": 0.2546, "step": 45905 }, { "epoch": 0.8289457528943586, "grad_norm": 0.2935306131839752, "learning_rate": 1.4093420198160013e-06, "loss": 0.1447, "step": 45910 }, { "epoch": 0.8290360323272593, "grad_norm": 0.5847833156585693, "learning_rate": 1.407890591475155e-06, "loss": 0.1871, "step": 45915 }, { "epoch": 0.82912631176016, "grad_norm": 0.9674150347709656, "learning_rate": 1.4064398543070369e-06, "loss": 0.2378, "step": 45920 }, { "epoch": 0.8292165911930608, "grad_norm": 0.42791542410850525, "learning_rate": 1.404989808428353e-06, "loss": 0.1536, "step": 45925 }, { "epoch": 0.8293068706259614, "grad_norm": 0.43429914116859436, "learning_rate": 1.403540453955745e-06, "loss": 0.205, "step": 45930 }, { "epoch": 0.8293971500588622, "grad_norm": 0.5471498370170593, "learning_rate": 1.4020917910058086e-06, "loss": 0.2592, "step": 45935 }, { "epoch": 0.8294874294917629, "grad_norm": 0.28336235880851746, "learning_rate": 1.4006438196950712e-06, "loss": 0.3411, "step": 45940 }, { "epoch": 0.8295777089246636, "grad_norm": 0.5176075100898743, "learning_rate": 1.3991965401400154e-06, "loss": 0.1394, "step": 45945 }, { "epoch": 0.8296679883575643, "grad_norm": 0.48003312945365906, "learning_rate": 1.3977499524570648e-06, "loss": 0.2763, "step": 45950 }, { "epoch": 0.829758267790465, "grad_norm": 0.4888760447502136, "learning_rate": 1.3963040567625886e-06, "loss": 0.1987, "step": 45955 }, { "epoch": 0.8298485472233658, "grad_norm": 0.5276985168457031, "learning_rate": 1.3948588531728946e-06, "loss": 0.2529, "step": 45960 }, { "epoch": 0.8299388266562665, "grad_norm": 0.6013329029083252, "learning_rate": 1.393414341804239e-06, "loss": 0.1331, "step": 45965 }, { "epoch": 0.8300291060891672, "grad_norm": 0.6370763778686523, "learning_rate": 1.3919705227728242e-06, "loss": 0.2626, "step": 45970 }, { "epoch": 0.8301193855220679, "grad_norm": 0.3273886740207672, "learning_rate": 1.390527396194794e-06, "loss": 0.2427, "step": 45975 }, { "epoch": 0.8302096649549686, "grad_norm": 0.715945303440094, "learning_rate": 1.3890849621862401e-06, "loss": 0.3468, "step": 45980 }, { "epoch": 0.8302999443878694, "grad_norm": 0.5415103435516357, "learning_rate": 1.3876432208631917e-06, "loss": 0.3264, "step": 45985 }, { "epoch": 0.8303902238207701, "grad_norm": 0.4317578077316284, "learning_rate": 1.3862021723416298e-06, "loss": 0.2118, "step": 45990 }, { "epoch": 0.8304805032536707, "grad_norm": 0.23266218602657318, "learning_rate": 1.3847618167374733e-06, "loss": 0.1878, "step": 45995 }, { "epoch": 0.8305707826865715, "grad_norm": 0.19852973520755768, "learning_rate": 1.383322154166591e-06, "loss": 0.2825, "step": 46000 }, { "epoch": 0.8306610621194722, "grad_norm": 0.3784917891025543, "learning_rate": 1.381883184744791e-06, "loss": 0.2635, "step": 46005 }, { "epoch": 0.830751341552373, "grad_norm": 0.508661687374115, "learning_rate": 1.380444908587828e-06, "loss": 0.2338, "step": 46010 }, { "epoch": 0.8308416209852736, "grad_norm": 0.42893388867378235, "learning_rate": 1.3790073258114023e-06, "loss": 0.1343, "step": 46015 }, { "epoch": 0.8309319004181743, "grad_norm": 0.6285560727119446, "learning_rate": 1.3775704365311594e-06, "loss": 0.254, "step": 46020 }, { "epoch": 0.8310221798510751, "grad_norm": 0.6512768864631653, "learning_rate": 1.376134240862681e-06, "loss": 0.1963, "step": 46025 }, { "epoch": 0.8311124592839758, "grad_norm": 0.5742652416229248, "learning_rate": 1.374698738921504e-06, "loss": 0.2186, "step": 46030 }, { "epoch": 0.8312027387168764, "grad_norm": 0.470001757144928, "learning_rate": 1.3732639308230988e-06, "loss": 0.2349, "step": 46035 }, { "epoch": 0.8312930181497772, "grad_norm": 0.6508385539054871, "learning_rate": 1.371829816682888e-06, "loss": 0.2872, "step": 46040 }, { "epoch": 0.8313832975826779, "grad_norm": 0.49703094363212585, "learning_rate": 1.3703963966162348e-06, "loss": 0.2483, "step": 46045 }, { "epoch": 0.8314735770155787, "grad_norm": 0.5042121410369873, "learning_rate": 1.3689636707384512e-06, "loss": 0.2363, "step": 46050 }, { "epoch": 0.8315638564484793, "grad_norm": 0.37353435158729553, "learning_rate": 1.3675316391647874e-06, "loss": 0.2679, "step": 46055 }, { "epoch": 0.83165413588138, "grad_norm": 0.46420320868492126, "learning_rate": 1.3661003020104358e-06, "loss": 0.2406, "step": 46060 }, { "epoch": 0.8317444153142808, "grad_norm": 0.42775171995162964, "learning_rate": 1.364669659390543e-06, "loss": 0.2727, "step": 46065 }, { "epoch": 0.8318346947471815, "grad_norm": 0.36116909980773926, "learning_rate": 1.363239711420189e-06, "loss": 0.2324, "step": 46070 }, { "epoch": 0.8319249741800822, "grad_norm": 0.6921894550323486, "learning_rate": 1.3618104582144042e-06, "loss": 0.2463, "step": 46075 }, { "epoch": 0.8320152536129829, "grad_norm": 0.536548376083374, "learning_rate": 1.3603818998881634e-06, "loss": 0.2015, "step": 46080 }, { "epoch": 0.8321055330458836, "grad_norm": 0.4366627335548401, "learning_rate": 1.3589540365563835e-06, "loss": 0.251, "step": 46085 }, { "epoch": 0.8321958124787844, "grad_norm": 0.3272264897823334, "learning_rate": 1.3575268683339239e-06, "loss": 0.1897, "step": 46090 }, { "epoch": 0.832286091911685, "grad_norm": 0.5101941227912903, "learning_rate": 1.3561003953355933e-06, "loss": 0.2492, "step": 46095 }, { "epoch": 0.8323763713445858, "grad_norm": 0.5696504712104797, "learning_rate": 1.354674617676135e-06, "loss": 0.123, "step": 46100 }, { "epoch": 0.8324666507774865, "grad_norm": 0.5099152326583862, "learning_rate": 1.353249535470248e-06, "loss": 0.1961, "step": 46105 }, { "epoch": 0.8325569302103872, "grad_norm": 0.30795571208000183, "learning_rate": 1.3518251488325661e-06, "loss": 0.2404, "step": 46110 }, { "epoch": 0.8326472096432879, "grad_norm": 0.631792426109314, "learning_rate": 1.3504014578776758e-06, "loss": 0.2101, "step": 46115 }, { "epoch": 0.8327374890761886, "grad_norm": 1.0345816612243652, "learning_rate": 1.3489784627200976e-06, "loss": 0.2382, "step": 46120 }, { "epoch": 0.8328277685090894, "grad_norm": 0.2446281611919403, "learning_rate": 1.3475561634743051e-06, "loss": 0.1597, "step": 46125 }, { "epoch": 0.8329180479419901, "grad_norm": 0.5093153119087219, "learning_rate": 1.3461345602547072e-06, "loss": 0.2228, "step": 46130 }, { "epoch": 0.8330083273748907, "grad_norm": 0.2520841956138611, "learning_rate": 1.3447136531756667e-06, "loss": 0.1745, "step": 46135 }, { "epoch": 0.8330986068077915, "grad_norm": 0.3567710220813751, "learning_rate": 1.343293442351481e-06, "loss": 0.1607, "step": 46140 }, { "epoch": 0.8331888862406922, "grad_norm": 0.6307740807533264, "learning_rate": 1.3418739278963965e-06, "loss": 0.2686, "step": 46145 }, { "epoch": 0.833279165673593, "grad_norm": 0.391429603099823, "learning_rate": 1.3404551099246066e-06, "loss": 0.2448, "step": 46150 }, { "epoch": 0.8333694451064936, "grad_norm": 0.28969264030456543, "learning_rate": 1.3390369885502408e-06, "loss": 0.2472, "step": 46155 }, { "epoch": 0.8334597245393943, "grad_norm": 0.4014858603477478, "learning_rate": 1.337619563887379e-06, "loss": 0.2494, "step": 46160 }, { "epoch": 0.8335500039722951, "grad_norm": 0.522121787071228, "learning_rate": 1.33620283605004e-06, "loss": 0.2404, "step": 46165 }, { "epoch": 0.8336402834051958, "grad_norm": 0.42990896105766296, "learning_rate": 1.3347868051521907e-06, "loss": 0.3098, "step": 46170 }, { "epoch": 0.8337305628380964, "grad_norm": 0.4846160113811493, "learning_rate": 1.3333714713077417e-06, "loss": 0.3495, "step": 46175 }, { "epoch": 0.8338208422709972, "grad_norm": 0.3562118113040924, "learning_rate": 1.3319568346305466e-06, "loss": 0.2418, "step": 46180 }, { "epoch": 0.8339111217038979, "grad_norm": 0.5375434160232544, "learning_rate": 1.330542895234399e-06, "loss": 0.1946, "step": 46185 }, { "epoch": 0.8340014011367987, "grad_norm": 0.31592509150505066, "learning_rate": 1.3291296532330454e-06, "loss": 0.162, "step": 46190 }, { "epoch": 0.8340916805696993, "grad_norm": 0.39862120151519775, "learning_rate": 1.3277171087401652e-06, "loss": 0.1801, "step": 46195 }, { "epoch": 0.8341819600026, "grad_norm": 0.45258471369743347, "learning_rate": 1.3263052618693917e-06, "loss": 0.2124, "step": 46200 }, { "epoch": 0.8342722394355008, "grad_norm": 0.4387921988964081, "learning_rate": 1.324894112734293e-06, "loss": 0.126, "step": 46205 }, { "epoch": 0.8343625188684015, "grad_norm": 0.3463088274002075, "learning_rate": 1.3234836614483915e-06, "loss": 0.2149, "step": 46210 }, { "epoch": 0.8344527983013021, "grad_norm": 0.29718273878097534, "learning_rate": 1.3220739081251443e-06, "loss": 0.1239, "step": 46215 }, { "epoch": 0.8345430777342029, "grad_norm": 0.36515769362449646, "learning_rate": 1.3206648528779586e-06, "loss": 0.2371, "step": 46220 }, { "epoch": 0.8346333571671036, "grad_norm": 0.2504488229751587, "learning_rate": 1.319256495820178e-06, "loss": 0.205, "step": 46225 }, { "epoch": 0.8347236366000044, "grad_norm": 0.5337098240852356, "learning_rate": 1.3178488370650989e-06, "loss": 0.2764, "step": 46230 }, { "epoch": 0.834813916032905, "grad_norm": 0.47260913252830505, "learning_rate": 1.3164418767259545e-06, "loss": 0.2033, "step": 46235 }, { "epoch": 0.8349041954658057, "grad_norm": 0.40227386355400085, "learning_rate": 1.3150356149159248e-06, "loss": 0.2325, "step": 46240 }, { "epoch": 0.8349944748987065, "grad_norm": 0.4283299148082733, "learning_rate": 1.313630051748136e-06, "loss": 0.1534, "step": 46245 }, { "epoch": 0.8350847543316072, "grad_norm": 0.541343092918396, "learning_rate": 1.3122251873356517e-06, "loss": 0.2231, "step": 46250 }, { "epoch": 0.8351750337645079, "grad_norm": 0.4880170524120331, "learning_rate": 1.3108210217914874e-06, "loss": 0.2555, "step": 46255 }, { "epoch": 0.8352653131974086, "grad_norm": 0.5511779189109802, "learning_rate": 1.3094175552285925e-06, "loss": 0.2015, "step": 46260 }, { "epoch": 0.8353555926303093, "grad_norm": 0.43681982159614563, "learning_rate": 1.3080147877598715e-06, "loss": 0.2518, "step": 46265 }, { "epoch": 0.8354458720632101, "grad_norm": 0.4958720803260803, "learning_rate": 1.3066127194981593e-06, "loss": 0.1949, "step": 46270 }, { "epoch": 0.8355361514961107, "grad_norm": 0.6504906415939331, "learning_rate": 1.3052113505562513e-06, "loss": 0.2196, "step": 46275 }, { "epoch": 0.8356264309290115, "grad_norm": 0.6573824882507324, "learning_rate": 1.303810681046871e-06, "loss": 0.22, "step": 46280 }, { "epoch": 0.8357167103619122, "grad_norm": 0.4231870472431183, "learning_rate": 1.3024107110826956e-06, "loss": 0.3308, "step": 46285 }, { "epoch": 0.8358069897948129, "grad_norm": 0.4615139961242676, "learning_rate": 1.3010114407763397e-06, "loss": 0.2548, "step": 46290 }, { "epoch": 0.8358972692277136, "grad_norm": 0.5508519411087036, "learning_rate": 1.2996128702403665e-06, "loss": 0.2336, "step": 46295 }, { "epoch": 0.8359875486606143, "grad_norm": 0.5502642393112183, "learning_rate": 1.2982149995872784e-06, "loss": 0.2087, "step": 46300 }, { "epoch": 0.836077828093515, "grad_norm": 0.37094512581825256, "learning_rate": 1.2968178289295264e-06, "loss": 0.2083, "step": 46305 }, { "epoch": 0.8361681075264158, "grad_norm": 0.595604658126831, "learning_rate": 1.2954213583795005e-06, "loss": 0.2734, "step": 46310 }, { "epoch": 0.8362583869593164, "grad_norm": 0.41564124822616577, "learning_rate": 1.29402558804954e-06, "loss": 0.2285, "step": 46315 }, { "epoch": 0.8363486663922172, "grad_norm": 0.33614933490753174, "learning_rate": 1.2926305180519204e-06, "loss": 0.2481, "step": 46320 }, { "epoch": 0.8364389458251179, "grad_norm": 0.480729341506958, "learning_rate": 1.29123614849887e-06, "loss": 0.2043, "step": 46325 }, { "epoch": 0.8365292252580186, "grad_norm": 0.5558351278305054, "learning_rate": 1.2898424795025522e-06, "loss": 0.2212, "step": 46330 }, { "epoch": 0.8366195046909193, "grad_norm": 0.5643190145492554, "learning_rate": 1.2884495111750762e-06, "loss": 0.1975, "step": 46335 }, { "epoch": 0.83670978412382, "grad_norm": 0.42842450737953186, "learning_rate": 1.2870572436284967e-06, "loss": 0.1398, "step": 46340 }, { "epoch": 0.8368000635567208, "grad_norm": 0.5728448629379272, "learning_rate": 1.2856656769748132e-06, "loss": 0.2152, "step": 46345 }, { "epoch": 0.8368903429896215, "grad_norm": 0.4689022898674011, "learning_rate": 1.2842748113259695e-06, "loss": 0.1772, "step": 46350 }, { "epoch": 0.8369806224225221, "grad_norm": 0.4232998490333557, "learning_rate": 1.282884646793845e-06, "loss": 0.3376, "step": 46355 }, { "epoch": 0.8370709018554229, "grad_norm": 0.7367646098136902, "learning_rate": 1.2814951834902734e-06, "loss": 0.3007, "step": 46360 }, { "epoch": 0.8371611812883236, "grad_norm": 0.4986398220062256, "learning_rate": 1.280106421527022e-06, "loss": 0.2284, "step": 46365 }, { "epoch": 0.8372514607212244, "grad_norm": 0.5241086483001709, "learning_rate": 1.2787183610158104e-06, "loss": 0.2932, "step": 46370 }, { "epoch": 0.8373417401541251, "grad_norm": 0.6657364964485168, "learning_rate": 1.2773310020682961e-06, "loss": 0.1989, "step": 46375 }, { "epoch": 0.8374320195870257, "grad_norm": 0.5218197703361511, "learning_rate": 1.275944344796085e-06, "loss": 0.1435, "step": 46380 }, { "epoch": 0.8375222990199265, "grad_norm": 0.401005357503891, "learning_rate": 1.2745583893107183e-06, "loss": 0.236, "step": 46385 }, { "epoch": 0.8376125784528272, "grad_norm": 0.4521532356739044, "learning_rate": 1.273173135723692e-06, "loss": 0.2441, "step": 46390 }, { "epoch": 0.837702857885728, "grad_norm": 0.41541245579719543, "learning_rate": 1.271788584146435e-06, "loss": 0.2277, "step": 46395 }, { "epoch": 0.8377931373186286, "grad_norm": 0.33301645517349243, "learning_rate": 1.270404734690328e-06, "loss": 0.1948, "step": 46400 }, { "epoch": 0.8378834167515293, "grad_norm": 0.789784848690033, "learning_rate": 1.2690215874666856e-06, "loss": 0.2297, "step": 46405 }, { "epoch": 0.8379736961844301, "grad_norm": 0.3074299693107605, "learning_rate": 1.2676391425867806e-06, "loss": 0.2477, "step": 46410 }, { "epoch": 0.8380639756173308, "grad_norm": 0.4493200480937958, "learning_rate": 1.2662574001618156e-06, "loss": 0.1588, "step": 46415 }, { "epoch": 0.8381542550502314, "grad_norm": 0.7905411124229431, "learning_rate": 1.2648763603029413e-06, "loss": 0.2534, "step": 46420 }, { "epoch": 0.8382445344831322, "grad_norm": 0.3890785276889801, "learning_rate": 1.2634960231212546e-06, "loss": 0.2178, "step": 46425 }, { "epoch": 0.8383348139160329, "grad_norm": 0.7565959095954895, "learning_rate": 1.2621163887277899e-06, "loss": 0.1351, "step": 46430 }, { "epoch": 0.8384250933489337, "grad_norm": 0.4592171013355255, "learning_rate": 1.2607374572335317e-06, "loss": 0.2981, "step": 46435 }, { "epoch": 0.8385153727818343, "grad_norm": 0.22006268799304962, "learning_rate": 1.2593592287494038e-06, "loss": 0.2293, "step": 46440 }, { "epoch": 0.838605652214735, "grad_norm": 0.4412384331226349, "learning_rate": 1.2579817033862784e-06, "loss": 0.2287, "step": 46445 }, { "epoch": 0.8386959316476358, "grad_norm": 0.532873272895813, "learning_rate": 1.2566048812549614e-06, "loss": 0.19, "step": 46450 }, { "epoch": 0.8387862110805365, "grad_norm": 0.23210127651691437, "learning_rate": 1.2552287624662141e-06, "loss": 0.1869, "step": 46455 }, { "epoch": 0.8388764905134372, "grad_norm": 0.6327080130577087, "learning_rate": 1.253853347130729e-06, "loss": 0.1106, "step": 46460 }, { "epoch": 0.8389667699463379, "grad_norm": 0.44668781757354736, "learning_rate": 1.2524786353591544e-06, "loss": 0.1868, "step": 46465 }, { "epoch": 0.8390570493792386, "grad_norm": 0.3663041889667511, "learning_rate": 1.2511046272620685e-06, "loss": 0.1131, "step": 46470 }, { "epoch": 0.8391473288121394, "grad_norm": 0.12444600462913513, "learning_rate": 1.2497313229500086e-06, "loss": 0.2369, "step": 46475 }, { "epoch": 0.83923760824504, "grad_norm": 0.7890183329582214, "learning_rate": 1.2483587225334414e-06, "loss": 0.1307, "step": 46480 }, { "epoch": 0.8393278876779408, "grad_norm": 0.4431261420249939, "learning_rate": 1.246986826122787e-06, "loss": 0.2444, "step": 46485 }, { "epoch": 0.8394181671108415, "grad_norm": 0.39454951882362366, "learning_rate": 1.2456156338283998e-06, "loss": 0.1739, "step": 46490 }, { "epoch": 0.8395084465437422, "grad_norm": 0.3442912995815277, "learning_rate": 1.2442451457605852e-06, "loss": 0.2237, "step": 46495 }, { "epoch": 0.8395987259766429, "grad_norm": 0.35818588733673096, "learning_rate": 1.2428753620295875e-06, "loss": 0.2182, "step": 46500 }, { "epoch": 0.8396890054095436, "grad_norm": 0.4712678790092468, "learning_rate": 1.2415062827455948e-06, "loss": 0.2619, "step": 46505 }, { "epoch": 0.8397792848424444, "grad_norm": 0.41358664631843567, "learning_rate": 1.2401379080187447e-06, "loss": 0.2226, "step": 46510 }, { "epoch": 0.8398695642753451, "grad_norm": 1.274709939956665, "learning_rate": 1.238770237959107e-06, "loss": 0.1985, "step": 46515 }, { "epoch": 0.8399598437082457, "grad_norm": 0.3299989402294159, "learning_rate": 1.237403272676706e-06, "loss": 0.1947, "step": 46520 }, { "epoch": 0.8400501231411465, "grad_norm": 0.2533578872680664, "learning_rate": 1.236037012281499e-06, "loss": 0.2319, "step": 46525 }, { "epoch": 0.8401404025740472, "grad_norm": 0.845720648765564, "learning_rate": 1.234671456883396e-06, "loss": 0.2142, "step": 46530 }, { "epoch": 0.840230682006948, "grad_norm": 0.48371750116348267, "learning_rate": 1.2333066065922394e-06, "loss": 0.2569, "step": 46535 }, { "epoch": 0.8403209614398486, "grad_norm": 0.3321785628795624, "learning_rate": 1.231942461517832e-06, "loss": 0.1951, "step": 46540 }, { "epoch": 0.8404112408727493, "grad_norm": 0.3849267065525055, "learning_rate": 1.230579021769901e-06, "loss": 0.2217, "step": 46545 }, { "epoch": 0.8405015203056501, "grad_norm": 0.6842372417449951, "learning_rate": 1.2292162874581293e-06, "loss": 0.2251, "step": 46550 }, { "epoch": 0.8405917997385508, "grad_norm": 0.4850200116634369, "learning_rate": 1.227854258692136e-06, "loss": 0.2633, "step": 46555 }, { "epoch": 0.8406820791714514, "grad_norm": 0.28475072979927063, "learning_rate": 1.2264929355814904e-06, "loss": 0.1486, "step": 46560 }, { "epoch": 0.8407723586043522, "grad_norm": 0.5592582821846008, "learning_rate": 1.2251323182356957e-06, "loss": 0.256, "step": 46565 }, { "epoch": 0.8408626380372529, "grad_norm": 0.7354555726051331, "learning_rate": 1.2237724067642066e-06, "loss": 0.159, "step": 46570 }, { "epoch": 0.8409529174701537, "grad_norm": 0.25784942507743835, "learning_rate": 1.2224132012764178e-06, "loss": 0.1818, "step": 46575 }, { "epoch": 0.8410431969030543, "grad_norm": 0.9170084595680237, "learning_rate": 1.221054701881671e-06, "loss": 0.2719, "step": 46580 }, { "epoch": 0.841133476335955, "grad_norm": 0.21908967196941376, "learning_rate": 1.2196969086892408e-06, "loss": 0.1778, "step": 46585 }, { "epoch": 0.8412237557688558, "grad_norm": 0.7114096283912659, "learning_rate": 1.2183398218083587e-06, "loss": 0.235, "step": 46590 }, { "epoch": 0.8413140352017565, "grad_norm": 0.5532537698745728, "learning_rate": 1.2169834413481862e-06, "loss": 0.3203, "step": 46595 }, { "epoch": 0.8414043146346571, "grad_norm": 0.2997341752052307, "learning_rate": 1.2156277674178374e-06, "loss": 0.1601, "step": 46600 }, { "epoch": 0.8414945940675579, "grad_norm": 0.7469394207000732, "learning_rate": 1.2142728001263682e-06, "loss": 0.2101, "step": 46605 }, { "epoch": 0.8415848735004586, "grad_norm": 0.5516133308410645, "learning_rate": 1.2129185395827715e-06, "loss": 0.2367, "step": 46610 }, { "epoch": 0.8416751529333594, "grad_norm": 0.5415496826171875, "learning_rate": 1.2115649858959922e-06, "loss": 0.2088, "step": 46615 }, { "epoch": 0.84176543236626, "grad_norm": 0.4268716275691986, "learning_rate": 1.21021213917491e-06, "loss": 0.2514, "step": 46620 }, { "epoch": 0.8418557117991607, "grad_norm": 0.26933982968330383, "learning_rate": 1.2088599995283546e-06, "loss": 0.215, "step": 46625 }, { "epoch": 0.8419459912320615, "grad_norm": 0.4040341377258301, "learning_rate": 1.2075085670650931e-06, "loss": 0.2693, "step": 46630 }, { "epoch": 0.8420362706649622, "grad_norm": 0.39315953850746155, "learning_rate": 1.2061578418938403e-06, "loss": 0.203, "step": 46635 }, { "epoch": 0.8421265500978629, "grad_norm": 0.48658832907676697, "learning_rate": 1.2048078241232508e-06, "loss": 0.2236, "step": 46640 }, { "epoch": 0.8422168295307636, "grad_norm": 0.534344494342804, "learning_rate": 1.203458513861927e-06, "loss": 0.2558, "step": 46645 }, { "epoch": 0.8423071089636643, "grad_norm": 0.5080280303955078, "learning_rate": 1.2021099112184065e-06, "loss": 0.214, "step": 46650 }, { "epoch": 0.8423973883965651, "grad_norm": 0.2412056177854538, "learning_rate": 1.2007620163011792e-06, "loss": 0.2084, "step": 46655 }, { "epoch": 0.8424876678294657, "grad_norm": 0.5974761843681335, "learning_rate": 1.1994148292186681e-06, "loss": 0.1612, "step": 46660 }, { "epoch": 0.8425779472623665, "grad_norm": 1.1710773706436157, "learning_rate": 1.1980683500792477e-06, "loss": 0.227, "step": 46665 }, { "epoch": 0.8426682266952672, "grad_norm": 0.4503132998943329, "learning_rate": 1.196722578991233e-06, "loss": 0.2271, "step": 46670 }, { "epoch": 0.8427585061281679, "grad_norm": 0.4720381200313568, "learning_rate": 1.1953775160628822e-06, "loss": 0.2954, "step": 46675 }, { "epoch": 0.8428487855610686, "grad_norm": 0.432170569896698, "learning_rate": 1.1940331614023936e-06, "loss": 0.3108, "step": 46680 }, { "epoch": 0.8429390649939693, "grad_norm": 0.5633404850959778, "learning_rate": 1.192689515117913e-06, "loss": 0.2709, "step": 46685 }, { "epoch": 0.84302934442687, "grad_norm": 0.3888779282569885, "learning_rate": 1.1913465773175247e-06, "loss": 0.2879, "step": 46690 }, { "epoch": 0.8431196238597708, "grad_norm": 0.7158108949661255, "learning_rate": 1.1900043481092583e-06, "loss": 0.3079, "step": 46695 }, { "epoch": 0.8432099032926714, "grad_norm": 0.2776183784008026, "learning_rate": 1.188662827601087e-06, "loss": 0.1768, "step": 46700 }, { "epoch": 0.8433001827255722, "grad_norm": 0.39774903655052185, "learning_rate": 1.1873220159009258e-06, "loss": 0.156, "step": 46705 }, { "epoch": 0.8433904621584729, "grad_norm": 0.5344502925872803, "learning_rate": 1.1859819131166372e-06, "loss": 0.1581, "step": 46710 }, { "epoch": 0.8434807415913737, "grad_norm": 0.23680981993675232, "learning_rate": 1.1846425193560162e-06, "loss": 0.1056, "step": 46715 }, { "epoch": 0.8435710210242743, "grad_norm": 0.33252429962158203, "learning_rate": 1.1833038347268145e-06, "loss": 0.2275, "step": 46720 }, { "epoch": 0.843661300457175, "grad_norm": 0.558745801448822, "learning_rate": 1.1819658593367123e-06, "loss": 0.247, "step": 46725 }, { "epoch": 0.8437515798900758, "grad_norm": 0.19038057327270508, "learning_rate": 1.1806285932933436e-06, "loss": 0.208, "step": 46730 }, { "epoch": 0.8438418593229765, "grad_norm": 0.4949471652507782, "learning_rate": 1.1792920367042814e-06, "loss": 0.2876, "step": 46735 }, { "epoch": 0.8439321387558771, "grad_norm": 0.37117841839790344, "learning_rate": 1.1779561896770453e-06, "loss": 0.2373, "step": 46740 }, { "epoch": 0.8440224181887779, "grad_norm": 0.5064247846603394, "learning_rate": 1.176621052319088e-06, "loss": 0.2694, "step": 46745 }, { "epoch": 0.8441126976216786, "grad_norm": 0.46294933557510376, "learning_rate": 1.175286624737817e-06, "loss": 0.2378, "step": 46750 }, { "epoch": 0.8442029770545794, "grad_norm": 0.3186456263065338, "learning_rate": 1.1739529070405742e-06, "loss": 0.2229, "step": 46755 }, { "epoch": 0.84429325648748, "grad_norm": 0.7466938495635986, "learning_rate": 1.1726198993346495e-06, "loss": 0.2063, "step": 46760 }, { "epoch": 0.8443835359203807, "grad_norm": 0.64537113904953, "learning_rate": 1.171287601727271e-06, "loss": 0.2007, "step": 46765 }, { "epoch": 0.8444738153532815, "grad_norm": 0.4769459068775177, "learning_rate": 1.169956014325614e-06, "loss": 0.2044, "step": 46770 }, { "epoch": 0.8445640947861822, "grad_norm": 0.3023054599761963, "learning_rate": 1.1686251372367952e-06, "loss": 0.1671, "step": 46775 }, { "epoch": 0.844654374219083, "grad_norm": 0.3669189214706421, "learning_rate": 1.1672949705678748e-06, "loss": 0.0945, "step": 46780 }, { "epoch": 0.8447446536519836, "grad_norm": 0.2845577597618103, "learning_rate": 1.1659655144258553e-06, "loss": 0.2238, "step": 46785 }, { "epoch": 0.8448349330848843, "grad_norm": 0.3285166919231415, "learning_rate": 1.1646367689176774e-06, "loss": 0.2137, "step": 46790 }, { "epoch": 0.8449252125177851, "grad_norm": 0.5089724659919739, "learning_rate": 1.1633087341502347e-06, "loss": 0.2013, "step": 46795 }, { "epoch": 0.8450154919506858, "grad_norm": 0.24118827283382416, "learning_rate": 1.1619814102303516e-06, "loss": 0.2986, "step": 46800 }, { "epoch": 0.8451057713835864, "grad_norm": 0.40448451042175293, "learning_rate": 1.1606547972648096e-06, "loss": 0.2873, "step": 46805 }, { "epoch": 0.8451960508164872, "grad_norm": 0.5910768508911133, "learning_rate": 1.1593288953603189e-06, "loss": 0.2162, "step": 46810 }, { "epoch": 0.8452863302493879, "grad_norm": 0.4762016832828522, "learning_rate": 1.1580037046235425e-06, "loss": 0.1681, "step": 46815 }, { "epoch": 0.8453766096822887, "grad_norm": 0.653786301612854, "learning_rate": 1.1566792251610781e-06, "loss": 0.2653, "step": 46820 }, { "epoch": 0.8454668891151893, "grad_norm": 0.552449643611908, "learning_rate": 1.1553554570794757e-06, "loss": 0.2972, "step": 46825 }, { "epoch": 0.84555716854809, "grad_norm": 0.5560564994812012, "learning_rate": 1.1540324004852166e-06, "loss": 0.3347, "step": 46830 }, { "epoch": 0.8456474479809908, "grad_norm": 0.42363405227661133, "learning_rate": 1.152710055484736e-06, "loss": 0.2751, "step": 46835 }, { "epoch": 0.8457377274138915, "grad_norm": 0.20735247433185577, "learning_rate": 1.151388422184404e-06, "loss": 0.1928, "step": 46840 }, { "epoch": 0.8458280068467922, "grad_norm": 0.6323709487915039, "learning_rate": 1.1500675006905416e-06, "loss": 0.2188, "step": 46845 }, { "epoch": 0.8459182862796929, "grad_norm": 0.6633914113044739, "learning_rate": 1.1487472911094e-06, "loss": 0.263, "step": 46850 }, { "epoch": 0.8460085657125936, "grad_norm": 0.6211409568786621, "learning_rate": 1.1474277935471866e-06, "loss": 0.2004, "step": 46855 }, { "epoch": 0.8460988451454944, "grad_norm": 0.34853506088256836, "learning_rate": 1.1461090081100402e-06, "loss": 0.1077, "step": 46860 }, { "epoch": 0.846189124578395, "grad_norm": 0.41717976331710815, "learning_rate": 1.144790934904051e-06, "loss": 0.186, "step": 46865 }, { "epoch": 0.8462794040112958, "grad_norm": 0.4029599726200104, "learning_rate": 1.1434735740352465e-06, "loss": 0.2869, "step": 46870 }, { "epoch": 0.8463696834441965, "grad_norm": 0.47104987502098083, "learning_rate": 1.1421569256096032e-06, "loss": 0.2056, "step": 46875 }, { "epoch": 0.8464599628770972, "grad_norm": 0.627106249332428, "learning_rate": 1.140840989733032e-06, "loss": 0.2746, "step": 46880 }, { "epoch": 0.8465502423099979, "grad_norm": 0.5372933149337769, "learning_rate": 1.1395257665113878e-06, "loss": 0.2181, "step": 46885 }, { "epoch": 0.8466405217428986, "grad_norm": 0.482340544462204, "learning_rate": 1.1382112560504765e-06, "loss": 0.2103, "step": 46890 }, { "epoch": 0.8467308011757994, "grad_norm": 0.28699415922164917, "learning_rate": 1.1368974584560366e-06, "loss": 0.1307, "step": 46895 }, { "epoch": 0.8468210806087001, "grad_norm": 0.421001136302948, "learning_rate": 1.1355843738337546e-06, "loss": 0.2902, "step": 46900 }, { "epoch": 0.8469113600416007, "grad_norm": 0.503657341003418, "learning_rate": 1.1342720022892594e-06, "loss": 0.1634, "step": 46905 }, { "epoch": 0.8470016394745015, "grad_norm": 0.38650810718536377, "learning_rate": 1.1329603439281245e-06, "loss": 0.2721, "step": 46910 }, { "epoch": 0.8470919189074022, "grad_norm": 0.8284596800804138, "learning_rate": 1.1316493988558575e-06, "loss": 0.316, "step": 46915 }, { "epoch": 0.847182198340303, "grad_norm": 0.4707046449184418, "learning_rate": 1.1303391671779185e-06, "loss": 0.2384, "step": 46920 }, { "epoch": 0.8472724777732036, "grad_norm": 0.7109625935554504, "learning_rate": 1.1290296489997033e-06, "loss": 0.1903, "step": 46925 }, { "epoch": 0.8473627572061043, "grad_norm": 0.3998103439807892, "learning_rate": 1.1277208444265552e-06, "loss": 0.1523, "step": 46930 }, { "epoch": 0.8474530366390051, "grad_norm": 0.5555111765861511, "learning_rate": 1.126412753563757e-06, "loss": 0.2332, "step": 46935 }, { "epoch": 0.8475433160719058, "grad_norm": 0.7187251448631287, "learning_rate": 1.1251053765165377e-06, "loss": 0.3009, "step": 46940 }, { "epoch": 0.8476335955048064, "grad_norm": 0.275674045085907, "learning_rate": 1.1237987133900619e-06, "loss": 0.2232, "step": 46945 }, { "epoch": 0.8477238749377072, "grad_norm": 0.3866052031517029, "learning_rate": 1.1224927642894456e-06, "loss": 0.233, "step": 46950 }, { "epoch": 0.8478141543706079, "grad_norm": 0.6808643937110901, "learning_rate": 1.1211875293197394e-06, "loss": 0.2382, "step": 46955 }, { "epoch": 0.8479044338035087, "grad_norm": 0.3973676860332489, "learning_rate": 1.1198830085859425e-06, "loss": 0.1897, "step": 46960 }, { "epoch": 0.8479947132364093, "grad_norm": 0.4017499089241028, "learning_rate": 1.1185792021929909e-06, "loss": 0.1826, "step": 46965 }, { "epoch": 0.84808499266931, "grad_norm": 0.31418779492378235, "learning_rate": 1.1172761102457675e-06, "loss": 0.2113, "step": 46970 }, { "epoch": 0.8481752721022108, "grad_norm": 0.49413245916366577, "learning_rate": 1.1159737328491005e-06, "loss": 0.2781, "step": 46975 }, { "epoch": 0.8482655515351115, "grad_norm": 0.31502243876457214, "learning_rate": 1.1146720701077517e-06, "loss": 0.2216, "step": 46980 }, { "epoch": 0.8483558309680121, "grad_norm": 0.5428707003593445, "learning_rate": 1.113371122126433e-06, "loss": 0.1835, "step": 46985 }, { "epoch": 0.8484461104009129, "grad_norm": 0.9356114864349365, "learning_rate": 1.1120708890097954e-06, "loss": 0.1494, "step": 46990 }, { "epoch": 0.8485363898338136, "grad_norm": 0.696953296661377, "learning_rate": 1.1107713708624324e-06, "loss": 0.3059, "step": 46995 }, { "epoch": 0.8486266692667144, "grad_norm": 0.1654614359140396, "learning_rate": 1.1094725677888806e-06, "loss": 0.2274, "step": 47000 }, { "epoch": 0.848716948699615, "grad_norm": 0.2907618582248688, "learning_rate": 1.1081744798936235e-06, "loss": 0.1928, "step": 47005 }, { "epoch": 0.8488072281325157, "grad_norm": 0.27342113852500916, "learning_rate": 1.106877107281078e-06, "loss": 0.287, "step": 47010 }, { "epoch": 0.8488975075654165, "grad_norm": 0.21456903219223022, "learning_rate": 1.1055804500556112e-06, "loss": 0.2203, "step": 47015 }, { "epoch": 0.8489877869983172, "grad_norm": 0.44015249609947205, "learning_rate": 1.104284508321527e-06, "loss": 0.1979, "step": 47020 }, { "epoch": 0.8490780664312179, "grad_norm": 0.2882255017757416, "learning_rate": 1.1029892821830779e-06, "loss": 0.2393, "step": 47025 }, { "epoch": 0.8491683458641186, "grad_norm": 0.3677639663219452, "learning_rate": 1.1016947717444504e-06, "loss": 0.1739, "step": 47030 }, { "epoch": 0.8492586252970193, "grad_norm": 0.2677743136882782, "learning_rate": 1.1004009771097834e-06, "loss": 0.2938, "step": 47035 }, { "epoch": 0.8493489047299201, "grad_norm": 0.38641270995140076, "learning_rate": 1.0991078983831505e-06, "loss": 0.1543, "step": 47040 }, { "epoch": 0.8494391841628207, "grad_norm": 0.30329596996307373, "learning_rate": 1.0978155356685727e-06, "loss": 0.213, "step": 47045 }, { "epoch": 0.8495294635957215, "grad_norm": 0.31273511052131653, "learning_rate": 1.0965238890700091e-06, "loss": 0.2213, "step": 47050 }, { "epoch": 0.8496197430286222, "grad_norm": 0.6745008826255798, "learning_rate": 1.0952329586913657e-06, "loss": 0.1875, "step": 47055 }, { "epoch": 0.8497100224615229, "grad_norm": 0.5616105198860168, "learning_rate": 1.0939427446364848e-06, "loss": 0.2215, "step": 47060 }, { "epoch": 0.8498003018944236, "grad_norm": 0.16216596961021423, "learning_rate": 1.0926532470091567e-06, "loss": 0.163, "step": 47065 }, { "epoch": 0.8498905813273243, "grad_norm": 0.40241101384162903, "learning_rate": 1.091364465913115e-06, "loss": 0.2139, "step": 47070 }, { "epoch": 0.849980860760225, "grad_norm": 0.5022798180580139, "learning_rate": 1.0900764014520272e-06, "loss": 0.2423, "step": 47075 }, { "epoch": 0.8500711401931258, "grad_norm": 0.6277716755867004, "learning_rate": 1.0887890537295142e-06, "loss": 0.1428, "step": 47080 }, { "epoch": 0.8501614196260264, "grad_norm": 0.5615366697311401, "learning_rate": 1.0875024228491282e-06, "loss": 0.1823, "step": 47085 }, { "epoch": 0.8502516990589272, "grad_norm": 0.3080914318561554, "learning_rate": 1.086216508914374e-06, "loss": 0.1969, "step": 47090 }, { "epoch": 0.8503419784918279, "grad_norm": 0.38214749097824097, "learning_rate": 1.0849313120286908e-06, "loss": 0.1717, "step": 47095 }, { "epoch": 0.8504322579247287, "grad_norm": 0.5622016787528992, "learning_rate": 1.0836468322954651e-06, "loss": 0.2176, "step": 47100 }, { "epoch": 0.8505225373576293, "grad_norm": 0.316812664270401, "learning_rate": 1.082363069818023e-06, "loss": 0.1706, "step": 47105 }, { "epoch": 0.85061281679053, "grad_norm": 0.3119005560874939, "learning_rate": 1.0810800246996366e-06, "loss": 0.2445, "step": 47110 }, { "epoch": 0.8507030962234308, "grad_norm": 0.2569788694381714, "learning_rate": 1.0797976970435132e-06, "loss": 0.1961, "step": 47115 }, { "epoch": 0.8507933756563315, "grad_norm": 0.5018973350524902, "learning_rate": 1.078516086952811e-06, "loss": 0.1391, "step": 47120 }, { "epoch": 0.8508836550892321, "grad_norm": 0.4110466241836548, "learning_rate": 1.0772351945306225e-06, "loss": 0.2013, "step": 47125 }, { "epoch": 0.8509739345221329, "grad_norm": 0.37337103486061096, "learning_rate": 1.0759550198799873e-06, "loss": 0.2508, "step": 47130 }, { "epoch": 0.8510642139550336, "grad_norm": 0.581804633140564, "learning_rate": 1.0746755631038874e-06, "loss": 0.2345, "step": 47135 }, { "epoch": 0.8511544933879344, "grad_norm": 0.35834333300590515, "learning_rate": 1.0733968243052462e-06, "loss": 0.2535, "step": 47140 }, { "epoch": 0.851244772820835, "grad_norm": 0.5402718186378479, "learning_rate": 1.0721188035869267e-06, "loss": 0.2759, "step": 47145 }, { "epoch": 0.8513350522537357, "grad_norm": 0.4894947409629822, "learning_rate": 1.0708415010517392e-06, "loss": 0.2165, "step": 47150 }, { "epoch": 0.8514253316866365, "grad_norm": 0.44462037086486816, "learning_rate": 1.0695649168024313e-06, "loss": 0.1352, "step": 47155 }, { "epoch": 0.8515156111195372, "grad_norm": 0.419643759727478, "learning_rate": 1.0682890509416932e-06, "loss": 0.2462, "step": 47160 }, { "epoch": 0.851605890552438, "grad_norm": 0.964062511920929, "learning_rate": 1.0670139035721617e-06, "loss": 0.2357, "step": 47165 }, { "epoch": 0.8516961699853386, "grad_norm": 0.38403525948524475, "learning_rate": 1.0657394747964122e-06, "loss": 0.1935, "step": 47170 }, { "epoch": 0.8517864494182393, "grad_norm": 0.2649580240249634, "learning_rate": 1.0644657647169654e-06, "loss": 0.1805, "step": 47175 }, { "epoch": 0.8518767288511401, "grad_norm": 0.5285569429397583, "learning_rate": 1.0631927734362791e-06, "loss": 0.2362, "step": 47180 }, { "epoch": 0.8519670082840408, "grad_norm": 0.9157831072807312, "learning_rate": 1.0619205010567579e-06, "loss": 0.2776, "step": 47185 }, { "epoch": 0.8520572877169414, "grad_norm": 0.367585688829422, "learning_rate": 1.0606489476807458e-06, "loss": 0.2512, "step": 47190 }, { "epoch": 0.8521475671498422, "grad_norm": 0.30853065848350525, "learning_rate": 1.0593781134105296e-06, "loss": 0.236, "step": 47195 }, { "epoch": 0.8522378465827429, "grad_norm": 0.5924100279808044, "learning_rate": 1.05810799834834e-06, "loss": 0.1941, "step": 47200 }, { "epoch": 0.8523281260156437, "grad_norm": 0.4391089081764221, "learning_rate": 1.0568386025963496e-06, "loss": 0.2303, "step": 47205 }, { "epoch": 0.8524184054485443, "grad_norm": 0.44665244221687317, "learning_rate": 1.0555699262566688e-06, "loss": 0.1246, "step": 47210 }, { "epoch": 0.852508684881445, "grad_norm": 0.275681734085083, "learning_rate": 1.0543019694313573e-06, "loss": 0.1656, "step": 47215 }, { "epoch": 0.8525989643143458, "grad_norm": 0.34005236625671387, "learning_rate": 1.0530347322224087e-06, "loss": 0.1833, "step": 47220 }, { "epoch": 0.8526892437472465, "grad_norm": 0.5032612085342407, "learning_rate": 1.0517682147317665e-06, "loss": 0.1561, "step": 47225 }, { "epoch": 0.8527795231801472, "grad_norm": 0.3961392343044281, "learning_rate": 1.050502417061311e-06, "loss": 0.2852, "step": 47230 }, { "epoch": 0.8528698026130479, "grad_norm": 0.7540068030357361, "learning_rate": 1.0492373393128663e-06, "loss": 0.2878, "step": 47235 }, { "epoch": 0.8529600820459486, "grad_norm": 0.53920978307724, "learning_rate": 1.0479729815881988e-06, "loss": 0.2544, "step": 47240 }, { "epoch": 0.8530503614788494, "grad_norm": 0.26546889543533325, "learning_rate": 1.0467093439890198e-06, "loss": 0.1424, "step": 47245 }, { "epoch": 0.85314064091175, "grad_norm": 0.4271557033061981, "learning_rate": 1.0454464266169774e-06, "loss": 0.2831, "step": 47250 }, { "epoch": 0.8532309203446508, "grad_norm": 0.43881359696388245, "learning_rate": 1.0441842295736615e-06, "loss": 0.2006, "step": 47255 }, { "epoch": 0.8533211997775515, "grad_norm": 0.4485865831375122, "learning_rate": 1.04292275296061e-06, "loss": 0.1576, "step": 47260 }, { "epoch": 0.8534114792104522, "grad_norm": 0.39906686544418335, "learning_rate": 1.0416619968792985e-06, "loss": 0.2443, "step": 47265 }, { "epoch": 0.8535017586433529, "grad_norm": 0.6947904825210571, "learning_rate": 1.0404019614311479e-06, "loss": 0.2761, "step": 47270 }, { "epoch": 0.8535920380762536, "grad_norm": 0.7733624577522278, "learning_rate": 1.0391426467175147e-06, "loss": 0.2278, "step": 47275 }, { "epoch": 0.8536823175091544, "grad_norm": 1.3637889623641968, "learning_rate": 1.0378840528397061e-06, "loss": 0.21, "step": 47280 }, { "epoch": 0.8537725969420551, "grad_norm": 0.4050987958908081, "learning_rate": 1.036626179898963e-06, "loss": 0.2574, "step": 47285 }, { "epoch": 0.8538628763749557, "grad_norm": 0.29665687680244446, "learning_rate": 1.0353690279964746e-06, "loss": 0.2488, "step": 47290 }, { "epoch": 0.8539531558078565, "grad_norm": 0.3802777826786041, "learning_rate": 1.0341125972333676e-06, "loss": 0.1798, "step": 47295 }, { "epoch": 0.8540434352407572, "grad_norm": 0.2983674108982086, "learning_rate": 1.0328568877107137e-06, "loss": 0.1454, "step": 47300 }, { "epoch": 0.854133714673658, "grad_norm": 0.5490597486495972, "learning_rate": 1.0316018995295252e-06, "loss": 0.2243, "step": 47305 }, { "epoch": 0.8542239941065586, "grad_norm": 0.4573357105255127, "learning_rate": 1.0303476327907602e-06, "loss": 0.1936, "step": 47310 }, { "epoch": 0.8543142735394593, "grad_norm": 0.23389914631843567, "learning_rate": 1.0290940875953103e-06, "loss": 0.2392, "step": 47315 }, { "epoch": 0.8544045529723601, "grad_norm": 0.5954925417900085, "learning_rate": 1.0278412640440183e-06, "loss": 0.261, "step": 47320 }, { "epoch": 0.8544948324052608, "grad_norm": 0.30451729893684387, "learning_rate": 1.02658916223766e-06, "loss": 0.1524, "step": 47325 }, { "epoch": 0.8545851118381614, "grad_norm": 0.24599765241146088, "learning_rate": 1.0253377822769605e-06, "loss": 0.2205, "step": 47330 }, { "epoch": 0.8546753912710622, "grad_norm": 0.24942728877067566, "learning_rate": 1.0240871242625872e-06, "loss": 0.218, "step": 47335 }, { "epoch": 0.8547656707039629, "grad_norm": 0.17759248614311218, "learning_rate": 1.0228371882951416e-06, "loss": 0.2113, "step": 47340 }, { "epoch": 0.8548559501368637, "grad_norm": 0.4103125035762787, "learning_rate": 1.0215879744751756e-06, "loss": 0.1333, "step": 47345 }, { "epoch": 0.8549462295697643, "grad_norm": 0.31829023361206055, "learning_rate": 1.0203394829031754e-06, "loss": 0.2226, "step": 47350 }, { "epoch": 0.855036509002665, "grad_norm": 0.9971163868904114, "learning_rate": 1.0190917136795775e-06, "loss": 0.3087, "step": 47355 }, { "epoch": 0.8551267884355658, "grad_norm": 0.2358555644750595, "learning_rate": 1.0178446669047515e-06, "loss": 0.2073, "step": 47360 }, { "epoch": 0.8552170678684665, "grad_norm": 0.3916555643081665, "learning_rate": 1.016598342679016e-06, "loss": 0.2214, "step": 47365 }, { "epoch": 0.8553073473013671, "grad_norm": 0.5762766599655151, "learning_rate": 1.0153527411026287e-06, "loss": 0.2235, "step": 47370 }, { "epoch": 0.8553976267342679, "grad_norm": 0.6114965081214905, "learning_rate": 1.0141078622757894e-06, "loss": 0.1998, "step": 47375 }, { "epoch": 0.8554879061671686, "grad_norm": 0.711415708065033, "learning_rate": 1.012863706298638e-06, "loss": 0.2235, "step": 47380 }, { "epoch": 0.8555781856000694, "grad_norm": 0.4038284420967102, "learning_rate": 1.01162027327126e-06, "loss": 0.1959, "step": 47385 }, { "epoch": 0.85566846503297, "grad_norm": 0.2545495629310608, "learning_rate": 1.0103775632936764e-06, "loss": 0.1961, "step": 47390 }, { "epoch": 0.8557587444658707, "grad_norm": 0.429012656211853, "learning_rate": 1.0091355764658583e-06, "loss": 0.2066, "step": 47395 }, { "epoch": 0.8558490238987715, "grad_norm": 0.4435504674911499, "learning_rate": 1.0078943128877128e-06, "loss": 0.2632, "step": 47400 }, { "epoch": 0.8559393033316722, "grad_norm": 0.6207600235939026, "learning_rate": 1.0066537726590918e-06, "loss": 0.2314, "step": 47405 }, { "epoch": 0.8560295827645729, "grad_norm": 0.49283120036125183, "learning_rate": 1.0054139558797859e-06, "loss": 0.2113, "step": 47410 }, { "epoch": 0.8561198621974736, "grad_norm": 0.6071460247039795, "learning_rate": 1.004174862649533e-06, "loss": 0.2228, "step": 47415 }, { "epoch": 0.8562101416303743, "grad_norm": 0.3180447816848755, "learning_rate": 1.002936493068003e-06, "loss": 0.2133, "step": 47420 }, { "epoch": 0.8563004210632751, "grad_norm": 0.4284539520740509, "learning_rate": 1.00169884723482e-06, "loss": 0.2385, "step": 47425 }, { "epoch": 0.8563907004961757, "grad_norm": 0.45321059226989746, "learning_rate": 1.0004619252495385e-06, "loss": 0.2522, "step": 47430 }, { "epoch": 0.8564809799290765, "grad_norm": 0.5622091889381409, "learning_rate": 9.992257272116623e-07, "loss": 0.2924, "step": 47435 }, { "epoch": 0.8565712593619772, "grad_norm": 0.4963833689689636, "learning_rate": 9.979902532206365e-07, "loss": 0.1977, "step": 47440 }, { "epoch": 0.8566615387948779, "grad_norm": 0.2972092628479004, "learning_rate": 9.967555033758414e-07, "loss": 0.2081, "step": 47445 }, { "epoch": 0.8567518182277786, "grad_norm": 0.5037822127342224, "learning_rate": 9.955214777766076e-07, "loss": 0.2568, "step": 47450 }, { "epoch": 0.8568420976606793, "grad_norm": 0.5657833218574524, "learning_rate": 9.942881765222012e-07, "loss": 0.1971, "step": 47455 }, { "epoch": 0.85693237709358, "grad_norm": 0.6688538193702698, "learning_rate": 9.93055599711833e-07, "loss": 0.1707, "step": 47460 }, { "epoch": 0.8570226565264808, "grad_norm": 0.5159571766853333, "learning_rate": 9.918237474446546e-07, "loss": 0.2757, "step": 47465 }, { "epoch": 0.8571129359593814, "grad_norm": 0.4161568880081177, "learning_rate": 9.905926198197624e-07, "loss": 0.2097, "step": 47470 }, { "epoch": 0.8572032153922822, "grad_norm": 0.3604661524295807, "learning_rate": 9.893622169361871e-07, "loss": 0.1489, "step": 47475 }, { "epoch": 0.8572934948251829, "grad_norm": 0.4307427406311035, "learning_rate": 9.881325388929087e-07, "loss": 0.2146, "step": 47480 }, { "epoch": 0.8573837742580837, "grad_norm": 0.3729318082332611, "learning_rate": 9.869035857888443e-07, "loss": 0.1714, "step": 47485 }, { "epoch": 0.8574740536909843, "grad_norm": 0.4250018000602722, "learning_rate": 9.856753577228562e-07, "loss": 0.2264, "step": 47490 }, { "epoch": 0.857564333123885, "grad_norm": 0.5746373534202576, "learning_rate": 9.844478547937431e-07, "loss": 0.1523, "step": 47495 }, { "epoch": 0.8576546125567858, "grad_norm": 0.3057235777378082, "learning_rate": 9.832210771002515e-07, "loss": 0.1251, "step": 47500 }, { "epoch": 0.8577448919896865, "grad_norm": 0.3894713222980499, "learning_rate": 9.819950247410647e-07, "loss": 0.2702, "step": 47505 }, { "epoch": 0.8578351714225871, "grad_norm": 0.43752869963645935, "learning_rate": 9.80769697814814e-07, "loss": 0.2357, "step": 47510 }, { "epoch": 0.8579254508554879, "grad_norm": 0.34574222564697266, "learning_rate": 9.795450964200625e-07, "loss": 0.1833, "step": 47515 }, { "epoch": 0.8580157302883886, "grad_norm": 0.3923795521259308, "learning_rate": 9.78321220655325e-07, "loss": 0.2149, "step": 47520 }, { "epoch": 0.8581060097212894, "grad_norm": 0.3717537224292755, "learning_rate": 9.77098070619049e-07, "loss": 0.192, "step": 47525 }, { "epoch": 0.85819628915419, "grad_norm": 0.429400235414505, "learning_rate": 9.758756464096309e-07, "loss": 0.2578, "step": 47530 }, { "epoch": 0.8582865685870907, "grad_norm": 0.37884220480918884, "learning_rate": 9.74653948125407e-07, "loss": 0.2427, "step": 47535 }, { "epoch": 0.8583768480199915, "grad_norm": 0.5908001065254211, "learning_rate": 9.73432975864651e-07, "loss": 0.233, "step": 47540 }, { "epoch": 0.8584671274528922, "grad_norm": 0.3083002269268036, "learning_rate": 9.722127297255833e-07, "loss": 0.2563, "step": 47545 }, { "epoch": 0.8585574068857929, "grad_norm": 0.7070236802101135, "learning_rate": 9.709932098063624e-07, "loss": 0.2216, "step": 47550 }, { "epoch": 0.8586476863186936, "grad_norm": 0.7982986569404602, "learning_rate": 9.697744162050914e-07, "loss": 0.1687, "step": 47555 }, { "epoch": 0.8587379657515943, "grad_norm": 0.5032423138618469, "learning_rate": 9.685563490198113e-07, "loss": 0.259, "step": 47560 }, { "epoch": 0.8588282451844951, "grad_norm": 0.6070419549942017, "learning_rate": 9.67339008348508e-07, "loss": 0.2304, "step": 47565 }, { "epoch": 0.8589185246173958, "grad_norm": 0.4481718838214874, "learning_rate": 9.661223942891075e-07, "loss": 0.2453, "step": 47570 }, { "epoch": 0.8590088040502964, "grad_norm": 0.6688275337219238, "learning_rate": 9.649065069394802e-07, "loss": 0.268, "step": 47575 }, { "epoch": 0.8590990834831972, "grad_norm": 0.584083080291748, "learning_rate": 9.636913463974308e-07, "loss": 0.2024, "step": 47580 }, { "epoch": 0.8591893629160979, "grad_norm": 0.4952586591243744, "learning_rate": 9.624769127607137e-07, "loss": 0.2318, "step": 47585 }, { "epoch": 0.8592796423489987, "grad_norm": 1.0109870433807373, "learning_rate": 9.612632061270178e-07, "loss": 0.2133, "step": 47590 }, { "epoch": 0.8593699217818993, "grad_norm": 0.5469220876693726, "learning_rate": 9.600502265939793e-07, "loss": 0.1844, "step": 47595 }, { "epoch": 0.8594602012148, "grad_norm": 0.2093033343553543, "learning_rate": 9.588379742591736e-07, "loss": 0.1955, "step": 47600 }, { "epoch": 0.8595504806477008, "grad_norm": 0.34480932354927063, "learning_rate": 9.576264492201194e-07, "loss": 0.2472, "step": 47605 }, { "epoch": 0.8596407600806015, "grad_norm": 0.3347495198249817, "learning_rate": 9.564156515742705e-07, "loss": 0.3033, "step": 47610 }, { "epoch": 0.8597310395135022, "grad_norm": 0.41779398918151855, "learning_rate": 9.552055814190308e-07, "loss": 0.2663, "step": 47615 }, { "epoch": 0.8598213189464029, "grad_norm": 0.3597933351993561, "learning_rate": 9.53996238851741e-07, "loss": 0.2986, "step": 47620 }, { "epoch": 0.8599115983793036, "grad_norm": 0.8659226894378662, "learning_rate": 9.527876239696798e-07, "loss": 0.3131, "step": 47625 }, { "epoch": 0.8600018778122044, "grad_norm": 0.8438224792480469, "learning_rate": 9.515797368700764e-07, "loss": 0.2146, "step": 47630 }, { "epoch": 0.860092157245105, "grad_norm": 0.5149122476577759, "learning_rate": 9.503725776500927e-07, "loss": 0.313, "step": 47635 }, { "epoch": 0.8601824366780058, "grad_norm": 0.5562651753425598, "learning_rate": 9.491661464068413e-07, "loss": 0.1861, "step": 47640 }, { "epoch": 0.8602727161109065, "grad_norm": 0.39613303542137146, "learning_rate": 9.479604432373657e-07, "loss": 0.2227, "step": 47645 }, { "epoch": 0.8603629955438072, "grad_norm": 0.42106348276138306, "learning_rate": 9.467554682386593e-07, "loss": 0.1886, "step": 47650 }, { "epoch": 0.8604532749767079, "grad_norm": 0.620561420917511, "learning_rate": 9.455512215076501e-07, "loss": 0.378, "step": 47655 }, { "epoch": 0.8605435544096086, "grad_norm": 0.2884880304336548, "learning_rate": 9.443477031412129e-07, "loss": 0.2732, "step": 47660 }, { "epoch": 0.8606338338425094, "grad_norm": 0.2973548173904419, "learning_rate": 9.431449132361615e-07, "loss": 0.291, "step": 47665 }, { "epoch": 0.8607241132754101, "grad_norm": 0.65578693151474, "learning_rate": 9.419428518892548e-07, "loss": 0.295, "step": 47670 }, { "epoch": 0.8608143927083107, "grad_norm": 0.49643024802207947, "learning_rate": 9.407415191971847e-07, "loss": 0.1978, "step": 47675 }, { "epoch": 0.8609046721412115, "grad_norm": 0.29305776953697205, "learning_rate": 9.395409152565948e-07, "loss": 0.2002, "step": 47680 }, { "epoch": 0.8609949515741122, "grad_norm": 0.35219839215278625, "learning_rate": 9.383410401640602e-07, "loss": 0.1997, "step": 47685 }, { "epoch": 0.861085231007013, "grad_norm": 0.5114193558692932, "learning_rate": 9.371418940161059e-07, "loss": 0.1862, "step": 47690 }, { "epoch": 0.8611755104399136, "grad_norm": 0.7070506811141968, "learning_rate": 9.359434769091913e-07, "loss": 0.1949, "step": 47695 }, { "epoch": 0.8612657898728143, "grad_norm": 0.7334641814231873, "learning_rate": 9.347457889397215e-07, "loss": 0.2913, "step": 47700 }, { "epoch": 0.8613560693057151, "grad_norm": 0.42316699028015137, "learning_rate": 9.335488302040452e-07, "loss": 0.2535, "step": 47705 }, { "epoch": 0.8614463487386158, "grad_norm": 0.7839803099632263, "learning_rate": 9.32352600798443e-07, "loss": 0.1816, "step": 47710 }, { "epoch": 0.8615366281715164, "grad_norm": 0.41461873054504395, "learning_rate": 9.311571008191478e-07, "loss": 0.2394, "step": 47715 }, { "epoch": 0.8616269076044172, "grad_norm": 0.6121299862861633, "learning_rate": 9.299623303623262e-07, "loss": 0.2592, "step": 47720 }, { "epoch": 0.8617171870373179, "grad_norm": 0.4134901165962219, "learning_rate": 9.287682895240879e-07, "loss": 0.2361, "step": 47725 }, { "epoch": 0.8618074664702187, "grad_norm": 0.33365491032600403, "learning_rate": 9.275749784004884e-07, "loss": 0.2001, "step": 47730 }, { "epoch": 0.8618977459031193, "grad_norm": 0.5843504071235657, "learning_rate": 9.263823970875196e-07, "loss": 0.3159, "step": 47735 }, { "epoch": 0.86198802533602, "grad_norm": 0.4002785384654999, "learning_rate": 9.251905456811139e-07, "loss": 0.1652, "step": 47740 }, { "epoch": 0.8620783047689208, "grad_norm": 0.6355798840522766, "learning_rate": 9.239994242771499e-07, "loss": 0.205, "step": 47745 }, { "epoch": 0.8621685842018215, "grad_norm": 0.4560568034648895, "learning_rate": 9.228090329714423e-07, "loss": 0.2141, "step": 47750 }, { "epoch": 0.8622588636347222, "grad_norm": 0.5634968876838684, "learning_rate": 9.21619371859751e-07, "loss": 0.2183, "step": 47755 }, { "epoch": 0.8623491430676229, "grad_norm": 0.5417904853820801, "learning_rate": 9.20430441037774e-07, "loss": 0.1852, "step": 47760 }, { "epoch": 0.8624394225005236, "grad_norm": 0.18929065763950348, "learning_rate": 9.192422406011536e-07, "loss": 0.1941, "step": 47765 }, { "epoch": 0.8625297019334244, "grad_norm": 0.8262953758239746, "learning_rate": 9.180547706454712e-07, "loss": 0.1594, "step": 47770 }, { "epoch": 0.862619981366325, "grad_norm": 0.4193209707736969, "learning_rate": 9.168680312662515e-07, "loss": 0.2705, "step": 47775 }, { "epoch": 0.8627102607992257, "grad_norm": 0.45258623361587524, "learning_rate": 9.156820225589569e-07, "loss": 0.1757, "step": 47780 }, { "epoch": 0.8628005402321265, "grad_norm": 0.4728344678878784, "learning_rate": 9.144967446189956e-07, "loss": 0.2866, "step": 47785 }, { "epoch": 0.8628908196650272, "grad_norm": 0.5930164456367493, "learning_rate": 9.133121975417125e-07, "loss": 0.2212, "step": 47790 }, { "epoch": 0.8629810990979279, "grad_norm": 0.47831207513809204, "learning_rate": 9.121283814223946e-07, "loss": 0.2075, "step": 47795 }, { "epoch": 0.8630713785308286, "grad_norm": 0.6171165108680725, "learning_rate": 9.10945296356277e-07, "loss": 0.2753, "step": 47800 }, { "epoch": 0.8631616579637293, "grad_norm": 0.31840503215789795, "learning_rate": 9.097629424385235e-07, "loss": 0.1894, "step": 47805 }, { "epoch": 0.8632519373966301, "grad_norm": 0.717898428440094, "learning_rate": 9.085813197642502e-07, "loss": 0.1638, "step": 47810 }, { "epoch": 0.8633422168295307, "grad_norm": 0.4794361889362335, "learning_rate": 9.074004284285076e-07, "loss": 0.2876, "step": 47815 }, { "epoch": 0.8634324962624315, "grad_norm": 0.5447631478309631, "learning_rate": 9.062202685262933e-07, "loss": 0.2605, "step": 47820 }, { "epoch": 0.8635227756953322, "grad_norm": 0.32787272334098816, "learning_rate": 9.050408401525379e-07, "loss": 0.1382, "step": 47825 }, { "epoch": 0.8636130551282329, "grad_norm": 0.3508855700492859, "learning_rate": 9.038621434021199e-07, "loss": 0.2211, "step": 47830 }, { "epoch": 0.8637033345611336, "grad_norm": 0.3971794545650482, "learning_rate": 9.02684178369857e-07, "loss": 0.1168, "step": 47835 }, { "epoch": 0.8637936139940343, "grad_norm": 0.3002808690071106, "learning_rate": 9.015069451505109e-07, "loss": 0.2017, "step": 47840 }, { "epoch": 0.8638838934269351, "grad_norm": 0.3641931712627411, "learning_rate": 9.00330443838776e-07, "loss": 0.2921, "step": 47845 }, { "epoch": 0.8639741728598358, "grad_norm": 0.8863463401794434, "learning_rate": 8.991546745292979e-07, "loss": 0.2159, "step": 47850 }, { "epoch": 0.8640644522927364, "grad_norm": 0.6085312366485596, "learning_rate": 8.97979637316655e-07, "loss": 0.2143, "step": 47855 }, { "epoch": 0.8641547317256372, "grad_norm": 0.4391411542892456, "learning_rate": 8.968053322953718e-07, "loss": 0.2538, "step": 47860 }, { "epoch": 0.8642450111585379, "grad_norm": 0.4850060045719147, "learning_rate": 8.956317595599129e-07, "loss": 0.2463, "step": 47865 }, { "epoch": 0.8643352905914387, "grad_norm": 0.5466737151145935, "learning_rate": 8.94458919204686e-07, "loss": 0.1952, "step": 47870 }, { "epoch": 0.8644255700243393, "grad_norm": 0.29820647835731506, "learning_rate": 8.932868113240334e-07, "loss": 0.2013, "step": 47875 }, { "epoch": 0.86451584945724, "grad_norm": 0.585169792175293, "learning_rate": 8.921154360122464e-07, "loss": 0.1738, "step": 47880 }, { "epoch": 0.8646061288901408, "grad_norm": 0.8074939846992493, "learning_rate": 8.909447933635506e-07, "loss": 0.1304, "step": 47885 }, { "epoch": 0.8646964083230415, "grad_norm": 0.280657023191452, "learning_rate": 8.897748834721187e-07, "loss": 0.3266, "step": 47890 }, { "epoch": 0.8647866877559421, "grad_norm": 0.5078336000442505, "learning_rate": 8.886057064320574e-07, "loss": 0.1204, "step": 47895 }, { "epoch": 0.8648769671888429, "grad_norm": 0.4391881823539734, "learning_rate": 8.874372623374206e-07, "loss": 0.2035, "step": 47900 }, { "epoch": 0.8649672466217436, "grad_norm": 0.3612831234931946, "learning_rate": 8.862695512822039e-07, "loss": 0.2019, "step": 47905 }, { "epoch": 0.8650575260546444, "grad_norm": 0.2005240023136139, "learning_rate": 8.851025733603358e-07, "loss": 0.1793, "step": 47910 }, { "epoch": 0.865147805487545, "grad_norm": 0.3247433006763458, "learning_rate": 8.839363286656965e-07, "loss": 0.1645, "step": 47915 }, { "epoch": 0.8652380849204457, "grad_norm": 0.23706485331058502, "learning_rate": 8.827708172920968e-07, "loss": 0.2286, "step": 47920 }, { "epoch": 0.8653283643533465, "grad_norm": 0.42843496799468994, "learning_rate": 8.81606039333297e-07, "loss": 0.245, "step": 47925 }, { "epoch": 0.8654186437862472, "grad_norm": 0.6373015642166138, "learning_rate": 8.804419948829935e-07, "loss": 0.2401, "step": 47930 }, { "epoch": 0.8655089232191479, "grad_norm": 0.3249672055244446, "learning_rate": 8.792786840348289e-07, "loss": 0.1525, "step": 47935 }, { "epoch": 0.8655992026520486, "grad_norm": 0.25594303011894226, "learning_rate": 8.781161068823774e-07, "loss": 0.2066, "step": 47940 }, { "epoch": 0.8656894820849493, "grad_norm": 0.8266198635101318, "learning_rate": 8.769542635191641e-07, "loss": 0.2451, "step": 47945 }, { "epoch": 0.8657797615178501, "grad_norm": 0.3521641194820404, "learning_rate": 8.757931540386478e-07, "loss": 0.1713, "step": 47950 }, { "epoch": 0.8658700409507508, "grad_norm": 0.4017597436904907, "learning_rate": 8.746327785342357e-07, "loss": 0.2574, "step": 47955 }, { "epoch": 0.8659603203836514, "grad_norm": 0.4761274456977844, "learning_rate": 8.734731370992633e-07, "loss": 0.3343, "step": 47960 }, { "epoch": 0.8660505998165522, "grad_norm": 0.5980884432792664, "learning_rate": 8.72314229827026e-07, "loss": 0.2553, "step": 47965 }, { "epoch": 0.8661408792494529, "grad_norm": 0.6012890934944153, "learning_rate": 8.711560568107414e-07, "loss": 0.1958, "step": 47970 }, { "epoch": 0.8662311586823537, "grad_norm": 0.5191618204116821, "learning_rate": 8.699986181435815e-07, "loss": 0.2341, "step": 47975 }, { "epoch": 0.8663214381152543, "grad_norm": 0.4359510540962219, "learning_rate": 8.68841913918651e-07, "loss": 0.2036, "step": 47980 }, { "epoch": 0.866411717548155, "grad_norm": 0.32875490188598633, "learning_rate": 8.676859442289964e-07, "loss": 0.1804, "step": 47985 }, { "epoch": 0.8665019969810558, "grad_norm": 0.4370461106300354, "learning_rate": 8.66530709167609e-07, "loss": 0.1166, "step": 47990 }, { "epoch": 0.8665922764139565, "grad_norm": 0.15905004739761353, "learning_rate": 8.653762088274198e-07, "loss": 0.2479, "step": 47995 }, { "epoch": 0.8666825558468572, "grad_norm": 0.32449597120285034, "learning_rate": 8.642224433013014e-07, "loss": 0.2932, "step": 48000 }, { "epoch": 0.8667728352797579, "grad_norm": 0.26115724444389343, "learning_rate": 8.630694126820616e-07, "loss": 0.2006, "step": 48005 }, { "epoch": 0.8668631147126586, "grad_norm": 0.693935751914978, "learning_rate": 8.619171170624585e-07, "loss": 0.1629, "step": 48010 }, { "epoch": 0.8669533941455594, "grad_norm": 0.6370662450790405, "learning_rate": 8.607655565351803e-07, "loss": 0.2614, "step": 48015 }, { "epoch": 0.86704367357846, "grad_norm": 0.2275598645210266, "learning_rate": 8.59614731192866e-07, "loss": 0.1091, "step": 48020 }, { "epoch": 0.8671339530113608, "grad_norm": 0.2575340270996094, "learning_rate": 8.584646411280872e-07, "loss": 0.1735, "step": 48025 }, { "epoch": 0.8672242324442615, "grad_norm": 0.40870359539985657, "learning_rate": 8.573152864333645e-07, "loss": 0.2322, "step": 48030 }, { "epoch": 0.8673145118771622, "grad_norm": 0.33703476190567017, "learning_rate": 8.561666672011526e-07, "loss": 0.1138, "step": 48035 }, { "epoch": 0.8674047913100629, "grad_norm": 0.45342332124710083, "learning_rate": 8.550187835238521e-07, "loss": 0.2634, "step": 48040 }, { "epoch": 0.8674950707429636, "grad_norm": 0.39678117632865906, "learning_rate": 8.538716354937982e-07, "loss": 0.3507, "step": 48045 }, { "epoch": 0.8675853501758644, "grad_norm": 0.4366861581802368, "learning_rate": 8.527252232032734e-07, "loss": 0.2137, "step": 48050 }, { "epoch": 0.8676756296087651, "grad_norm": 0.3188689351081848, "learning_rate": 8.515795467444965e-07, "loss": 0.1916, "step": 48055 }, { "epoch": 0.8677659090416657, "grad_norm": 0.6067432165145874, "learning_rate": 8.504346062096291e-07, "loss": 0.2341, "step": 48060 }, { "epoch": 0.8678561884745665, "grad_norm": 0.47358977794647217, "learning_rate": 8.492904016907744e-07, "loss": 0.1646, "step": 48065 }, { "epoch": 0.8679464679074672, "grad_norm": 0.33584290742874146, "learning_rate": 8.481469332799763e-07, "loss": 0.2262, "step": 48070 }, { "epoch": 0.868036747340368, "grad_norm": 0.6119019389152527, "learning_rate": 8.470042010692159e-07, "loss": 0.1833, "step": 48075 }, { "epoch": 0.8681270267732686, "grad_norm": 0.4626547396183014, "learning_rate": 8.458622051504184e-07, "loss": 0.2844, "step": 48080 }, { "epoch": 0.8682173062061693, "grad_norm": 0.35874173045158386, "learning_rate": 8.447209456154503e-07, "loss": 0.2262, "step": 48085 }, { "epoch": 0.8683075856390701, "grad_norm": 0.36015933752059937, "learning_rate": 8.435804225561162e-07, "loss": 0.2485, "step": 48090 }, { "epoch": 0.8683978650719708, "grad_norm": 0.515663743019104, "learning_rate": 8.424406360641624e-07, "loss": 0.21, "step": 48095 }, { "epoch": 0.8684881445048714, "grad_norm": 0.47348344326019287, "learning_rate": 8.41301586231279e-07, "loss": 0.2061, "step": 48100 }, { "epoch": 0.8685784239377722, "grad_norm": 0.4087829887866974, "learning_rate": 8.401632731490927e-07, "loss": 0.2398, "step": 48105 }, { "epoch": 0.8686687033706729, "grad_norm": 0.7120229005813599, "learning_rate": 8.390256969091725e-07, "loss": 0.2717, "step": 48110 }, { "epoch": 0.8687589828035737, "grad_norm": 0.8429829478263855, "learning_rate": 8.378888576030287e-07, "loss": 0.1942, "step": 48115 }, { "epoch": 0.8688492622364743, "grad_norm": 0.3972378075122833, "learning_rate": 8.3675275532211e-07, "loss": 0.2245, "step": 48120 }, { "epoch": 0.868939541669375, "grad_norm": 0.515321671962738, "learning_rate": 8.356173901578091e-07, "loss": 0.2349, "step": 48125 }, { "epoch": 0.8690298211022758, "grad_norm": 1.08586847782135, "learning_rate": 8.344827622014584e-07, "loss": 0.2143, "step": 48130 }, { "epoch": 0.8691201005351765, "grad_norm": 0.41314950585365295, "learning_rate": 8.333488715443305e-07, "loss": 0.3164, "step": 48135 }, { "epoch": 0.8692103799680772, "grad_norm": 0.5639352798461914, "learning_rate": 8.322157182776358e-07, "loss": 0.1827, "step": 48140 }, { "epoch": 0.8693006594009779, "grad_norm": 0.6171694993972778, "learning_rate": 8.310833024925325e-07, "loss": 0.159, "step": 48145 }, { "epoch": 0.8693909388338786, "grad_norm": 0.5867541432380676, "learning_rate": 8.29951624280112e-07, "loss": 0.1949, "step": 48150 }, { "epoch": 0.8694812182667794, "grad_norm": 0.44068971276283264, "learning_rate": 8.288206837314116e-07, "loss": 0.2258, "step": 48155 }, { "epoch": 0.86957149769968, "grad_norm": 0.42447203397750854, "learning_rate": 8.276904809374031e-07, "loss": 0.2565, "step": 48160 }, { "epoch": 0.8696617771325807, "grad_norm": 0.4244731664657593, "learning_rate": 8.265610159890092e-07, "loss": 0.2274, "step": 48165 }, { "epoch": 0.8697520565654815, "grad_norm": 0.6324188113212585, "learning_rate": 8.254322889770849e-07, "loss": 0.2529, "step": 48170 }, { "epoch": 0.8698423359983822, "grad_norm": 0.4434747099876404, "learning_rate": 8.243042999924255e-07, "loss": 0.2762, "step": 48175 }, { "epoch": 0.8699326154312829, "grad_norm": 0.5678615570068359, "learning_rate": 8.231770491257729e-07, "loss": 0.1874, "step": 48180 }, { "epoch": 0.8700228948641836, "grad_norm": 0.44842442870140076, "learning_rate": 8.220505364678033e-07, "loss": 0.2059, "step": 48185 }, { "epoch": 0.8701131742970843, "grad_norm": 0.7900363206863403, "learning_rate": 8.209247621091376e-07, "loss": 0.2184, "step": 48190 }, { "epoch": 0.8702034537299851, "grad_norm": 0.3303584158420563, "learning_rate": 8.197997261403368e-07, "loss": 0.2753, "step": 48195 }, { "epoch": 0.8702937331628857, "grad_norm": 0.4439292550086975, "learning_rate": 8.186754286519027e-07, "loss": 0.2112, "step": 48200 }, { "epoch": 0.8703840125957865, "grad_norm": 0.3900735378265381, "learning_rate": 8.175518697342743e-07, "loss": 0.2861, "step": 48205 }, { "epoch": 0.8704742920286872, "grad_norm": 0.643632173538208, "learning_rate": 8.164290494778371e-07, "loss": 0.2954, "step": 48210 }, { "epoch": 0.8705645714615879, "grad_norm": 0.5918735861778259, "learning_rate": 8.153069679729098e-07, "loss": 0.3003, "step": 48215 }, { "epoch": 0.8706548508944886, "grad_norm": 0.40243905782699585, "learning_rate": 8.141856253097602e-07, "loss": 0.175, "step": 48220 }, { "epoch": 0.8707451303273893, "grad_norm": 2.0269851684570312, "learning_rate": 8.130650215785852e-07, "loss": 0.3366, "step": 48225 }, { "epoch": 0.8708354097602901, "grad_norm": 0.38487523794174194, "learning_rate": 8.11945156869538e-07, "loss": 0.2189, "step": 48230 }, { "epoch": 0.8709256891931908, "grad_norm": 0.34521669149398804, "learning_rate": 8.108260312726968e-07, "loss": 0.1645, "step": 48235 }, { "epoch": 0.8710159686260914, "grad_norm": 0.4212098717689514, "learning_rate": 8.097076448780916e-07, "loss": 0.1905, "step": 48240 }, { "epoch": 0.8711062480589922, "grad_norm": 0.5981873869895935, "learning_rate": 8.08589997775685e-07, "loss": 0.1878, "step": 48245 }, { "epoch": 0.8711965274918929, "grad_norm": 0.5543757081031799, "learning_rate": 8.074730900553873e-07, "loss": 0.2727, "step": 48250 }, { "epoch": 0.8712868069247937, "grad_norm": 0.4017278254032135, "learning_rate": 8.063569218070411e-07, "loss": 0.2687, "step": 48255 }, { "epoch": 0.8713770863576943, "grad_norm": 0.8131873607635498, "learning_rate": 8.052414931204366e-07, "loss": 0.2013, "step": 48260 }, { "epoch": 0.871467365790595, "grad_norm": 0.784367561340332, "learning_rate": 8.041268040853023e-07, "loss": 0.2989, "step": 48265 }, { "epoch": 0.8715576452234958, "grad_norm": 0.24246563017368317, "learning_rate": 8.030128547913052e-07, "loss": 0.3262, "step": 48270 }, { "epoch": 0.8716479246563965, "grad_norm": 0.36412227153778076, "learning_rate": 8.018996453280581e-07, "loss": 0.1953, "step": 48275 }, { "epoch": 0.8717382040892971, "grad_norm": 0.482963889837265, "learning_rate": 8.007871757851049e-07, "loss": 0.2645, "step": 48280 }, { "epoch": 0.8718284835221979, "grad_norm": 0.2874138057231903, "learning_rate": 7.996754462519408e-07, "loss": 0.1705, "step": 48285 }, { "epoch": 0.8719187629550986, "grad_norm": 0.36590391397476196, "learning_rate": 7.985644568179906e-07, "loss": 0.1881, "step": 48290 }, { "epoch": 0.8720090423879994, "grad_norm": 0.5138126015663147, "learning_rate": 7.974542075726322e-07, "loss": 0.1993, "step": 48295 }, { "epoch": 0.8720993218209, "grad_norm": 0.6720679402351379, "learning_rate": 7.963446986051715e-07, "loss": 0.2382, "step": 48300 }, { "epoch": 0.8721896012538007, "grad_norm": 0.4910641312599182, "learning_rate": 7.952359300048662e-07, "loss": 0.2453, "step": 48305 }, { "epoch": 0.8722798806867015, "grad_norm": 0.4402197003364563, "learning_rate": 7.941279018609027e-07, "loss": 0.2395, "step": 48310 }, { "epoch": 0.8723701601196022, "grad_norm": 0.41484811902046204, "learning_rate": 7.930206142624175e-07, "loss": 0.2043, "step": 48315 }, { "epoch": 0.8724604395525029, "grad_norm": 0.3428449034690857, "learning_rate": 7.919140672984815e-07, "loss": 0.2656, "step": 48320 }, { "epoch": 0.8725507189854036, "grad_norm": 0.5132912993431091, "learning_rate": 7.908082610581091e-07, "loss": 0.3005, "step": 48325 }, { "epoch": 0.8726409984183043, "grad_norm": 0.44725364446640015, "learning_rate": 7.897031956302547e-07, "loss": 0.1681, "step": 48330 }, { "epoch": 0.8727312778512051, "grad_norm": 0.46623513102531433, "learning_rate": 7.885988711038162e-07, "loss": 0.181, "step": 48335 }, { "epoch": 0.8728215572841057, "grad_norm": 0.30655795335769653, "learning_rate": 7.874952875676212e-07, "loss": 0.1926, "step": 48340 }, { "epoch": 0.8729118367170065, "grad_norm": 0.543652355670929, "learning_rate": 7.863924451104521e-07, "loss": 0.25, "step": 48345 }, { "epoch": 0.8730021161499072, "grad_norm": 0.5199833512306213, "learning_rate": 7.852903438210213e-07, "loss": 0.2849, "step": 48350 }, { "epoch": 0.8730923955828079, "grad_norm": 0.37067753076553345, "learning_rate": 7.841889837879812e-07, "loss": 0.1712, "step": 48355 }, { "epoch": 0.8731826750157087, "grad_norm": 0.3907431364059448, "learning_rate": 7.830883650999355e-07, "loss": 0.2518, "step": 48360 }, { "epoch": 0.8732729544486093, "grad_norm": 0.2768262028694153, "learning_rate": 7.819884878454154e-07, "loss": 0.2396, "step": 48365 }, { "epoch": 0.87336323388151, "grad_norm": 0.7335043549537659, "learning_rate": 7.808893521129013e-07, "loss": 0.1918, "step": 48370 }, { "epoch": 0.8734535133144108, "grad_norm": 0.5029274821281433, "learning_rate": 7.797909579908092e-07, "loss": 0.2167, "step": 48375 }, { "epoch": 0.8735437927473115, "grad_norm": 0.6301203966140747, "learning_rate": 7.786933055674973e-07, "loss": 0.2502, "step": 48380 }, { "epoch": 0.8736340721802122, "grad_norm": 0.35355889797210693, "learning_rate": 7.775963949312626e-07, "loss": 0.2063, "step": 48385 }, { "epoch": 0.8737243516131129, "grad_norm": 0.8525371551513672, "learning_rate": 7.765002261703448e-07, "loss": 0.21, "step": 48390 }, { "epoch": 0.8738146310460136, "grad_norm": 0.2836104929447174, "learning_rate": 7.754047993729219e-07, "loss": 0.1717, "step": 48395 }, { "epoch": 0.8739049104789144, "grad_norm": 0.3038562834262848, "learning_rate": 7.743101146271159e-07, "loss": 0.2272, "step": 48400 }, { "epoch": 0.873995189911815, "grad_norm": 0.9222632050514221, "learning_rate": 7.732161720209818e-07, "loss": 0.2373, "step": 48405 }, { "epoch": 0.8740854693447158, "grad_norm": 0.2853277623653412, "learning_rate": 7.721229716425227e-07, "loss": 0.2101, "step": 48410 }, { "epoch": 0.8741757487776165, "grad_norm": 0.6378380060195923, "learning_rate": 7.71030513579677e-07, "loss": 0.143, "step": 48415 }, { "epoch": 0.8742660282105172, "grad_norm": 0.589065670967102, "learning_rate": 7.699387979203243e-07, "loss": 0.2007, "step": 48420 }, { "epoch": 0.8743563076434179, "grad_norm": 0.4162118434906006, "learning_rate": 7.688478247522868e-07, "loss": 0.1579, "step": 48425 }, { "epoch": 0.8744465870763186, "grad_norm": 0.3636179268360138, "learning_rate": 7.677575941633264e-07, "loss": 0.2605, "step": 48430 }, { "epoch": 0.8745368665092194, "grad_norm": 0.2778651714324951, "learning_rate": 7.666681062411408e-07, "loss": 0.2255, "step": 48435 }, { "epoch": 0.8746271459421201, "grad_norm": 0.1925775110721588, "learning_rate": 7.655793610733753e-07, "loss": 0.2077, "step": 48440 }, { "epoch": 0.8747174253750207, "grad_norm": 0.21382668614387512, "learning_rate": 7.644913587476088e-07, "loss": 0.1934, "step": 48445 }, { "epoch": 0.8748077048079215, "grad_norm": 0.42845526337623596, "learning_rate": 7.634040993513625e-07, "loss": 0.2532, "step": 48450 }, { "epoch": 0.8748979842408222, "grad_norm": 0.5847483277320862, "learning_rate": 7.623175829720997e-07, "loss": 0.1947, "step": 48455 }, { "epoch": 0.874988263673723, "grad_norm": 0.5122021436691284, "learning_rate": 7.612318096972237e-07, "loss": 0.2392, "step": 48460 }, { "epoch": 0.8750785431066236, "grad_norm": 0.24058891832828522, "learning_rate": 7.601467796140771e-07, "loss": 0.2839, "step": 48465 }, { "epoch": 0.8751688225395243, "grad_norm": 0.38757041096687317, "learning_rate": 7.59062492809941e-07, "loss": 0.1499, "step": 48470 }, { "epoch": 0.8752591019724251, "grad_norm": 0.6410506963729858, "learning_rate": 7.579789493720402e-07, "loss": 0.2633, "step": 48475 }, { "epoch": 0.8753493814053258, "grad_norm": 0.38164740800857544, "learning_rate": 7.56896149387536e-07, "loss": 0.2471, "step": 48480 }, { "epoch": 0.8754396608382264, "grad_norm": 0.46518123149871826, "learning_rate": 7.55814092943532e-07, "loss": 0.1657, "step": 48485 }, { "epoch": 0.8755299402711272, "grad_norm": 0.18978314101696014, "learning_rate": 7.54732780127072e-07, "loss": 0.1636, "step": 48490 }, { "epoch": 0.8756202197040279, "grad_norm": 0.6388356685638428, "learning_rate": 7.536522110251432e-07, "loss": 0.2383, "step": 48495 }, { "epoch": 0.8757104991369287, "grad_norm": 0.3540574014186859, "learning_rate": 7.525723857246636e-07, "loss": 0.2615, "step": 48500 }, { "epoch": 0.8758007785698293, "grad_norm": 0.47638097405433655, "learning_rate": 7.514933043125027e-07, "loss": 0.1564, "step": 48505 }, { "epoch": 0.87589105800273, "grad_norm": 0.25722935795783997, "learning_rate": 7.504149668754612e-07, "loss": 0.2165, "step": 48510 }, { "epoch": 0.8759813374356308, "grad_norm": 0.1883164346218109, "learning_rate": 7.493373735002851e-07, "loss": 0.2019, "step": 48515 }, { "epoch": 0.8760716168685315, "grad_norm": 0.4336926341056824, "learning_rate": 7.482605242736573e-07, "loss": 0.1925, "step": 48520 }, { "epoch": 0.8761618963014322, "grad_norm": 0.24070729315280914, "learning_rate": 7.471844192822042e-07, "loss": 0.1779, "step": 48525 }, { "epoch": 0.8762521757343329, "grad_norm": 0.7458894848823547, "learning_rate": 7.461090586124897e-07, "loss": 0.1377, "step": 48530 }, { "epoch": 0.8763424551672336, "grad_norm": 0.2711096405982971, "learning_rate": 7.450344423510214e-07, "loss": 0.1506, "step": 48535 }, { "epoch": 0.8764327346001344, "grad_norm": 0.2870548665523529, "learning_rate": 7.439605705842412e-07, "loss": 0.2085, "step": 48540 }, { "epoch": 0.876523014033035, "grad_norm": 0.49196571111679077, "learning_rate": 7.428874433985333e-07, "loss": 0.2513, "step": 48545 }, { "epoch": 0.8766132934659358, "grad_norm": 0.4872879981994629, "learning_rate": 7.418150608802255e-07, "loss": 0.1888, "step": 48550 }, { "epoch": 0.8767035728988365, "grad_norm": 0.4504752457141876, "learning_rate": 7.40743423115583e-07, "loss": 0.2368, "step": 48555 }, { "epoch": 0.8767938523317372, "grad_norm": 0.6074302196502686, "learning_rate": 7.396725301908114e-07, "loss": 0.2713, "step": 48560 }, { "epoch": 0.8768841317646379, "grad_norm": 0.43019863963127136, "learning_rate": 7.386023821920552e-07, "loss": 0.27, "step": 48565 }, { "epoch": 0.8769744111975386, "grad_norm": 0.5306024551391602, "learning_rate": 7.375329792054009e-07, "loss": 0.1794, "step": 48570 }, { "epoch": 0.8770646906304393, "grad_norm": 0.5218394994735718, "learning_rate": 7.364643213168731e-07, "loss": 0.1527, "step": 48575 }, { "epoch": 0.8771549700633401, "grad_norm": 0.2932478189468384, "learning_rate": 7.353964086124388e-07, "loss": 0.3163, "step": 48580 }, { "epoch": 0.8772452494962407, "grad_norm": 0.4879951775074005, "learning_rate": 7.343292411780023e-07, "loss": 0.2522, "step": 48585 }, { "epoch": 0.8773355289291415, "grad_norm": 0.672153651714325, "learning_rate": 7.332628190994107e-07, "loss": 0.2887, "step": 48590 }, { "epoch": 0.8774258083620422, "grad_norm": 0.4504673480987549, "learning_rate": 7.321971424624497e-07, "loss": 0.2011, "step": 48595 }, { "epoch": 0.877516087794943, "grad_norm": 0.4887804090976715, "learning_rate": 7.311322113528463e-07, "loss": 0.2581, "step": 48600 }, { "epoch": 0.8776063672278436, "grad_norm": 0.2560054361820221, "learning_rate": 7.300680258562631e-07, "loss": 0.1815, "step": 48605 }, { "epoch": 0.8776966466607443, "grad_norm": 0.3547193109989166, "learning_rate": 7.290045860583106e-07, "loss": 0.2215, "step": 48610 }, { "epoch": 0.8777869260936451, "grad_norm": 0.524704098701477, "learning_rate": 7.279418920445314e-07, "loss": 0.2494, "step": 48615 }, { "epoch": 0.8778772055265458, "grad_norm": 0.3460308909416199, "learning_rate": 7.268799439004115e-07, "loss": 0.1966, "step": 48620 }, { "epoch": 0.8779674849594464, "grad_norm": 0.35949942469596863, "learning_rate": 7.258187417113804e-07, "loss": 0.2505, "step": 48625 }, { "epoch": 0.8780577643923472, "grad_norm": 0.3450348675251007, "learning_rate": 7.247582855627999e-07, "loss": 0.2108, "step": 48630 }, { "epoch": 0.8781480438252479, "grad_norm": 0.43432381749153137, "learning_rate": 7.236985755399794e-07, "loss": 0.2986, "step": 48635 }, { "epoch": 0.8782383232581487, "grad_norm": 0.45111775398254395, "learning_rate": 7.22639611728162e-07, "loss": 0.2615, "step": 48640 }, { "epoch": 0.8783286026910493, "grad_norm": 0.34187889099121094, "learning_rate": 7.215813942125361e-07, "loss": 0.2505, "step": 48645 }, { "epoch": 0.87841888212395, "grad_norm": 0.37597358226776123, "learning_rate": 7.205239230782246e-07, "loss": 0.2399, "step": 48650 }, { "epoch": 0.8785091615568508, "grad_norm": 0.5486959218978882, "learning_rate": 7.194671984102964e-07, "loss": 0.1959, "step": 48655 }, { "epoch": 0.8785994409897515, "grad_norm": 0.3595384657382965, "learning_rate": 7.184112202937543e-07, "loss": 0.1192, "step": 48660 }, { "epoch": 0.8786897204226521, "grad_norm": 0.3697947859764099, "learning_rate": 7.173559888135484e-07, "loss": 0.2608, "step": 48665 }, { "epoch": 0.8787799998555529, "grad_norm": 0.6005027294158936, "learning_rate": 7.163015040545607e-07, "loss": 0.2247, "step": 48670 }, { "epoch": 0.8788702792884536, "grad_norm": 0.472234308719635, "learning_rate": 7.152477661016189e-07, "loss": 0.2378, "step": 48675 }, { "epoch": 0.8789605587213544, "grad_norm": 0.9382433891296387, "learning_rate": 7.141947750394862e-07, "loss": 0.1628, "step": 48680 }, { "epoch": 0.879050838154255, "grad_norm": 0.37445417046546936, "learning_rate": 7.131425309528694e-07, "loss": 0.1783, "step": 48685 }, { "epoch": 0.8791411175871557, "grad_norm": 0.40765634179115295, "learning_rate": 7.120910339264142e-07, "loss": 0.1618, "step": 48690 }, { "epoch": 0.8792313970200565, "grad_norm": 0.5030569434165955, "learning_rate": 7.110402840447072e-07, "loss": 0.2147, "step": 48695 }, { "epoch": 0.8793216764529572, "grad_norm": 0.3929252326488495, "learning_rate": 7.09990281392271e-07, "loss": 0.232, "step": 48700 }, { "epoch": 0.8794119558858579, "grad_norm": 0.5757809281349182, "learning_rate": 7.089410260535723e-07, "loss": 0.264, "step": 48705 }, { "epoch": 0.8795022353187586, "grad_norm": 0.36466771364212036, "learning_rate": 7.078925181130147e-07, "loss": 0.1432, "step": 48710 }, { "epoch": 0.8795925147516593, "grad_norm": 0.6043946743011475, "learning_rate": 7.068447576549453e-07, "loss": 0.2092, "step": 48715 }, { "epoch": 0.8796827941845601, "grad_norm": 0.479689359664917, "learning_rate": 7.057977447636455e-07, "loss": 0.2223, "step": 48720 }, { "epoch": 0.8797730736174607, "grad_norm": 0.46360352635383606, "learning_rate": 7.047514795233423e-07, "loss": 0.2344, "step": 48725 }, { "epoch": 0.8798633530503615, "grad_norm": 0.7283722162246704, "learning_rate": 7.037059620182008e-07, "loss": 0.2391, "step": 48730 }, { "epoch": 0.8799536324832622, "grad_norm": 0.42849308252334595, "learning_rate": 7.026611923323224e-07, "loss": 0.3026, "step": 48735 }, { "epoch": 0.8800439119161629, "grad_norm": 0.5676857829093933, "learning_rate": 7.016171705497554e-07, "loss": 0.1848, "step": 48740 }, { "epoch": 0.8801341913490637, "grad_norm": 0.5181065201759338, "learning_rate": 7.005738967544784e-07, "loss": 0.1742, "step": 48745 }, { "epoch": 0.8802244707819643, "grad_norm": 0.5719236731529236, "learning_rate": 6.995313710304186e-07, "loss": 0.1925, "step": 48750 }, { "epoch": 0.880314750214865, "grad_norm": 0.5163266658782959, "learning_rate": 6.984895934614389e-07, "loss": 0.2706, "step": 48755 }, { "epoch": 0.8804050296477658, "grad_norm": 0.16679945588111877, "learning_rate": 6.974485641313444e-07, "loss": 0.1513, "step": 48760 }, { "epoch": 0.8804953090806665, "grad_norm": 0.4424380362033844, "learning_rate": 6.964082831238761e-07, "loss": 0.2493, "step": 48765 }, { "epoch": 0.8805855885135672, "grad_norm": 0.4516794681549072, "learning_rate": 6.95368750522718e-07, "loss": 0.2917, "step": 48770 }, { "epoch": 0.8806758679464679, "grad_norm": 0.3460313081741333, "learning_rate": 6.943299664114922e-07, "loss": 0.1721, "step": 48775 }, { "epoch": 0.8807661473793686, "grad_norm": 0.5408383011817932, "learning_rate": 6.932919308737629e-07, "loss": 0.2507, "step": 48780 }, { "epoch": 0.8808564268122694, "grad_norm": 0.32816874980926514, "learning_rate": 6.9225464399303e-07, "loss": 0.1181, "step": 48785 }, { "epoch": 0.88094670624517, "grad_norm": 0.24817796051502228, "learning_rate": 6.912181058527367e-07, "loss": 0.309, "step": 48790 }, { "epoch": 0.8810369856780708, "grad_norm": 0.5147075057029724, "learning_rate": 6.901823165362664e-07, "loss": 0.2264, "step": 48795 }, { "epoch": 0.8811272651109715, "grad_norm": 0.9248692989349365, "learning_rate": 6.891472761269413e-07, "loss": 0.244, "step": 48800 }, { "epoch": 0.8812175445438722, "grad_norm": 0.592526912689209, "learning_rate": 6.881129847080193e-07, "loss": 0.3732, "step": 48805 }, { "epoch": 0.8813078239767729, "grad_norm": 0.2904769778251648, "learning_rate": 6.870794423627047e-07, "loss": 0.248, "step": 48810 }, { "epoch": 0.8813981034096736, "grad_norm": 0.6007767915725708, "learning_rate": 6.860466491741368e-07, "loss": 0.2189, "step": 48815 }, { "epoch": 0.8814883828425744, "grad_norm": 0.7476814389228821, "learning_rate": 6.85014605225397e-07, "loss": 0.2788, "step": 48820 }, { "epoch": 0.8815786622754751, "grad_norm": 0.39375999569892883, "learning_rate": 6.839833105995064e-07, "loss": 0.1943, "step": 48825 }, { "epoch": 0.8816689417083757, "grad_norm": 0.6178081631660461, "learning_rate": 6.829527653794243e-07, "loss": 0.2376, "step": 48830 }, { "epoch": 0.8817592211412765, "grad_norm": 0.9571642279624939, "learning_rate": 6.819229696480511e-07, "loss": 0.2867, "step": 48835 }, { "epoch": 0.8818495005741772, "grad_norm": 0.4160026013851166, "learning_rate": 6.80893923488225e-07, "loss": 0.1502, "step": 48840 }, { "epoch": 0.881939780007078, "grad_norm": 0.3343513607978821, "learning_rate": 6.798656269827275e-07, "loss": 0.2608, "step": 48845 }, { "epoch": 0.8820300594399786, "grad_norm": 0.3635469973087311, "learning_rate": 6.788380802142758e-07, "loss": 0.1748, "step": 48850 }, { "epoch": 0.8821203388728793, "grad_norm": 0.1914890855550766, "learning_rate": 6.778112832655282e-07, "loss": 0.1421, "step": 48855 }, { "epoch": 0.8822106183057801, "grad_norm": 0.3439345061779022, "learning_rate": 6.767852362190841e-07, "loss": 0.1821, "step": 48860 }, { "epoch": 0.8823008977386808, "grad_norm": 0.5026911497116089, "learning_rate": 6.757599391574831e-07, "loss": 0.2356, "step": 48865 }, { "epoch": 0.8823911771715814, "grad_norm": 0.4740567207336426, "learning_rate": 6.747353921632005e-07, "loss": 0.2322, "step": 48870 }, { "epoch": 0.8824814566044822, "grad_norm": 0.4739016890525818, "learning_rate": 6.737115953186546e-07, "loss": 0.2387, "step": 48875 }, { "epoch": 0.8825717360373829, "grad_norm": 0.3513011038303375, "learning_rate": 6.726885487062007e-07, "loss": 0.1888, "step": 48880 }, { "epoch": 0.8826620154702837, "grad_norm": 0.5506095290184021, "learning_rate": 6.716662524081375e-07, "loss": 0.2461, "step": 48885 }, { "epoch": 0.8827522949031843, "grad_norm": 0.3321288228034973, "learning_rate": 6.706447065067012e-07, "loss": 0.1946, "step": 48890 }, { "epoch": 0.882842574336085, "grad_norm": 0.5968649387359619, "learning_rate": 6.696239110840686e-07, "loss": 0.1693, "step": 48895 }, { "epoch": 0.8829328537689858, "grad_norm": 0.3725471496582031, "learning_rate": 6.686038662223538e-07, "loss": 0.2356, "step": 48900 }, { "epoch": 0.8830231332018865, "grad_norm": 0.30136847496032715, "learning_rate": 6.675845720036101e-07, "loss": 0.2995, "step": 48905 }, { "epoch": 0.8831134126347872, "grad_norm": 0.4549270272254944, "learning_rate": 6.665660285098363e-07, "loss": 0.2596, "step": 48910 }, { "epoch": 0.8832036920676879, "grad_norm": 0.4814635217189789, "learning_rate": 6.655482358229636e-07, "loss": 0.1425, "step": 48915 }, { "epoch": 0.8832939715005886, "grad_norm": 0.31953999400138855, "learning_rate": 6.645311940248667e-07, "loss": 0.0754, "step": 48920 }, { "epoch": 0.8833842509334894, "grad_norm": 0.6461823582649231, "learning_rate": 6.635149031973598e-07, "loss": 0.1998, "step": 48925 }, { "epoch": 0.88347453036639, "grad_norm": 0.37079378962516785, "learning_rate": 6.624993634221987e-07, "loss": 0.181, "step": 48930 }, { "epoch": 0.8835648097992908, "grad_norm": 0.5783750414848328, "learning_rate": 6.614845747810706e-07, "loss": 0.2365, "step": 48935 }, { "epoch": 0.8836550892321915, "grad_norm": 0.3737196922302246, "learning_rate": 6.604705373556142e-07, "loss": 0.0863, "step": 48940 }, { "epoch": 0.8837453686650922, "grad_norm": 0.27370980381965637, "learning_rate": 6.594572512273956e-07, "loss": 0.2368, "step": 48945 }, { "epoch": 0.8838356480979929, "grad_norm": 0.42945805191993713, "learning_rate": 6.584447164779284e-07, "loss": 0.1998, "step": 48950 }, { "epoch": 0.8839259275308936, "grad_norm": 0.5229265689849854, "learning_rate": 6.574329331886642e-07, "loss": 0.2345, "step": 48955 }, { "epoch": 0.8840162069637943, "grad_norm": 0.43820658326148987, "learning_rate": 6.564219014409956e-07, "loss": 0.1946, "step": 48960 }, { "epoch": 0.8841064863966951, "grad_norm": 0.5245822072029114, "learning_rate": 6.554116213162487e-07, "loss": 0.2194, "step": 48965 }, { "epoch": 0.8841967658295957, "grad_norm": 0.3123713731765747, "learning_rate": 6.544020928956973e-07, "loss": 0.2327, "step": 48970 }, { "epoch": 0.8842870452624965, "grad_norm": 0.262265145778656, "learning_rate": 6.533933162605466e-07, "loss": 0.1715, "step": 48975 }, { "epoch": 0.8843773246953972, "grad_norm": 0.2101922333240509, "learning_rate": 6.523852914919482e-07, "loss": 0.2302, "step": 48980 }, { "epoch": 0.884467604128298, "grad_norm": 0.5431260466575623, "learning_rate": 6.513780186709884e-07, "loss": 0.1776, "step": 48985 }, { "epoch": 0.8845578835611986, "grad_norm": 0.4570768177509308, "learning_rate": 6.503714978786957e-07, "loss": 0.2269, "step": 48990 }, { "epoch": 0.8846481629940993, "grad_norm": 0.4027325510978699, "learning_rate": 6.493657291960398e-07, "loss": 0.2101, "step": 48995 }, { "epoch": 0.8847384424270001, "grad_norm": 0.18464119732379913, "learning_rate": 6.483607127039226e-07, "loss": 0.1624, "step": 49000 }, { "epoch": 0.8848287218599008, "grad_norm": 0.42464643716812134, "learning_rate": 6.473564484831962e-07, "loss": 0.2021, "step": 49005 }, { "epoch": 0.8849190012928014, "grad_norm": 0.5144774317741394, "learning_rate": 6.463529366146404e-07, "loss": 0.2546, "step": 49010 }, { "epoch": 0.8850092807257022, "grad_norm": 0.5907028913497925, "learning_rate": 6.453501771789839e-07, "loss": 0.2221, "step": 49015 }, { "epoch": 0.8850995601586029, "grad_norm": 0.44879671931266785, "learning_rate": 6.443481702568899e-07, "loss": 0.2271, "step": 49020 }, { "epoch": 0.8851898395915037, "grad_norm": 0.5192486643791199, "learning_rate": 6.433469159289662e-07, "loss": 0.1811, "step": 49025 }, { "epoch": 0.8852801190244043, "grad_norm": 0.5279654264450073, "learning_rate": 6.423464142757507e-07, "loss": 0.2762, "step": 49030 }, { "epoch": 0.885370398457305, "grad_norm": 0.41415759921073914, "learning_rate": 6.413466653777323e-07, "loss": 0.1521, "step": 49035 }, { "epoch": 0.8854606778902058, "grad_norm": 0.3615606725215912, "learning_rate": 6.403476693153287e-07, "loss": 0.1981, "step": 49040 }, { "epoch": 0.8855509573231065, "grad_norm": 0.5927579998970032, "learning_rate": 6.393494261689048e-07, "loss": 0.2668, "step": 49045 }, { "epoch": 0.8856412367560071, "grad_norm": 0.5542939305305481, "learning_rate": 6.383519360187606e-07, "loss": 0.2321, "step": 49050 }, { "epoch": 0.8857315161889079, "grad_norm": 0.48070257902145386, "learning_rate": 6.373551989451366e-07, "loss": 0.248, "step": 49055 }, { "epoch": 0.8858217956218086, "grad_norm": 0.5028548240661621, "learning_rate": 6.363592150282138e-07, "loss": 0.2422, "step": 49060 }, { "epoch": 0.8859120750547094, "grad_norm": 1.1969773769378662, "learning_rate": 6.353639843481141e-07, "loss": 0.2232, "step": 49065 }, { "epoch": 0.88600235448761, "grad_norm": 0.23896126449108124, "learning_rate": 6.34369506984892e-07, "loss": 0.2505, "step": 49070 }, { "epoch": 0.8860926339205107, "grad_norm": 0.4905920922756195, "learning_rate": 6.333757830185505e-07, "loss": 0.3466, "step": 49075 }, { "epoch": 0.8861829133534115, "grad_norm": 0.5092660784721375, "learning_rate": 6.323828125290244e-07, "loss": 0.196, "step": 49080 }, { "epoch": 0.8862731927863122, "grad_norm": 0.5876214504241943, "learning_rate": 6.313905955961908e-07, "loss": 0.2474, "step": 49085 }, { "epoch": 0.8863634722192129, "grad_norm": 0.4340628683567047, "learning_rate": 6.303991322998693e-07, "loss": 0.2652, "step": 49090 }, { "epoch": 0.8864537516521136, "grad_norm": 0.5022110939025879, "learning_rate": 6.294084227198127e-07, "loss": 0.3019, "step": 49095 }, { "epoch": 0.8865440310850143, "grad_norm": 0.32097309827804565, "learning_rate": 6.284184669357185e-07, "loss": 0.1647, "step": 49100 }, { "epoch": 0.8866343105179151, "grad_norm": 0.48288461565971375, "learning_rate": 6.274292650272196e-07, "loss": 0.2126, "step": 49105 }, { "epoch": 0.8867245899508157, "grad_norm": 0.33698907494544983, "learning_rate": 6.264408170738923e-07, "loss": 0.1959, "step": 49110 }, { "epoch": 0.8868148693837165, "grad_norm": 0.32553550601005554, "learning_rate": 6.254531231552485e-07, "loss": 0.2005, "step": 49115 }, { "epoch": 0.8869051488166172, "grad_norm": 0.3544366657733917, "learning_rate": 6.244661833507404e-07, "loss": 0.184, "step": 49120 }, { "epoch": 0.8869954282495179, "grad_norm": 0.1811123937368393, "learning_rate": 6.23479997739761e-07, "loss": 0.1693, "step": 49125 }, { "epoch": 0.8870857076824186, "grad_norm": 0.3863956034183502, "learning_rate": 6.224945664016435e-07, "loss": 0.1653, "step": 49130 }, { "epoch": 0.8871759871153193, "grad_norm": 0.2895120680332184, "learning_rate": 6.215098894156568e-07, "loss": 0.2131, "step": 49135 }, { "epoch": 0.88726626654822, "grad_norm": 0.3756144046783447, "learning_rate": 6.205259668610119e-07, "loss": 0.264, "step": 49140 }, { "epoch": 0.8873565459811208, "grad_norm": 0.8095720410346985, "learning_rate": 6.195427988168557e-07, "loss": 0.2005, "step": 49145 }, { "epoch": 0.8874468254140215, "grad_norm": 0.4172549247741699, "learning_rate": 6.18560385362279e-07, "loss": 0.192, "step": 49150 }, { "epoch": 0.8875371048469222, "grad_norm": 0.4497814476490021, "learning_rate": 6.1757872657631e-07, "loss": 0.153, "step": 49155 }, { "epoch": 0.8876273842798229, "grad_norm": 0.43509024381637573, "learning_rate": 6.165978225379165e-07, "loss": 0.302, "step": 49160 }, { "epoch": 0.8877176637127236, "grad_norm": 0.30748501420021057, "learning_rate": 6.156176733260033e-07, "loss": 0.1803, "step": 49165 }, { "epoch": 0.8878079431456244, "grad_norm": 0.4086261987686157, "learning_rate": 6.146382790194195e-07, "loss": 0.1683, "step": 49170 }, { "epoch": 0.887898222578525, "grad_norm": 0.47224515676498413, "learning_rate": 6.136596396969452e-07, "loss": 0.2976, "step": 49175 }, { "epoch": 0.8879885020114258, "grad_norm": 0.6250869631767273, "learning_rate": 6.126817554373088e-07, "loss": 0.222, "step": 49180 }, { "epoch": 0.8880787814443265, "grad_norm": 0.43033626675605774, "learning_rate": 6.117046263191729e-07, "loss": 0.2545, "step": 49185 }, { "epoch": 0.8881690608772272, "grad_norm": 0.3559838533401489, "learning_rate": 6.107282524211389e-07, "loss": 0.2645, "step": 49190 }, { "epoch": 0.8882593403101279, "grad_norm": 0.21485501527786255, "learning_rate": 6.09752633821753e-07, "loss": 0.3151, "step": 49195 }, { "epoch": 0.8883496197430286, "grad_norm": 0.34246349334716797, "learning_rate": 6.087777705994913e-07, "loss": 0.2091, "step": 49200 }, { "epoch": 0.8884398991759294, "grad_norm": 0.27413228154182434, "learning_rate": 6.078036628327788e-07, "loss": 0.2313, "step": 49205 }, { "epoch": 0.8885301786088301, "grad_norm": 0.4511106312274933, "learning_rate": 6.068303105999729e-07, "loss": 0.204, "step": 49210 }, { "epoch": 0.8886204580417307, "grad_norm": 0.37216225266456604, "learning_rate": 6.058577139793742e-07, "loss": 0.2052, "step": 49215 }, { "epoch": 0.8887107374746315, "grad_norm": 0.26895153522491455, "learning_rate": 6.048858730492191e-07, "loss": 0.1902, "step": 49220 }, { "epoch": 0.8888010169075322, "grad_norm": 0.28107526898384094, "learning_rate": 6.039147878876883e-07, "loss": 0.2358, "step": 49225 }, { "epoch": 0.888891296340433, "grad_norm": 0.4671335816383362, "learning_rate": 6.02944458572895e-07, "loss": 0.2322, "step": 49230 }, { "epoch": 0.8889815757733336, "grad_norm": 0.9049550890922546, "learning_rate": 6.019748851828988e-07, "loss": 0.2483, "step": 49235 }, { "epoch": 0.8890718552062343, "grad_norm": 0.9912333488464355, "learning_rate": 6.010060677956919e-07, "loss": 0.193, "step": 49240 }, { "epoch": 0.8891621346391351, "grad_norm": 0.38939374685287476, "learning_rate": 6.000380064892097e-07, "loss": 0.2529, "step": 49245 }, { "epoch": 0.8892524140720358, "grad_norm": 0.3491939306259155, "learning_rate": 5.990707013413243e-07, "loss": 0.2153, "step": 49250 }, { "epoch": 0.8893426935049364, "grad_norm": 0.7511391639709473, "learning_rate": 5.981041524298503e-07, "loss": 0.1904, "step": 49255 }, { "epoch": 0.8894329729378372, "grad_norm": 0.6481285095214844, "learning_rate": 5.971383598325386e-07, "loss": 0.2226, "step": 49260 }, { "epoch": 0.8895232523707379, "grad_norm": 0.412752628326416, "learning_rate": 5.961733236270817e-07, "loss": 0.1532, "step": 49265 }, { "epoch": 0.8896135318036387, "grad_norm": 0.9104444980621338, "learning_rate": 5.952090438911084e-07, "loss": 0.2151, "step": 49270 }, { "epoch": 0.8897038112365393, "grad_norm": 1.2514029741287231, "learning_rate": 5.942455207021868e-07, "loss": 0.227, "step": 49275 }, { "epoch": 0.88979409066944, "grad_norm": 0.4405597150325775, "learning_rate": 5.932827541378272e-07, "loss": 0.2554, "step": 49280 }, { "epoch": 0.8898843701023408, "grad_norm": 0.5971860289573669, "learning_rate": 5.923207442754764e-07, "loss": 0.2257, "step": 49285 }, { "epoch": 0.8899746495352415, "grad_norm": 0.5276842713356018, "learning_rate": 5.913594911925214e-07, "loss": 0.2626, "step": 49290 }, { "epoch": 0.8900649289681422, "grad_norm": 0.3187607228755951, "learning_rate": 5.90398994966288e-07, "loss": 0.2217, "step": 49295 }, { "epoch": 0.8901552084010429, "grad_norm": 0.3868699371814728, "learning_rate": 5.894392556740414e-07, "loss": 0.206, "step": 49300 }, { "epoch": 0.8902454878339436, "grad_norm": 0.4715498387813568, "learning_rate": 5.88480273392985e-07, "loss": 0.2874, "step": 49305 }, { "epoch": 0.8903357672668444, "grad_norm": 0.24980822205543518, "learning_rate": 5.875220482002631e-07, "loss": 0.1881, "step": 49310 }, { "epoch": 0.890426046699745, "grad_norm": 0.3385063111782074, "learning_rate": 5.865645801729558e-07, "loss": 0.1202, "step": 49315 }, { "epoch": 0.8905163261326458, "grad_norm": 0.7292124629020691, "learning_rate": 5.856078693880851e-07, "loss": 0.2565, "step": 49320 }, { "epoch": 0.8906066055655465, "grad_norm": 0.1540367305278778, "learning_rate": 5.846519159226127e-07, "loss": 0.2319, "step": 49325 }, { "epoch": 0.8906968849984472, "grad_norm": 0.4868296980857849, "learning_rate": 5.836967198534382e-07, "loss": 0.309, "step": 49330 }, { "epoch": 0.8907871644313479, "grad_norm": 0.4633415639400482, "learning_rate": 5.82742281257399e-07, "loss": 0.2276, "step": 49335 }, { "epoch": 0.8908774438642486, "grad_norm": 0.5018483400344849, "learning_rate": 5.817886002112738e-07, "loss": 0.1761, "step": 49340 }, { "epoch": 0.8909677232971493, "grad_norm": 0.3152850866317749, "learning_rate": 5.808356767917777e-07, "loss": 0.2455, "step": 49345 }, { "epoch": 0.8910580027300501, "grad_norm": 0.7689335942268372, "learning_rate": 5.798835110755674e-07, "loss": 0.1946, "step": 49350 }, { "epoch": 0.8911482821629507, "grad_norm": 0.3751264214515686, "learning_rate": 5.789321031392381e-07, "loss": 0.2964, "step": 49355 }, { "epoch": 0.8912385615958515, "grad_norm": 0.7395479083061218, "learning_rate": 5.779814530593241e-07, "loss": 0.2701, "step": 49360 }, { "epoch": 0.8913288410287522, "grad_norm": 0.23316659033298492, "learning_rate": 5.770315609122979e-07, "loss": 0.1797, "step": 49365 }, { "epoch": 0.891419120461653, "grad_norm": 0.2639722526073456, "learning_rate": 5.76082426774569e-07, "loss": 0.1985, "step": 49370 }, { "epoch": 0.8915093998945536, "grad_norm": 0.5189804434776306, "learning_rate": 5.751340507224923e-07, "loss": 0.3169, "step": 49375 }, { "epoch": 0.8915996793274543, "grad_norm": 0.8890037536621094, "learning_rate": 5.741864328323544e-07, "loss": 0.225, "step": 49380 }, { "epoch": 0.8916899587603551, "grad_norm": 0.4358655512332916, "learning_rate": 5.732395731803864e-07, "loss": 0.1619, "step": 49385 }, { "epoch": 0.8917802381932558, "grad_norm": 1.2555304765701294, "learning_rate": 5.722934718427542e-07, "loss": 0.2526, "step": 49390 }, { "epoch": 0.8918705176261564, "grad_norm": 0.43148449063301086, "learning_rate": 5.71348128895568e-07, "loss": 0.3112, "step": 49395 }, { "epoch": 0.8919607970590572, "grad_norm": 0.2432681769132614, "learning_rate": 5.704035444148715e-07, "loss": 0.2, "step": 49400 }, { "epoch": 0.8920510764919579, "grad_norm": 0.3374500572681427, "learning_rate": 5.694597184766504e-07, "loss": 0.242, "step": 49405 }, { "epoch": 0.8921413559248587, "grad_norm": 0.5422481298446655, "learning_rate": 5.685166511568263e-07, "loss": 0.2745, "step": 49410 }, { "epoch": 0.8922316353577593, "grad_norm": 0.910033643245697, "learning_rate": 5.675743425312652e-07, "loss": 0.1668, "step": 49415 }, { "epoch": 0.89232191479066, "grad_norm": 0.4588571786880493, "learning_rate": 5.666327926757675e-07, "loss": 0.2012, "step": 49420 }, { "epoch": 0.8924121942235608, "grad_norm": 0.5030219554901123, "learning_rate": 5.65692001666076e-07, "loss": 0.2414, "step": 49425 }, { "epoch": 0.8925024736564615, "grad_norm": 0.6208494305610657, "learning_rate": 5.64751969577868e-07, "loss": 0.2904, "step": 49430 }, { "epoch": 0.8925927530893621, "grad_norm": 0.4573724865913391, "learning_rate": 5.638126964867641e-07, "loss": 0.2345, "step": 49435 }, { "epoch": 0.8926830325222629, "grad_norm": 0.5691066384315491, "learning_rate": 5.628741824683193e-07, "loss": 0.2374, "step": 49440 }, { "epoch": 0.8927733119551636, "grad_norm": 0.732424795627594, "learning_rate": 5.619364275980333e-07, "loss": 0.1872, "step": 49445 }, { "epoch": 0.8928635913880644, "grad_norm": 0.564612865447998, "learning_rate": 5.609994319513401e-07, "loss": 0.2391, "step": 49450 }, { "epoch": 0.892953870820965, "grad_norm": 0.34582310914993286, "learning_rate": 5.600631956036129e-07, "loss": 0.307, "step": 49455 }, { "epoch": 0.8930441502538657, "grad_norm": 0.3481256067752838, "learning_rate": 5.591277186301692e-07, "loss": 0.2588, "step": 49460 }, { "epoch": 0.8931344296867665, "grad_norm": 0.46572649478912354, "learning_rate": 5.581930011062564e-07, "loss": 0.2656, "step": 49465 }, { "epoch": 0.8932247091196672, "grad_norm": 0.33579516410827637, "learning_rate": 5.572590431070702e-07, "loss": 0.125, "step": 49470 }, { "epoch": 0.8933149885525679, "grad_norm": 0.512161374092102, "learning_rate": 5.563258447077369e-07, "loss": 0.243, "step": 49475 }, { "epoch": 0.8934052679854686, "grad_norm": 0.33453431725502014, "learning_rate": 5.553934059833277e-07, "loss": 0.1899, "step": 49480 }, { "epoch": 0.8934955474183693, "grad_norm": 0.4576311409473419, "learning_rate": 5.544617270088493e-07, "loss": 0.2046, "step": 49485 }, { "epoch": 0.8935858268512701, "grad_norm": 0.3824627995491028, "learning_rate": 5.535308078592516e-07, "loss": 0.2438, "step": 49490 }, { "epoch": 0.8936761062841707, "grad_norm": 0.5188384056091309, "learning_rate": 5.52600648609416e-07, "loss": 0.3143, "step": 49495 }, { "epoch": 0.8937663857170715, "grad_norm": 0.6110566854476929, "learning_rate": 5.516712493341702e-07, "loss": 0.1958, "step": 49500 }, { "epoch": 0.8938566651499722, "grad_norm": 0.5411057472229004, "learning_rate": 5.507426101082758e-07, "loss": 0.3115, "step": 49505 }, { "epoch": 0.8939469445828729, "grad_norm": 0.3564608693122864, "learning_rate": 5.498147310064362e-07, "loss": 0.3119, "step": 49510 }, { "epoch": 0.8940372240157736, "grad_norm": 0.4241950809955597, "learning_rate": 5.488876121032915e-07, "loss": 0.2529, "step": 49515 }, { "epoch": 0.8941275034486743, "grad_norm": 0.5518417358398438, "learning_rate": 5.479612534734214e-07, "loss": 0.2723, "step": 49520 }, { "epoch": 0.894217782881575, "grad_norm": 0.6499502062797546, "learning_rate": 5.470356551913459e-07, "loss": 0.2468, "step": 49525 }, { "epoch": 0.8943080623144758, "grad_norm": 0.3124319911003113, "learning_rate": 5.461108173315233e-07, "loss": 0.2196, "step": 49530 }, { "epoch": 0.8943983417473765, "grad_norm": 0.6624701023101807, "learning_rate": 5.451867399683475e-07, "loss": 0.3122, "step": 49535 }, { "epoch": 0.8944886211802772, "grad_norm": 0.3819896876811981, "learning_rate": 5.442634231761568e-07, "loss": 0.1748, "step": 49540 }, { "epoch": 0.8945789006131779, "grad_norm": 0.4410114884376526, "learning_rate": 5.433408670292228e-07, "loss": 0.12, "step": 49545 }, { "epoch": 0.8946691800460786, "grad_norm": 0.3746069669723511, "learning_rate": 5.424190716017597e-07, "loss": 0.1849, "step": 49550 }, { "epoch": 0.8947594594789794, "grad_norm": 0.2565837502479553, "learning_rate": 5.414980369679201e-07, "loss": 0.1769, "step": 49555 }, { "epoch": 0.89484973891188, "grad_norm": 0.45501911640167236, "learning_rate": 5.405777632017917e-07, "loss": 0.1599, "step": 49560 }, { "epoch": 0.8949400183447808, "grad_norm": 0.5975123047828674, "learning_rate": 5.396582503774072e-07, "loss": 0.1931, "step": 49565 }, { "epoch": 0.8950302977776815, "grad_norm": 0.819442629814148, "learning_rate": 5.387394985687322e-07, "loss": 0.2122, "step": 49570 }, { "epoch": 0.8951205772105822, "grad_norm": 0.8892215490341187, "learning_rate": 5.378215078496751e-07, "loss": 0.3006, "step": 49575 }, { "epoch": 0.8952108566434829, "grad_norm": 0.41512638330459595, "learning_rate": 5.369042782940804e-07, "loss": 0.2028, "step": 49580 }, { "epoch": 0.8953011360763836, "grad_norm": 0.48824870586395264, "learning_rate": 5.359878099757332e-07, "loss": 0.2371, "step": 49585 }, { "epoch": 0.8953914155092844, "grad_norm": 0.27632448077201843, "learning_rate": 5.350721029683559e-07, "loss": 0.3257, "step": 49590 }, { "epoch": 0.8954816949421851, "grad_norm": 0.5071262121200562, "learning_rate": 5.341571573456128e-07, "loss": 0.2092, "step": 49595 }, { "epoch": 0.8955719743750857, "grad_norm": 0.3316676914691925, "learning_rate": 5.332429731811028e-07, "loss": 0.267, "step": 49600 }, { "epoch": 0.8956622538079865, "grad_norm": 0.5379388928413391, "learning_rate": 5.323295505483661e-07, "loss": 0.2527, "step": 49605 }, { "epoch": 0.8957525332408872, "grad_norm": 0.8905502557754517, "learning_rate": 5.314168895208782e-07, "loss": 0.3123, "step": 49610 }, { "epoch": 0.895842812673788, "grad_norm": 0.5016679763793945, "learning_rate": 5.305049901720593e-07, "loss": 0.1742, "step": 49615 }, { "epoch": 0.8959330921066886, "grad_norm": 0.6650755405426025, "learning_rate": 5.295938525752641e-07, "loss": 0.2197, "step": 49620 }, { "epoch": 0.8960233715395893, "grad_norm": 0.5120480060577393, "learning_rate": 5.286834768037885e-07, "loss": 0.1965, "step": 49625 }, { "epoch": 0.8961136509724901, "grad_norm": 0.4148460328578949, "learning_rate": 5.277738629308626e-07, "loss": 0.1623, "step": 49630 }, { "epoch": 0.8962039304053908, "grad_norm": 0.5100986361503601, "learning_rate": 5.268650110296602e-07, "loss": 0.2306, "step": 49635 }, { "epoch": 0.8962942098382914, "grad_norm": 0.400863915681839, "learning_rate": 5.259569211732929e-07, "loss": 0.2708, "step": 49640 }, { "epoch": 0.8963844892711922, "grad_norm": 0.28689926862716675, "learning_rate": 5.250495934348055e-07, "loss": 0.1492, "step": 49645 }, { "epoch": 0.8964747687040929, "grad_norm": 0.35647398233413696, "learning_rate": 5.241430278871895e-07, "loss": 0.2569, "step": 49650 }, { "epoch": 0.8965650481369937, "grad_norm": 0.4131743311882019, "learning_rate": 5.232372246033701e-07, "loss": 0.1356, "step": 49655 }, { "epoch": 0.8966553275698943, "grad_norm": 0.539134681224823, "learning_rate": 5.223321836562156e-07, "loss": 0.3402, "step": 49660 }, { "epoch": 0.896745607002795, "grad_norm": 0.35673341155052185, "learning_rate": 5.214279051185256e-07, "loss": 0.2032, "step": 49665 }, { "epoch": 0.8968358864356958, "grad_norm": 0.43705660104751587, "learning_rate": 5.205243890630462e-07, "loss": 0.1374, "step": 49670 }, { "epoch": 0.8969261658685965, "grad_norm": 0.47941845655441284, "learning_rate": 5.196216355624561e-07, "loss": 0.1932, "step": 49675 }, { "epoch": 0.8970164453014972, "grad_norm": 0.5962375998497009, "learning_rate": 5.187196446893761e-07, "loss": 0.3135, "step": 49680 }, { "epoch": 0.8971067247343979, "grad_norm": 0.47453346848487854, "learning_rate": 5.178184165163647e-07, "loss": 0.2893, "step": 49685 }, { "epoch": 0.8971970041672986, "grad_norm": 0.4467545747756958, "learning_rate": 5.169179511159217e-07, "loss": 0.2869, "step": 49690 }, { "epoch": 0.8972872836001994, "grad_norm": 0.24232818186283112, "learning_rate": 5.160182485604792e-07, "loss": 0.1522, "step": 49695 }, { "epoch": 0.8973775630331, "grad_norm": 0.26575037837028503, "learning_rate": 5.151193089224149e-07, "loss": 0.1616, "step": 49700 }, { "epoch": 0.8974678424660008, "grad_norm": 0.30552709102630615, "learning_rate": 5.142211322740398e-07, "loss": 0.2146, "step": 49705 }, { "epoch": 0.8975581218989015, "grad_norm": 0.37902095913887024, "learning_rate": 5.133237186876061e-07, "loss": 0.2451, "step": 49710 }, { "epoch": 0.8976484013318022, "grad_norm": 0.6796904802322388, "learning_rate": 5.124270682353028e-07, "loss": 0.271, "step": 49715 }, { "epoch": 0.8977386807647029, "grad_norm": 0.2675473392009735, "learning_rate": 5.115311809892631e-07, "loss": 0.1837, "step": 49720 }, { "epoch": 0.8978289601976036, "grad_norm": 0.4366501569747925, "learning_rate": 5.106360570215507e-07, "loss": 0.224, "step": 49725 }, { "epoch": 0.8979192396305044, "grad_norm": 0.36879366636276245, "learning_rate": 5.097416964041746e-07, "loss": 0.3035, "step": 49730 }, { "epoch": 0.8980095190634051, "grad_norm": 0.44806408882141113, "learning_rate": 5.088480992090783e-07, "loss": 0.1425, "step": 49735 }, { "epoch": 0.8980997984963057, "grad_norm": 0.7490373253822327, "learning_rate": 5.079552655081432e-07, "loss": 0.2077, "step": 49740 }, { "epoch": 0.8981900779292065, "grad_norm": 0.37198522686958313, "learning_rate": 5.070631953731931e-07, "loss": 0.0843, "step": 49745 }, { "epoch": 0.8982803573621072, "grad_norm": 0.3032682240009308, "learning_rate": 5.061718888759892e-07, "loss": 0.21, "step": 49750 }, { "epoch": 0.898370636795008, "grad_norm": 0.5252434015274048, "learning_rate": 5.052813460882311e-07, "loss": 0.1548, "step": 49755 }, { "epoch": 0.8984609162279086, "grad_norm": 0.265247642993927, "learning_rate": 5.043915670815536e-07, "loss": 0.1449, "step": 49760 }, { "epoch": 0.8985511956608093, "grad_norm": 0.3937067985534668, "learning_rate": 5.035025519275371e-07, "loss": 0.2638, "step": 49765 }, { "epoch": 0.8986414750937101, "grad_norm": 1.6205127239227295, "learning_rate": 5.026143006976914e-07, "loss": 0.2371, "step": 49770 }, { "epoch": 0.8987317545266108, "grad_norm": 0.48166754841804504, "learning_rate": 5.017268134634745e-07, "loss": 0.154, "step": 49775 }, { "epoch": 0.8988220339595114, "grad_norm": 0.4826033115386963, "learning_rate": 5.00840090296274e-07, "loss": 0.2296, "step": 49780 }, { "epoch": 0.8989123133924122, "grad_norm": 0.36453431844711304, "learning_rate": 4.999541312674261e-07, "loss": 0.1641, "step": 49785 }, { "epoch": 0.8990025928253129, "grad_norm": 0.2968432605266571, "learning_rate": 4.990689364481938e-07, "loss": 0.224, "step": 49790 }, { "epoch": 0.8990928722582137, "grad_norm": 0.3395954668521881, "learning_rate": 4.981845059097889e-07, "loss": 0.2494, "step": 49795 }, { "epoch": 0.8991831516911143, "grad_norm": 1.0246810913085938, "learning_rate": 4.973008397233536e-07, "loss": 0.2705, "step": 49800 }, { "epoch": 0.899273431124015, "grad_norm": 0.31981080770492554, "learning_rate": 4.964179379599765e-07, "loss": 0.2092, "step": 49805 }, { "epoch": 0.8993637105569158, "grad_norm": 0.8827393651008606, "learning_rate": 4.955358006906775e-07, "loss": 0.2263, "step": 49810 }, { "epoch": 0.8994539899898165, "grad_norm": 0.5777027010917664, "learning_rate": 4.946544279864185e-07, "loss": 0.2982, "step": 49815 }, { "epoch": 0.8995442694227171, "grad_norm": 0.31169047951698303, "learning_rate": 4.937738199181009e-07, "loss": 0.2608, "step": 49820 }, { "epoch": 0.8996345488556179, "grad_norm": 0.48567578196525574, "learning_rate": 4.928939765565633e-07, "loss": 0.2632, "step": 49825 }, { "epoch": 0.8997248282885186, "grad_norm": 0.9280194640159607, "learning_rate": 4.920148979725814e-07, "loss": 0.33, "step": 49830 }, { "epoch": 0.8998151077214194, "grad_norm": 0.18331050872802734, "learning_rate": 4.91136584236871e-07, "loss": 0.2385, "step": 49835 }, { "epoch": 0.89990538715432, "grad_norm": 0.3188425898551941, "learning_rate": 4.902590354200876e-07, "loss": 0.0719, "step": 49840 }, { "epoch": 0.8999956665872207, "grad_norm": 0.3170349597930908, "learning_rate": 4.893822515928193e-07, "loss": 0.2136, "step": 49845 }, { "epoch": 0.9000859460201215, "grad_norm": 0.49580907821655273, "learning_rate": 4.885062328256019e-07, "loss": 0.2086, "step": 49850 }, { "epoch": 0.9001762254530222, "grad_norm": 0.39910197257995605, "learning_rate": 4.876309791889023e-07, "loss": 0.2079, "step": 49855 }, { "epoch": 0.9002665048859229, "grad_norm": 0.7599525451660156, "learning_rate": 4.867564907531297e-07, "loss": 0.2537, "step": 49860 }, { "epoch": 0.9003567843188236, "grad_norm": 0.5778499245643616, "learning_rate": 4.858827675886291e-07, "loss": 0.1888, "step": 49865 }, { "epoch": 0.9004470637517243, "grad_norm": 0.5479825139045715, "learning_rate": 4.850098097656864e-07, "loss": 0.263, "step": 49870 }, { "epoch": 0.9005373431846251, "grad_norm": 0.5533686876296997, "learning_rate": 4.841376173545221e-07, "loss": 0.3033, "step": 49875 }, { "epoch": 0.9006276226175257, "grad_norm": 0.3194130063056946, "learning_rate": 4.832661904253e-07, "loss": 0.2485, "step": 49880 }, { "epoch": 0.9007179020504265, "grad_norm": 0.4575395882129669, "learning_rate": 4.823955290481186e-07, "loss": 0.1641, "step": 49885 }, { "epoch": 0.9008081814833272, "grad_norm": 0.40821582078933716, "learning_rate": 4.815256332930196e-07, "loss": 0.2354, "step": 49890 }, { "epoch": 0.9008984609162279, "grad_norm": 0.3677331507205963, "learning_rate": 4.806565032299759e-07, "loss": 0.1855, "step": 49895 }, { "epoch": 0.9009887403491286, "grad_norm": 0.4790988266468048, "learning_rate": 4.797881389289049e-07, "loss": 0.2774, "step": 49900 }, { "epoch": 0.9010790197820293, "grad_norm": 0.2946798801422119, "learning_rate": 4.789205404596587e-07, "loss": 0.2109, "step": 49905 }, { "epoch": 0.90116929921493, "grad_norm": 0.5090165734291077, "learning_rate": 4.780537078920311e-07, "loss": 0.1921, "step": 49910 }, { "epoch": 0.9012595786478308, "grad_norm": 0.5305168628692627, "learning_rate": 4.771876412957499e-07, "loss": 0.2644, "step": 49915 }, { "epoch": 0.9013498580807314, "grad_norm": 0.402736097574234, "learning_rate": 4.76322340740486e-07, "loss": 0.2345, "step": 49920 }, { "epoch": 0.9014401375136322, "grad_norm": 0.3757975697517395, "learning_rate": 4.7545780629584704e-07, "loss": 0.205, "step": 49925 }, { "epoch": 0.9015304169465329, "grad_norm": 0.5877687335014343, "learning_rate": 4.7459403803137627e-07, "loss": 0.3231, "step": 49930 }, { "epoch": 0.9016206963794337, "grad_norm": 0.38740038871765137, "learning_rate": 4.7373103601656033e-07, "loss": 0.1868, "step": 49935 }, { "epoch": 0.9017109758123344, "grad_norm": 0.4732867479324341, "learning_rate": 4.728688003208182e-07, "loss": 0.2084, "step": 49940 }, { "epoch": 0.901801255245235, "grad_norm": 0.45071929693222046, "learning_rate": 4.7200733101351204e-07, "loss": 0.2015, "step": 49945 }, { "epoch": 0.9018915346781358, "grad_norm": 0.36288556456565857, "learning_rate": 4.71146628163941e-07, "loss": 0.1347, "step": 49950 }, { "epoch": 0.9019818141110365, "grad_norm": 0.6755340695381165, "learning_rate": 4.7028669184134403e-07, "loss": 0.2061, "step": 49955 }, { "epoch": 0.9020720935439372, "grad_norm": 0.523026704788208, "learning_rate": 4.694275221148925e-07, "loss": 0.2287, "step": 49960 }, { "epoch": 0.9021623729768379, "grad_norm": 0.45862624049186707, "learning_rate": 4.685691190537056e-07, "loss": 0.2871, "step": 49965 }, { "epoch": 0.9022526524097386, "grad_norm": 0.6548789143562317, "learning_rate": 4.677114827268303e-07, "loss": 0.2517, "step": 49970 }, { "epoch": 0.9023429318426394, "grad_norm": 0.48521631956100464, "learning_rate": 4.668546132032614e-07, "loss": 0.2286, "step": 49975 }, { "epoch": 0.9024332112755401, "grad_norm": 0.17036576569080353, "learning_rate": 4.659985105519238e-07, "loss": 0.2146, "step": 49980 }, { "epoch": 0.9025234907084407, "grad_norm": 0.5720534324645996, "learning_rate": 4.6514317484169015e-07, "loss": 0.1847, "step": 49985 }, { "epoch": 0.9026137701413415, "grad_norm": 0.4207828938961029, "learning_rate": 4.642886061413621e-07, "loss": 0.2737, "step": 49990 }, { "epoch": 0.9027040495742422, "grad_norm": 0.32303088903427124, "learning_rate": 4.634348045196846e-07, "loss": 0.2367, "step": 49995 }, { "epoch": 0.902794329007143, "grad_norm": 0.604050874710083, "learning_rate": 4.625817700453383e-07, "loss": 0.2784, "step": 50000 }, { "epoch": 0.9028846084400436, "grad_norm": 0.5371136665344238, "learning_rate": 4.6172950278694616e-07, "loss": 0.1975, "step": 50005 }, { "epoch": 0.9029748878729443, "grad_norm": 0.3274342715740204, "learning_rate": 4.6087800281306437e-07, "loss": 0.1982, "step": 50010 }, { "epoch": 0.9030651673058451, "grad_norm": 0.6424617767333984, "learning_rate": 4.6002727019219153e-07, "loss": 0.2331, "step": 50015 }, { "epoch": 0.9031554467387458, "grad_norm": 0.5802335143089294, "learning_rate": 4.5917730499276393e-07, "loss": 0.2075, "step": 50020 }, { "epoch": 0.9032457261716464, "grad_norm": 0.4290149211883545, "learning_rate": 4.583281072831514e-07, "loss": 0.313, "step": 50025 }, { "epoch": 0.9033360056045472, "grad_norm": 0.6646602749824524, "learning_rate": 4.574796771316703e-07, "loss": 0.3307, "step": 50030 }, { "epoch": 0.9034262850374479, "grad_norm": 0.4131048321723938, "learning_rate": 4.5663201460656615e-07, "loss": 0.2217, "step": 50035 }, { "epoch": 0.9035165644703487, "grad_norm": 0.46411240100860596, "learning_rate": 4.557851197760321e-07, "loss": 0.1809, "step": 50040 }, { "epoch": 0.9036068439032493, "grad_norm": 0.4147179424762726, "learning_rate": 4.5493899270818817e-07, "loss": 0.2409, "step": 50045 }, { "epoch": 0.90369712333615, "grad_norm": 0.49063724279403687, "learning_rate": 4.5409363347110544e-07, "loss": 0.2961, "step": 50050 }, { "epoch": 0.9037874027690508, "grad_norm": 0.3859482705593109, "learning_rate": 4.53249042132784e-07, "loss": 0.178, "step": 50055 }, { "epoch": 0.9038776822019515, "grad_norm": 0.5122784376144409, "learning_rate": 4.5240521876116607e-07, "loss": 0.1957, "step": 50060 }, { "epoch": 0.9039679616348522, "grad_norm": 0.4450696110725403, "learning_rate": 4.5156216342412963e-07, "loss": 0.2349, "step": 50065 }, { "epoch": 0.9040582410677529, "grad_norm": 0.544794499874115, "learning_rate": 4.5071987618949485e-07, "loss": 0.2119, "step": 50070 }, { "epoch": 0.9041485205006536, "grad_norm": 0.3934589624404907, "learning_rate": 4.4987835712501425e-07, "loss": 0.125, "step": 50075 }, { "epoch": 0.9042387999335544, "grad_norm": 0.31174585223197937, "learning_rate": 4.4903760629838477e-07, "loss": 0.2428, "step": 50080 }, { "epoch": 0.904329079366455, "grad_norm": 0.5034710168838501, "learning_rate": 4.4819762377723675e-07, "loss": 0.2239, "step": 50085 }, { "epoch": 0.9044193587993558, "grad_norm": 0.31654495000839233, "learning_rate": 4.473584096291428e-07, "loss": 0.2392, "step": 50090 }, { "epoch": 0.9045096382322565, "grad_norm": 0.3663698136806488, "learning_rate": 4.465199639216089e-07, "loss": 0.2065, "step": 50095 }, { "epoch": 0.9045999176651572, "grad_norm": 0.5760450959205627, "learning_rate": 4.4568228672208557e-07, "loss": 0.2567, "step": 50100 }, { "epoch": 0.9046901970980579, "grad_norm": 0.4471501111984253, "learning_rate": 4.448453780979556e-07, "loss": 0.2105, "step": 50105 }, { "epoch": 0.9047804765309586, "grad_norm": 0.4944366216659546, "learning_rate": 4.440092381165384e-07, "loss": 0.2441, "step": 50110 }, { "epoch": 0.9048707559638594, "grad_norm": 0.7945095300674438, "learning_rate": 4.431738668451036e-07, "loss": 0.2615, "step": 50115 }, { "epoch": 0.9049610353967601, "grad_norm": 0.30083099007606506, "learning_rate": 4.4233926435084396e-07, "loss": 0.2695, "step": 50120 }, { "epoch": 0.9050513148296607, "grad_norm": 0.3817719519138336, "learning_rate": 4.4150543070090037e-07, "loss": 0.2053, "step": 50125 }, { "epoch": 0.9051415942625615, "grad_norm": 0.6499032378196716, "learning_rate": 4.4067236596234795e-07, "loss": 0.3102, "step": 50130 }, { "epoch": 0.9052318736954622, "grad_norm": 0.7429965138435364, "learning_rate": 4.39840070202201e-07, "loss": 0.1659, "step": 50135 }, { "epoch": 0.905322153128363, "grad_norm": 0.5219694375991821, "learning_rate": 4.390085434874103e-07, "loss": 0.185, "step": 50140 }, { "epoch": 0.9054124325612636, "grad_norm": 0.8456925749778748, "learning_rate": 4.381777858848668e-07, "loss": 0.175, "step": 50145 }, { "epoch": 0.9055027119941643, "grad_norm": 0.6609681844711304, "learning_rate": 4.3734779746140043e-07, "loss": 0.2725, "step": 50150 }, { "epoch": 0.9055929914270651, "grad_norm": 0.3738050162792206, "learning_rate": 4.3651857828377665e-07, "loss": 0.2405, "step": 50155 }, { "epoch": 0.9056832708599658, "grad_norm": 0.44865942001342773, "learning_rate": 4.3569012841869987e-07, "loss": 0.1327, "step": 50160 }, { "epoch": 0.9057735502928664, "grad_norm": 0.4371652901172638, "learning_rate": 4.348624479328134e-07, "loss": 0.3304, "step": 50165 }, { "epoch": 0.9058638297257672, "grad_norm": 0.22302810847759247, "learning_rate": 4.340355368926963e-07, "loss": 0.1925, "step": 50170 }, { "epoch": 0.9059541091586679, "grad_norm": 0.35875651240348816, "learning_rate": 4.3320939536486974e-07, "loss": 0.2129, "step": 50175 }, { "epoch": 0.9060443885915687, "grad_norm": 0.26657238602638245, "learning_rate": 4.3238402341578946e-07, "loss": 0.1229, "step": 50180 }, { "epoch": 0.9061346680244693, "grad_norm": 0.5273004770278931, "learning_rate": 4.3155942111185234e-07, "loss": 0.2458, "step": 50185 }, { "epoch": 0.90622494745737, "grad_norm": 0.5499250292778015, "learning_rate": 4.307355885193909e-07, "loss": 0.1427, "step": 50190 }, { "epoch": 0.9063152268902708, "grad_norm": 0.29581502079963684, "learning_rate": 4.2991252570467436e-07, "loss": 0.2336, "step": 50195 }, { "epoch": 0.9064055063231715, "grad_norm": 0.4888015687465668, "learning_rate": 4.290902327339153e-07, "loss": 0.2174, "step": 50200 }, { "epoch": 0.9064957857560721, "grad_norm": 0.33837902545928955, "learning_rate": 4.282687096732585e-07, "loss": 0.2551, "step": 50205 }, { "epoch": 0.9065860651889729, "grad_norm": 0.36375921964645386, "learning_rate": 4.274479565887912e-07, "loss": 0.1728, "step": 50210 }, { "epoch": 0.9066763446218736, "grad_norm": 0.30941715836524963, "learning_rate": 4.2662797354653616e-07, "loss": 0.1609, "step": 50215 }, { "epoch": 0.9067666240547744, "grad_norm": 0.5000976324081421, "learning_rate": 4.2580876061245726e-07, "loss": 0.2272, "step": 50220 }, { "epoch": 0.906856903487675, "grad_norm": 0.4815863072872162, "learning_rate": 4.249903178524517e-07, "loss": 0.2128, "step": 50225 }, { "epoch": 0.9069471829205757, "grad_norm": 0.3845180869102478, "learning_rate": 4.24172645332358e-07, "loss": 0.2306, "step": 50230 }, { "epoch": 0.9070374623534765, "grad_norm": 0.4279397130012512, "learning_rate": 4.2335574311795137e-07, "loss": 0.3058, "step": 50235 }, { "epoch": 0.9071277417863772, "grad_norm": 0.43979766964912415, "learning_rate": 4.225396112749458e-07, "loss": 0.2091, "step": 50240 }, { "epoch": 0.9072180212192779, "grad_norm": 0.6820843815803528, "learning_rate": 4.217242498689944e-07, "loss": 0.2498, "step": 50245 }, { "epoch": 0.9073083006521786, "grad_norm": 0.62982177734375, "learning_rate": 4.209096589656869e-07, "loss": 0.2269, "step": 50250 }, { "epoch": 0.9073985800850793, "grad_norm": 0.4188978672027588, "learning_rate": 4.200958386305498e-07, "loss": 0.2913, "step": 50255 }, { "epoch": 0.9074888595179801, "grad_norm": 0.40333935618400574, "learning_rate": 4.192827889290507e-07, "loss": 0.1762, "step": 50260 }, { "epoch": 0.9075791389508807, "grad_norm": 0.8141732811927795, "learning_rate": 4.184705099265918e-07, "loss": 0.2268, "step": 50265 }, { "epoch": 0.9076694183837815, "grad_norm": 0.6344169974327087, "learning_rate": 4.1765900168851737e-07, "loss": 0.2308, "step": 50270 }, { "epoch": 0.9077596978166822, "grad_norm": 0.4170890748500824, "learning_rate": 4.1684826428010414e-07, "loss": 0.2973, "step": 50275 }, { "epoch": 0.9078499772495829, "grad_norm": 0.522654116153717, "learning_rate": 4.160382977665722e-07, "loss": 0.254, "step": 50280 }, { "epoch": 0.9079402566824836, "grad_norm": 0.4280318319797516, "learning_rate": 4.1522910221307724e-07, "loss": 0.2998, "step": 50285 }, { "epoch": 0.9080305361153843, "grad_norm": 0.5495306849479675, "learning_rate": 4.1442067768471263e-07, "loss": 0.1829, "step": 50290 }, { "epoch": 0.908120815548285, "grad_norm": 0.6517475843429565, "learning_rate": 4.1361302424651197e-07, "loss": 0.3552, "step": 50295 }, { "epoch": 0.9082110949811858, "grad_norm": 0.5685386061668396, "learning_rate": 4.1280614196344215e-07, "loss": 0.2625, "step": 50300 }, { "epoch": 0.9083013744140864, "grad_norm": 0.7493937611579895, "learning_rate": 4.1200003090041353e-07, "loss": 0.2862, "step": 50305 }, { "epoch": 0.9083916538469872, "grad_norm": 0.4021660387516022, "learning_rate": 4.111946911222697e-07, "loss": 0.2414, "step": 50310 }, { "epoch": 0.9084819332798879, "grad_norm": 0.5879800915718079, "learning_rate": 4.1039012269379783e-07, "loss": 0.1154, "step": 50315 }, { "epoch": 0.9085722127127887, "grad_norm": 0.3413461148738861, "learning_rate": 4.09586325679715e-07, "loss": 0.2379, "step": 50320 }, { "epoch": 0.9086624921456893, "grad_norm": 0.5342974066734314, "learning_rate": 4.08783300144685e-07, "loss": 0.1862, "step": 50325 }, { "epoch": 0.90875277157859, "grad_norm": 0.4676631689071655, "learning_rate": 4.079810461533029e-07, "loss": 0.1875, "step": 50330 }, { "epoch": 0.9088430510114908, "grad_norm": 0.3508830666542053, "learning_rate": 4.071795637701059e-07, "loss": 0.1795, "step": 50335 }, { "epoch": 0.9089333304443915, "grad_norm": 0.7108884453773499, "learning_rate": 4.063788530595647e-07, "loss": 0.3063, "step": 50340 }, { "epoch": 0.9090236098772922, "grad_norm": 0.18200215697288513, "learning_rate": 4.055789140860922e-07, "loss": 0.2763, "step": 50345 }, { "epoch": 0.9091138893101929, "grad_norm": 0.4111839532852173, "learning_rate": 4.047797469140391e-07, "loss": 0.1744, "step": 50350 }, { "epoch": 0.9092041687430936, "grad_norm": 0.4100380837917328, "learning_rate": 4.039813516076918e-07, "loss": 0.1646, "step": 50355 }, { "epoch": 0.9092944481759944, "grad_norm": 0.5537596940994263, "learning_rate": 4.031837282312734e-07, "loss": 0.2745, "step": 50360 }, { "epoch": 0.9093847276088951, "grad_norm": 0.7820747494697571, "learning_rate": 4.023868768489503e-07, "loss": 0.2231, "step": 50365 }, { "epoch": 0.9094750070417957, "grad_norm": 0.40522339940071106, "learning_rate": 4.015907975248201e-07, "loss": 0.2613, "step": 50370 }, { "epoch": 0.9095652864746965, "grad_norm": 0.4584490954875946, "learning_rate": 4.007954903229239e-07, "loss": 0.1947, "step": 50375 }, { "epoch": 0.9096555659075972, "grad_norm": 0.4288844168186188, "learning_rate": 4.00000955307237e-07, "loss": 0.201, "step": 50380 }, { "epoch": 0.909745845340498, "grad_norm": 0.6215588450431824, "learning_rate": 3.9920719254167515e-07, "loss": 0.2047, "step": 50385 }, { "epoch": 0.9098361247733986, "grad_norm": 0.25456854701042175, "learning_rate": 3.984142020900905e-07, "loss": 0.1589, "step": 50390 }, { "epoch": 0.9099264042062993, "grad_norm": 0.33527636528015137, "learning_rate": 3.9762198401627095e-07, "loss": 0.2378, "step": 50395 }, { "epoch": 0.9100166836392001, "grad_norm": 1.9328597784042358, "learning_rate": 3.968305383839499e-07, "loss": 0.2021, "step": 50400 }, { "epoch": 0.9101069630721008, "grad_norm": 0.6006541848182678, "learning_rate": 3.960398652567876e-07, "loss": 0.2291, "step": 50405 }, { "epoch": 0.9101972425050014, "grad_norm": 0.296835333108902, "learning_rate": 3.952499646983909e-07, "loss": 0.2481, "step": 50410 }, { "epoch": 0.9102875219379022, "grad_norm": 0.25313642621040344, "learning_rate": 3.944608367723013e-07, "loss": 0.2859, "step": 50415 }, { "epoch": 0.9103778013708029, "grad_norm": 0.3476182818412781, "learning_rate": 3.93672481541999e-07, "loss": 0.2489, "step": 50420 }, { "epoch": 0.9104680808037037, "grad_norm": 0.3778388798236847, "learning_rate": 3.928848990709011e-07, "loss": 0.1478, "step": 50425 }, { "epoch": 0.9105583602366043, "grad_norm": 0.3250458836555481, "learning_rate": 3.9209808942236247e-07, "loss": 0.1719, "step": 50430 }, { "epoch": 0.910648639669505, "grad_norm": 0.48391348123550415, "learning_rate": 3.913120526596759e-07, "loss": 0.1562, "step": 50435 }, { "epoch": 0.9107389191024058, "grad_norm": 0.6892566680908203, "learning_rate": 3.9052678884607176e-07, "loss": 0.2654, "step": 50440 }, { "epoch": 0.9108291985353065, "grad_norm": 0.7143829464912415, "learning_rate": 3.8974229804472076e-07, "loss": 0.2494, "step": 50445 }, { "epoch": 0.9109194779682072, "grad_norm": 0.28805986046791077, "learning_rate": 3.8895858031873013e-07, "loss": 0.2096, "step": 50450 }, { "epoch": 0.9110097574011079, "grad_norm": 0.7758084535598755, "learning_rate": 3.881756357311406e-07, "loss": 0.2978, "step": 50455 }, { "epoch": 0.9111000368340086, "grad_norm": 0.6385540962219238, "learning_rate": 3.873934643449373e-07, "loss": 0.3422, "step": 50460 }, { "epoch": 0.9111903162669094, "grad_norm": 0.4996768534183502, "learning_rate": 3.8661206622303883e-07, "loss": 0.1942, "step": 50465 }, { "epoch": 0.91128059569981, "grad_norm": 0.6436729431152344, "learning_rate": 3.8583144142830485e-07, "loss": 0.2363, "step": 50470 }, { "epoch": 0.9113708751327108, "grad_norm": 0.29036223888397217, "learning_rate": 3.8505159002352853e-07, "loss": 0.1936, "step": 50475 }, { "epoch": 0.9114611545656115, "grad_norm": 0.3660075068473816, "learning_rate": 3.842725120714441e-07, "loss": 0.2528, "step": 50480 }, { "epoch": 0.9115514339985122, "grad_norm": 0.8161829113960266, "learning_rate": 3.834942076347248e-07, "loss": 0.3586, "step": 50485 }, { "epoch": 0.9116417134314129, "grad_norm": 0.37517833709716797, "learning_rate": 3.8271667677597603e-07, "loss": 0.2175, "step": 50490 }, { "epoch": 0.9117319928643136, "grad_norm": 0.5656657814979553, "learning_rate": 3.819399195577467e-07, "loss": 0.1661, "step": 50495 }, { "epoch": 0.9118222722972144, "grad_norm": 0.6404244899749756, "learning_rate": 3.811639360425201e-07, "loss": 0.213, "step": 50500 }, { "epoch": 0.9119125517301151, "grad_norm": 0.6275199055671692, "learning_rate": 3.8038872629271973e-07, "loss": 0.2142, "step": 50505 }, { "epoch": 0.9120028311630157, "grad_norm": 0.5586143732070923, "learning_rate": 3.7961429037070455e-07, "loss": 0.251, "step": 50510 }, { "epoch": 0.9120931105959165, "grad_norm": 0.27319806814193726, "learning_rate": 3.7884062833877355e-07, "loss": 0.2309, "step": 50515 }, { "epoch": 0.9121833900288172, "grad_norm": 0.5357292890548706, "learning_rate": 3.780677402591615e-07, "loss": 0.2423, "step": 50520 }, { "epoch": 0.912273669461718, "grad_norm": 0.4977418780326843, "learning_rate": 3.7729562619404083e-07, "loss": 0.2505, "step": 50525 }, { "epoch": 0.9123639488946186, "grad_norm": 0.5652428865432739, "learning_rate": 3.7652428620552293e-07, "loss": 0.2298, "step": 50530 }, { "epoch": 0.9124542283275193, "grad_norm": 0.2480408400297165, "learning_rate": 3.757537203556583e-07, "loss": 0.2606, "step": 50535 }, { "epoch": 0.9125445077604201, "grad_norm": 0.5349669456481934, "learning_rate": 3.749839287064305e-07, "loss": 0.2329, "step": 50540 }, { "epoch": 0.9126347871933208, "grad_norm": 0.6430692672729492, "learning_rate": 3.742149113197657e-07, "loss": 0.2907, "step": 50545 }, { "epoch": 0.9127250666262214, "grad_norm": 0.47174084186553955, "learning_rate": 3.734466682575233e-07, "loss": 0.3091, "step": 50550 }, { "epoch": 0.9128153460591222, "grad_norm": 0.44207125902175903, "learning_rate": 3.726791995815071e-07, "loss": 0.2782, "step": 50555 }, { "epoch": 0.9129056254920229, "grad_norm": 0.7439970970153809, "learning_rate": 3.7191250535345227e-07, "loss": 0.3023, "step": 50560 }, { "epoch": 0.9129959049249237, "grad_norm": 0.4343739151954651, "learning_rate": 3.711465856350316e-07, "loss": 0.2131, "step": 50565 }, { "epoch": 0.9130861843578243, "grad_norm": 0.08801253139972687, "learning_rate": 3.7038144048785916e-07, "loss": 0.1739, "step": 50570 }, { "epoch": 0.913176463790725, "grad_norm": 0.3697124421596527, "learning_rate": 3.696170699734858e-07, "loss": 0.1934, "step": 50575 }, { "epoch": 0.9132667432236258, "grad_norm": 0.8626468181610107, "learning_rate": 3.6885347415340113e-07, "loss": 0.3168, "step": 50580 }, { "epoch": 0.9133570226565265, "grad_norm": 0.5063561797142029, "learning_rate": 3.6809065308902824e-07, "loss": 0.1698, "step": 50585 }, { "epoch": 0.9134473020894271, "grad_norm": 0.5662740468978882, "learning_rate": 3.6732860684173255e-07, "loss": 0.1597, "step": 50590 }, { "epoch": 0.9135375815223279, "grad_norm": 0.4462043046951294, "learning_rate": 3.6656733547281273e-07, "loss": 0.292, "step": 50595 }, { "epoch": 0.9136278609552286, "grad_norm": 0.7503654360771179, "learning_rate": 3.658068390435099e-07, "loss": 0.2067, "step": 50600 }, { "epoch": 0.9137181403881294, "grad_norm": 0.3239396810531616, "learning_rate": 3.6504711761499947e-07, "loss": 0.2041, "step": 50605 }, { "epoch": 0.91380841982103, "grad_norm": 0.4279657006263733, "learning_rate": 3.6428817124839477e-07, "loss": 0.2585, "step": 50610 }, { "epoch": 0.9138986992539307, "grad_norm": 0.5558903813362122, "learning_rate": 3.635300000047481e-07, "loss": 0.2321, "step": 50615 }, { "epoch": 0.9139889786868315, "grad_norm": 0.5142278671264648, "learning_rate": 3.627726039450508e-07, "loss": 0.2074, "step": 50620 }, { "epoch": 0.9140792581197322, "grad_norm": 0.5785174369812012, "learning_rate": 3.620159831302261e-07, "loss": 0.2398, "step": 50625 }, { "epoch": 0.9141695375526329, "grad_norm": 0.4012088477611542, "learning_rate": 3.612601376211422e-07, "loss": 0.2587, "step": 50630 }, { "epoch": 0.9142598169855336, "grad_norm": 0.3788507282733917, "learning_rate": 3.605050674785993e-07, "loss": 0.2198, "step": 50635 }, { "epoch": 0.9143500964184343, "grad_norm": 0.6386476755142212, "learning_rate": 3.597507727633365e-07, "loss": 0.2315, "step": 50640 }, { "epoch": 0.9144403758513351, "grad_norm": 0.5969813466072083, "learning_rate": 3.589972535360342e-07, "loss": 0.1424, "step": 50645 }, { "epoch": 0.9145306552842357, "grad_norm": 0.535224437713623, "learning_rate": 3.582445098573062e-07, "loss": 0.2146, "step": 50650 }, { "epoch": 0.9146209347171365, "grad_norm": 0.46723034977912903, "learning_rate": 3.574925417877051e-07, "loss": 0.214, "step": 50655 }, { "epoch": 0.9147112141500372, "grad_norm": 0.2729646563529968, "learning_rate": 3.567413493877203e-07, "loss": 0.2081, "step": 50660 }, { "epoch": 0.9148014935829379, "grad_norm": 0.6797147989273071, "learning_rate": 3.5599093271778127e-07, "loss": 0.2154, "step": 50665 }, { "epoch": 0.9148917730158386, "grad_norm": 0.5753675699234009, "learning_rate": 3.5524129183825194e-07, "loss": 0.2217, "step": 50670 }, { "epoch": 0.9149820524487393, "grad_norm": 0.27585065364837646, "learning_rate": 3.544924268094374e-07, "loss": 0.1983, "step": 50675 }, { "epoch": 0.91507233188164, "grad_norm": 0.592746376991272, "learning_rate": 3.5374433769157614e-07, "loss": 0.236, "step": 50680 }, { "epoch": 0.9151626113145408, "grad_norm": 0.4515919089317322, "learning_rate": 3.5299702454485e-07, "loss": 0.2203, "step": 50685 }, { "epoch": 0.9152528907474414, "grad_norm": 0.396528422832489, "learning_rate": 3.5225048742937197e-07, "loss": 0.2219, "step": 50690 }, { "epoch": 0.9153431701803422, "grad_norm": 0.40932905673980713, "learning_rate": 3.515047264051963e-07, "loss": 0.0992, "step": 50695 }, { "epoch": 0.9154334496132429, "grad_norm": 0.7595083713531494, "learning_rate": 3.5075974153231384e-07, "loss": 0.168, "step": 50700 }, { "epoch": 0.9155237290461437, "grad_norm": 0.3031374216079712, "learning_rate": 3.5001553287065225e-07, "loss": 0.1691, "step": 50705 }, { "epoch": 0.9156140084790443, "grad_norm": 0.43266284465789795, "learning_rate": 3.492721004800803e-07, "loss": 0.1986, "step": 50710 }, { "epoch": 0.915704287911945, "grad_norm": 0.8472728133201599, "learning_rate": 3.4852944442040014e-07, "loss": 0.2709, "step": 50715 }, { "epoch": 0.9157945673448458, "grad_norm": 0.9106569886207581, "learning_rate": 3.4778756475135287e-07, "loss": 0.1658, "step": 50720 }, { "epoch": 0.9158848467777465, "grad_norm": 0.6724917888641357, "learning_rate": 3.4704646153261855e-07, "loss": 0.1571, "step": 50725 }, { "epoch": 0.9159751262106473, "grad_norm": 0.2121821939945221, "learning_rate": 3.463061348238117e-07, "loss": 0.224, "step": 50730 }, { "epoch": 0.9160654056435479, "grad_norm": 0.5543504357337952, "learning_rate": 3.45566584684488e-07, "loss": 0.2384, "step": 50735 }, { "epoch": 0.9161556850764486, "grad_norm": 0.5977898240089417, "learning_rate": 3.4482781117413764e-07, "loss": 0.2982, "step": 50740 }, { "epoch": 0.9162459645093494, "grad_norm": 0.618023157119751, "learning_rate": 3.440898143521898e-07, "loss": 0.1717, "step": 50745 }, { "epoch": 0.9163362439422501, "grad_norm": 0.4433342218399048, "learning_rate": 3.4335259427801135e-07, "loss": 0.1922, "step": 50750 }, { "epoch": 0.9164265233751507, "grad_norm": 0.38910043239593506, "learning_rate": 3.4261615101090607e-07, "loss": 0.2132, "step": 50755 }, { "epoch": 0.9165168028080515, "grad_norm": 1.1365541219711304, "learning_rate": 3.4188048461011534e-07, "loss": 0.3055, "step": 50760 }, { "epoch": 0.9166070822409522, "grad_norm": 0.19863960146903992, "learning_rate": 3.411455951348186e-07, "loss": 0.1518, "step": 50765 }, { "epoch": 0.916697361673853, "grad_norm": 0.3658634424209595, "learning_rate": 3.404114826441307e-07, "loss": 0.1368, "step": 50770 }, { "epoch": 0.9167876411067536, "grad_norm": 0.3282088041305542, "learning_rate": 3.396781471971078e-07, "loss": 0.3235, "step": 50775 }, { "epoch": 0.9168779205396543, "grad_norm": 0.3839718997478485, "learning_rate": 3.389455888527415e-07, "loss": 0.2708, "step": 50780 }, { "epoch": 0.9169681999725551, "grad_norm": 0.34831714630126953, "learning_rate": 3.382138076699593e-07, "loss": 0.1988, "step": 50785 }, { "epoch": 0.9170584794054558, "grad_norm": 0.26568031311035156, "learning_rate": 3.374828037076294e-07, "loss": 0.2896, "step": 50790 }, { "epoch": 0.9171487588383564, "grad_norm": 0.3606593906879425, "learning_rate": 3.3675257702455276e-07, "loss": 0.2307, "step": 50795 }, { "epoch": 0.9172390382712572, "grad_norm": 0.4034231901168823, "learning_rate": 3.3602312767947456e-07, "loss": 0.2867, "step": 50800 }, { "epoch": 0.9173293177041579, "grad_norm": 0.16336488723754883, "learning_rate": 3.3529445573107e-07, "loss": 0.1683, "step": 50805 }, { "epoch": 0.9174195971370587, "grad_norm": 0.357759028673172, "learning_rate": 3.3456656123795784e-07, "loss": 0.1406, "step": 50810 }, { "epoch": 0.9175098765699593, "grad_norm": 0.25141188502311707, "learning_rate": 3.338394442586912e-07, "loss": 0.1415, "step": 50815 }, { "epoch": 0.91760015600286, "grad_norm": 0.31993165612220764, "learning_rate": 3.331131048517622e-07, "loss": 0.2068, "step": 50820 }, { "epoch": 0.9176904354357608, "grad_norm": 0.7054659724235535, "learning_rate": 3.323875430755985e-07, "loss": 0.2548, "step": 50825 }, { "epoch": 0.9177807148686615, "grad_norm": 0.5586223602294922, "learning_rate": 3.316627589885668e-07, "loss": 0.2769, "step": 50830 }, { "epoch": 0.9178709943015622, "grad_norm": 0.4388918876647949, "learning_rate": 3.309387526489705e-07, "loss": 0.3059, "step": 50835 }, { "epoch": 0.9179612737344629, "grad_norm": 1.2424386739730835, "learning_rate": 3.3021552411504954e-07, "loss": 0.1947, "step": 50840 }, { "epoch": 0.9180515531673636, "grad_norm": 0.40078264474868774, "learning_rate": 3.2949307344498526e-07, "loss": 0.2116, "step": 50845 }, { "epoch": 0.9181418326002644, "grad_norm": 0.635747492313385, "learning_rate": 3.2877140069689116e-07, "loss": 0.1659, "step": 50850 }, { "epoch": 0.918232112033165, "grad_norm": 0.36611708998680115, "learning_rate": 3.2805050592882193e-07, "loss": 0.2203, "step": 50855 }, { "epoch": 0.9183223914660658, "grad_norm": 0.3921454846858978, "learning_rate": 3.273303891987667e-07, "loss": 0.1992, "step": 50860 }, { "epoch": 0.9184126708989665, "grad_norm": 0.4269610047340393, "learning_rate": 3.266110505646558e-07, "loss": 0.206, "step": 50865 }, { "epoch": 0.9185029503318672, "grad_norm": 0.34178775548934937, "learning_rate": 3.258924900843519e-07, "loss": 0.2422, "step": 50870 }, { "epoch": 0.9185932297647679, "grad_norm": 0.6991662979125977, "learning_rate": 3.251747078156597e-07, "loss": 0.2438, "step": 50875 }, { "epoch": 0.9186835091976686, "grad_norm": 0.4647800326347351, "learning_rate": 3.244577038163199e-07, "loss": 0.2393, "step": 50880 }, { "epoch": 0.9187737886305694, "grad_norm": 0.4164331555366516, "learning_rate": 3.237414781440107e-07, "loss": 0.1718, "step": 50885 }, { "epoch": 0.9188640680634701, "grad_norm": 0.4060598313808441, "learning_rate": 3.2302603085634486e-07, "loss": 0.2006, "step": 50890 }, { "epoch": 0.9189543474963707, "grad_norm": 0.41936469078063965, "learning_rate": 3.2231136201087754e-07, "loss": 0.1816, "step": 50895 }, { "epoch": 0.9190446269292715, "grad_norm": 0.6043384671211243, "learning_rate": 3.2159747166509716e-07, "loss": 0.3408, "step": 50900 }, { "epoch": 0.9191349063621722, "grad_norm": 0.4837489426136017, "learning_rate": 3.2088435987643106e-07, "loss": 0.2979, "step": 50905 }, { "epoch": 0.919225185795073, "grad_norm": 0.46801280975341797, "learning_rate": 3.2017202670224455e-07, "loss": 0.2624, "step": 50910 }, { "epoch": 0.9193154652279736, "grad_norm": 0.4052257537841797, "learning_rate": 3.194604721998396e-07, "loss": 0.309, "step": 50915 }, { "epoch": 0.9194057446608743, "grad_norm": 0.37288016080856323, "learning_rate": 3.187496964264547e-07, "loss": 0.2381, "step": 50920 }, { "epoch": 0.9194960240937751, "grad_norm": 0.45037224888801575, "learning_rate": 3.180396994392687e-07, "loss": 0.2332, "step": 50925 }, { "epoch": 0.9195863035266758, "grad_norm": 0.40705618262290955, "learning_rate": 3.173304812953937e-07, "loss": 0.185, "step": 50930 }, { "epoch": 0.9196765829595764, "grad_norm": 1.0847456455230713, "learning_rate": 3.1662204205188064e-07, "loss": 0.3054, "step": 50935 }, { "epoch": 0.9197668623924772, "grad_norm": 0.35280418395996094, "learning_rate": 3.159143817657195e-07, "loss": 0.1544, "step": 50940 }, { "epoch": 0.9198571418253779, "grad_norm": 0.5095433592796326, "learning_rate": 3.152075004938371e-07, "loss": 0.1899, "step": 50945 }, { "epoch": 0.9199474212582787, "grad_norm": 0.177003413438797, "learning_rate": 3.145013982930956e-07, "loss": 0.1124, "step": 50950 }, { "epoch": 0.9200377006911793, "grad_norm": 0.5592557787895203, "learning_rate": 3.137960752202962e-07, "loss": 0.1869, "step": 50955 }, { "epoch": 0.92012798012408, "grad_norm": 0.4516811966896057, "learning_rate": 3.1309153133217805e-07, "loss": 0.31, "step": 50960 }, { "epoch": 0.9202182595569808, "grad_norm": 0.35320770740509033, "learning_rate": 3.123877666854136e-07, "loss": 0.2033, "step": 50965 }, { "epoch": 0.9203085389898815, "grad_norm": 0.48333561420440674, "learning_rate": 3.116847813366186e-07, "loss": 0.2591, "step": 50970 }, { "epoch": 0.9203988184227822, "grad_norm": 0.4667608141899109, "learning_rate": 3.109825753423412e-07, "loss": 0.3112, "step": 50975 }, { "epoch": 0.9204890978556829, "grad_norm": 0.4377197325229645, "learning_rate": 3.1028114875907067e-07, "loss": 0.2024, "step": 50980 }, { "epoch": 0.9205793772885836, "grad_norm": 0.23441138863563538, "learning_rate": 3.0958050164323075e-07, "loss": 0.1782, "step": 50985 }, { "epoch": 0.9206696567214844, "grad_norm": 0.5151461362838745, "learning_rate": 3.0888063405118294e-07, "loss": 0.2156, "step": 50990 }, { "epoch": 0.920759936154385, "grad_norm": 0.7904641032218933, "learning_rate": 3.081815460392268e-07, "loss": 0.1485, "step": 50995 }, { "epoch": 0.9208502155872857, "grad_norm": 0.428873747587204, "learning_rate": 3.0748323766360054e-07, "loss": 0.2196, "step": 51000 }, { "epoch": 0.9209404950201865, "grad_norm": 0.29614976048469543, "learning_rate": 3.067857089804738e-07, "loss": 0.1931, "step": 51005 }, { "epoch": 0.9210307744530872, "grad_norm": 0.6210939288139343, "learning_rate": 3.0608896004596154e-07, "loss": 0.2243, "step": 51010 }, { "epoch": 0.9211210538859879, "grad_norm": 0.33381879329681396, "learning_rate": 3.053929909161102e-07, "loss": 0.2125, "step": 51015 }, { "epoch": 0.9212113333188886, "grad_norm": 0.44750410318374634, "learning_rate": 3.046978016469071e-07, "loss": 0.1754, "step": 51020 }, { "epoch": 0.9213016127517893, "grad_norm": 0.6163250207901001, "learning_rate": 3.0400339229427533e-07, "loss": 0.2049, "step": 51025 }, { "epoch": 0.9213918921846901, "grad_norm": 0.5448737740516663, "learning_rate": 3.0330976291407133e-07, "loss": 0.1916, "step": 51030 }, { "epoch": 0.9214821716175907, "grad_norm": 0.45817968249320984, "learning_rate": 3.02616913562096e-07, "loss": 0.1981, "step": 51035 }, { "epoch": 0.9215724510504915, "grad_norm": 0.3027898371219635, "learning_rate": 3.0192484429408363e-07, "loss": 0.2708, "step": 51040 }, { "epoch": 0.9216627304833922, "grad_norm": 0.43227240443229675, "learning_rate": 3.012335551657053e-07, "loss": 0.1804, "step": 51045 }, { "epoch": 0.9217530099162929, "grad_norm": 0.44979313015937805, "learning_rate": 3.005430462325709e-07, "loss": 0.2141, "step": 51050 }, { "epoch": 0.9218432893491936, "grad_norm": 0.19083057343959808, "learning_rate": 2.998533175502272e-07, "loss": 0.1663, "step": 51055 }, { "epoch": 0.9219335687820943, "grad_norm": 0.3405175805091858, "learning_rate": 2.991643691741564e-07, "loss": 0.2445, "step": 51060 }, { "epoch": 0.9220238482149951, "grad_norm": 0.4007622003555298, "learning_rate": 2.984762011597808e-07, "loss": 0.2264, "step": 51065 }, { "epoch": 0.9221141276478958, "grad_norm": 0.6208809614181519, "learning_rate": 2.9778881356245736e-07, "loss": 0.2062, "step": 51070 }, { "epoch": 0.9222044070807964, "grad_norm": 0.48946911096572876, "learning_rate": 2.9710220643748176e-07, "loss": 0.2162, "step": 51075 }, { "epoch": 0.9222946865136972, "grad_norm": 0.2105833739042282, "learning_rate": 2.9641637984008653e-07, "loss": 0.1951, "step": 51080 }, { "epoch": 0.9223849659465979, "grad_norm": 0.41803836822509766, "learning_rate": 2.9573133382544306e-07, "loss": 0.2241, "step": 51085 }, { "epoch": 0.9224752453794987, "grad_norm": 0.3910962641239166, "learning_rate": 2.95047068448655e-07, "loss": 0.2617, "step": 51090 }, { "epoch": 0.9225655248123993, "grad_norm": 0.1976976990699768, "learning_rate": 2.943635837647707e-07, "loss": 0.2, "step": 51095 }, { "epoch": 0.9226558042453, "grad_norm": 0.3609817922115326, "learning_rate": 2.9368087982876825e-07, "loss": 0.2004, "step": 51100 }, { "epoch": 0.9227460836782008, "grad_norm": 0.31081682443618774, "learning_rate": 2.92998956695566e-07, "loss": 0.2911, "step": 51105 }, { "epoch": 0.9228363631111015, "grad_norm": 0.32722151279449463, "learning_rate": 2.923178144200223e-07, "loss": 0.2769, "step": 51110 }, { "epoch": 0.9229266425440021, "grad_norm": 0.27847838401794434, "learning_rate": 2.9163745305692883e-07, "loss": 0.1281, "step": 51115 }, { "epoch": 0.9230169219769029, "grad_norm": 0.6383860111236572, "learning_rate": 2.9095787266101627e-07, "loss": 0.2101, "step": 51120 }, { "epoch": 0.9231072014098036, "grad_norm": 0.37489333748817444, "learning_rate": 2.9027907328694983e-07, "loss": 0.2812, "step": 51125 }, { "epoch": 0.9231974808427044, "grad_norm": 0.3819452226161957, "learning_rate": 2.896010549893369e-07, "loss": 0.2206, "step": 51130 }, { "epoch": 0.9232877602756051, "grad_norm": 0.42919087409973145, "learning_rate": 2.8892381782271606e-07, "loss": 0.2742, "step": 51135 }, { "epoch": 0.9233780397085057, "grad_norm": 0.32791903614997864, "learning_rate": 2.882473618415682e-07, "loss": 0.1806, "step": 51140 }, { "epoch": 0.9234683191414065, "grad_norm": 0.45618417859077454, "learning_rate": 2.875716871003087e-07, "loss": 0.274, "step": 51145 }, { "epoch": 0.9235585985743072, "grad_norm": 0.5508531928062439, "learning_rate": 2.868967936532918e-07, "loss": 0.1669, "step": 51150 }, { "epoch": 0.923648878007208, "grad_norm": 0.5229293704032898, "learning_rate": 2.862226815548064e-07, "loss": 0.304, "step": 51155 }, { "epoch": 0.9237391574401086, "grad_norm": 0.5950000286102295, "learning_rate": 2.8554935085908007e-07, "loss": 0.1958, "step": 51160 }, { "epoch": 0.9238294368730093, "grad_norm": 0.630179226398468, "learning_rate": 2.8487680162027745e-07, "loss": 0.2085, "step": 51165 }, { "epoch": 0.9239197163059101, "grad_norm": 0.5596440434455872, "learning_rate": 2.842050338924995e-07, "loss": 0.1888, "step": 51170 }, { "epoch": 0.9240099957388108, "grad_norm": 0.6010776162147522, "learning_rate": 2.8353404772978656e-07, "loss": 0.3384, "step": 51175 }, { "epoch": 0.9241002751717114, "grad_norm": 0.46091702580451965, "learning_rate": 2.8286384318611526e-07, "loss": 0.1176, "step": 51180 }, { "epoch": 0.9241905546046122, "grad_norm": 0.6372133493423462, "learning_rate": 2.821944203153959e-07, "loss": 0.1736, "step": 51185 }, { "epoch": 0.9242808340375129, "grad_norm": 0.3395882844924927, "learning_rate": 2.8152577917148203e-07, "loss": 0.287, "step": 51190 }, { "epoch": 0.9243711134704137, "grad_norm": 0.5429771542549133, "learning_rate": 2.8085791980815626e-07, "loss": 0.2081, "step": 51195 }, { "epoch": 0.9244613929033143, "grad_norm": 0.5102635025978088, "learning_rate": 2.801908422791488e-07, "loss": 0.1562, "step": 51200 }, { "epoch": 0.924551672336215, "grad_norm": 0.4636192321777344, "learning_rate": 2.795245466381158e-07, "loss": 0.2662, "step": 51205 }, { "epoch": 0.9246419517691158, "grad_norm": 0.21078205108642578, "learning_rate": 2.788590329386587e-07, "loss": 0.1973, "step": 51210 }, { "epoch": 0.9247322312020165, "grad_norm": 0.42441755533218384, "learning_rate": 2.781943012343147e-07, "loss": 0.2438, "step": 51215 }, { "epoch": 0.9248225106349172, "grad_norm": 0.16268227994441986, "learning_rate": 2.7753035157855323e-07, "loss": 0.15, "step": 51220 }, { "epoch": 0.9249127900678179, "grad_norm": 0.3093563914299011, "learning_rate": 2.7686718402478606e-07, "loss": 0.185, "step": 51225 }, { "epoch": 0.9250030695007186, "grad_norm": 0.4721256494522095, "learning_rate": 2.762047986263594e-07, "loss": 0.2026, "step": 51230 }, { "epoch": 0.9250933489336194, "grad_norm": 0.5277528762817383, "learning_rate": 2.755431954365584e-07, "loss": 0.3178, "step": 51235 }, { "epoch": 0.92518362836652, "grad_norm": 0.5060920119285583, "learning_rate": 2.7488237450860376e-07, "loss": 0.2432, "step": 51240 }, { "epoch": 0.9252739077994208, "grad_norm": 0.5695164799690247, "learning_rate": 2.7422233589565415e-07, "loss": 0.1769, "step": 51245 }, { "epoch": 0.9253641872323215, "grad_norm": 0.5778248906135559, "learning_rate": 2.7356307965080376e-07, "loss": 0.2684, "step": 51250 }, { "epoch": 0.9254544666652222, "grad_norm": 0.5844367146492004, "learning_rate": 2.729046058270857e-07, "loss": 0.2107, "step": 51255 }, { "epoch": 0.9255447460981229, "grad_norm": 0.7740316987037659, "learning_rate": 2.7224691447746866e-07, "loss": 0.235, "step": 51260 }, { "epoch": 0.9256350255310236, "grad_norm": 0.32647475600242615, "learning_rate": 2.715900056548615e-07, "loss": 0.2859, "step": 51265 }, { "epoch": 0.9257253049639244, "grad_norm": 0.5408141016960144, "learning_rate": 2.7093387941210413e-07, "loss": 0.3178, "step": 51270 }, { "epoch": 0.9258155843968251, "grad_norm": 0.42315220832824707, "learning_rate": 2.7027853580197994e-07, "loss": 0.1929, "step": 51275 }, { "epoch": 0.9259058638297257, "grad_norm": 0.6220344305038452, "learning_rate": 2.696239748772045e-07, "loss": 0.206, "step": 51280 }, { "epoch": 0.9259961432626265, "grad_norm": 0.408003032207489, "learning_rate": 2.689701966904357e-07, "loss": 0.1913, "step": 51285 }, { "epoch": 0.9260864226955272, "grad_norm": 0.5364543199539185, "learning_rate": 2.6831720129426144e-07, "loss": 0.2675, "step": 51290 }, { "epoch": 0.926176702128428, "grad_norm": 0.344568133354187, "learning_rate": 2.6766498874121306e-07, "loss": 0.2519, "step": 51295 }, { "epoch": 0.9262669815613286, "grad_norm": 0.5036171078681946, "learning_rate": 2.6701355908375525e-07, "loss": 0.1585, "step": 51300 }, { "epoch": 0.9263572609942293, "grad_norm": 0.6221877336502075, "learning_rate": 2.6636291237428945e-07, "loss": 0.2354, "step": 51305 }, { "epoch": 0.9264475404271301, "grad_norm": 0.48638632893562317, "learning_rate": 2.6571304866515936e-07, "loss": 0.2335, "step": 51310 }, { "epoch": 0.9265378198600308, "grad_norm": 0.5129445791244507, "learning_rate": 2.650639680086375e-07, "loss": 0.2657, "step": 51315 }, { "epoch": 0.9266280992929314, "grad_norm": 0.39693352580070496, "learning_rate": 2.644156704569412e-07, "loss": 0.176, "step": 51320 }, { "epoch": 0.9267183787258322, "grad_norm": 0.3678065836429596, "learning_rate": 2.637681560622185e-07, "loss": 0.344, "step": 51325 }, { "epoch": 0.9268086581587329, "grad_norm": 0.18631500005722046, "learning_rate": 2.631214248765601e-07, "loss": 0.243, "step": 51330 }, { "epoch": 0.9268989375916337, "grad_norm": 0.47880470752716064, "learning_rate": 2.624754769519866e-07, "loss": 0.1966, "step": 51335 }, { "epoch": 0.9269892170245343, "grad_norm": 0.2807424068450928, "learning_rate": 2.618303123404642e-07, "loss": 0.3082, "step": 51340 }, { "epoch": 0.927079496457435, "grad_norm": 0.8420343399047852, "learning_rate": 2.61185931093888e-07, "loss": 0.209, "step": 51345 }, { "epoch": 0.9271697758903358, "grad_norm": 0.34128251671791077, "learning_rate": 2.6054233326409775e-07, "loss": 0.2254, "step": 51350 }, { "epoch": 0.9272600553232365, "grad_norm": 0.35374507308006287, "learning_rate": 2.5989951890286305e-07, "loss": 0.2736, "step": 51355 }, { "epoch": 0.9273503347561372, "grad_norm": 0.5286627411842346, "learning_rate": 2.5925748806189476e-07, "loss": 0.1907, "step": 51360 }, { "epoch": 0.9274406141890379, "grad_norm": 0.37487369775772095, "learning_rate": 2.5861624079283945e-07, "loss": 0.1802, "step": 51365 }, { "epoch": 0.9275308936219386, "grad_norm": 0.3012831509113312, "learning_rate": 2.5797577714728015e-07, "loss": 0.1892, "step": 51370 }, { "epoch": 0.9276211730548394, "grad_norm": 0.38257500529289246, "learning_rate": 2.573360971767391e-07, "loss": 0.2031, "step": 51375 }, { "epoch": 0.92771145248774, "grad_norm": 0.2673783004283905, "learning_rate": 2.5669720093267404e-07, "loss": 0.1802, "step": 51380 }, { "epoch": 0.9278017319206407, "grad_norm": 0.3368590474128723, "learning_rate": 2.56059088466476e-07, "loss": 0.247, "step": 51385 }, { "epoch": 0.9278920113535415, "grad_norm": 0.2542653977870941, "learning_rate": 2.554217598294817e-07, "loss": 0.3199, "step": 51390 }, { "epoch": 0.9279822907864422, "grad_norm": 0.20993247628211975, "learning_rate": 2.547852150729557e-07, "loss": 0.2204, "step": 51395 }, { "epoch": 0.9280725702193429, "grad_norm": 0.5144657492637634, "learning_rate": 2.541494542481049e-07, "loss": 0.1955, "step": 51400 }, { "epoch": 0.9281628496522436, "grad_norm": 0.3926786780357361, "learning_rate": 2.535144774060705e-07, "loss": 0.1924, "step": 51405 }, { "epoch": 0.9282531290851443, "grad_norm": 0.47530850768089294, "learning_rate": 2.5288028459793167e-07, "loss": 0.2114, "step": 51410 }, { "epoch": 0.9283434085180451, "grad_norm": 0.6984689831733704, "learning_rate": 2.522468758747076e-07, "loss": 0.2636, "step": 51415 }, { "epoch": 0.9284336879509457, "grad_norm": 0.25490066409111023, "learning_rate": 2.5161425128734876e-07, "loss": 0.1891, "step": 51420 }, { "epoch": 0.9285239673838465, "grad_norm": 0.16208931803703308, "learning_rate": 2.5098241088674645e-07, "loss": 0.2138, "step": 51425 }, { "epoch": 0.9286142468167472, "grad_norm": 0.5864415168762207, "learning_rate": 2.5035135472372576e-07, "loss": 0.2718, "step": 51430 }, { "epoch": 0.9287045262496479, "grad_norm": 0.49469009041786194, "learning_rate": 2.497210828490515e-07, "loss": 0.27, "step": 51435 }, { "epoch": 0.9287948056825486, "grad_norm": 1.019500494003296, "learning_rate": 2.490915953134254e-07, "loss": 0.2049, "step": 51440 }, { "epoch": 0.9288850851154493, "grad_norm": 0.3572063446044922, "learning_rate": 2.484628921674848e-07, "loss": 0.2085, "step": 51445 }, { "epoch": 0.9289753645483501, "grad_norm": 0.36977294087409973, "learning_rate": 2.4783497346180353e-07, "loss": 0.2381, "step": 51450 }, { "epoch": 0.9290656439812508, "grad_norm": 0.4619942605495453, "learning_rate": 2.4720783924689464e-07, "loss": 0.1716, "step": 51455 }, { "epoch": 0.9291559234141514, "grad_norm": 0.338832288980484, "learning_rate": 2.4658148957320327e-07, "loss": 0.2055, "step": 51460 }, { "epoch": 0.9292462028470522, "grad_norm": 0.6222464442253113, "learning_rate": 2.459559244911192e-07, "loss": 0.1892, "step": 51465 }, { "epoch": 0.9293364822799529, "grad_norm": 0.6086744666099548, "learning_rate": 2.4533114405095984e-07, "loss": 0.2002, "step": 51470 }, { "epoch": 0.9294267617128537, "grad_norm": 0.729422390460968, "learning_rate": 2.4470714830298835e-07, "loss": 0.1867, "step": 51475 }, { "epoch": 0.9295170411457543, "grad_norm": 0.1963096708059311, "learning_rate": 2.440839372973991e-07, "loss": 0.2713, "step": 51480 }, { "epoch": 0.929607320578655, "grad_norm": 0.7741760015487671, "learning_rate": 2.4346151108432415e-07, "loss": 0.1345, "step": 51485 }, { "epoch": 0.9296976000115558, "grad_norm": 0.39452943205833435, "learning_rate": 2.4283986971383455e-07, "loss": 0.2173, "step": 51490 }, { "epoch": 0.9297878794444565, "grad_norm": 0.6194738745689392, "learning_rate": 2.4221901323593475e-07, "loss": 0.1802, "step": 51495 }, { "epoch": 0.9298781588773571, "grad_norm": 0.28373393416404724, "learning_rate": 2.415989417005693e-07, "loss": 0.2472, "step": 51500 }, { "epoch": 0.9299684383102579, "grad_norm": 0.29273226857185364, "learning_rate": 2.4097965515761933e-07, "loss": 0.2798, "step": 51505 }, { "epoch": 0.9300587177431586, "grad_norm": 0.33628299832344055, "learning_rate": 2.403611536569017e-07, "loss": 0.2241, "step": 51510 }, { "epoch": 0.9301489971760594, "grad_norm": 0.57901531457901, "learning_rate": 2.397434372481688e-07, "loss": 0.2853, "step": 51515 }, { "epoch": 0.9302392766089601, "grad_norm": 0.3291673958301544, "learning_rate": 2.391265059811132e-07, "loss": 0.1768, "step": 51520 }, { "epoch": 0.9303295560418607, "grad_norm": 0.48461225628852844, "learning_rate": 2.3851035990536066e-07, "loss": 0.2379, "step": 51525 }, { "epoch": 0.9304198354747615, "grad_norm": 0.4078727066516876, "learning_rate": 2.378949990704782e-07, "loss": 0.1943, "step": 51530 }, { "epoch": 0.9305101149076622, "grad_norm": 0.24602115154266357, "learning_rate": 2.3728042352596404e-07, "loss": 0.2172, "step": 51535 }, { "epoch": 0.930600394340563, "grad_norm": 0.34291091561317444, "learning_rate": 2.3666663332125972e-07, "loss": 0.1879, "step": 51540 }, { "epoch": 0.9306906737734636, "grad_norm": 0.5660107135772705, "learning_rate": 2.3605362850573688e-07, "loss": 0.1819, "step": 51545 }, { "epoch": 0.9307809532063643, "grad_norm": 0.5777392387390137, "learning_rate": 2.354414091287105e-07, "loss": 0.1868, "step": 51550 }, { "epoch": 0.9308712326392651, "grad_norm": 0.3865366578102112, "learning_rate": 2.3482997523942673e-07, "loss": 0.1685, "step": 51555 }, { "epoch": 0.9309615120721658, "grad_norm": 0.19842804968357086, "learning_rate": 2.3421932688707183e-07, "loss": 0.2207, "step": 51560 }, { "epoch": 0.9310517915050665, "grad_norm": 0.5594784021377563, "learning_rate": 2.3360946412076758e-07, "loss": 0.1626, "step": 51565 }, { "epoch": 0.9311420709379672, "grad_norm": 0.4227134883403778, "learning_rate": 2.3300038698957362e-07, "loss": 0.2835, "step": 51570 }, { "epoch": 0.9312323503708679, "grad_norm": 0.3799756169319153, "learning_rate": 2.323920955424863e-07, "loss": 0.1396, "step": 51575 }, { "epoch": 0.9313226298037687, "grad_norm": 0.3990386128425598, "learning_rate": 2.3178458982843655e-07, "loss": 0.1872, "step": 51580 }, { "epoch": 0.9314129092366693, "grad_norm": 0.24942395091056824, "learning_rate": 2.311778698962963e-07, "loss": 0.235, "step": 51585 }, { "epoch": 0.93150318866957, "grad_norm": 0.31736132502555847, "learning_rate": 2.305719357948688e-07, "loss": 0.1348, "step": 51590 }, { "epoch": 0.9315934681024708, "grad_norm": 0.41779983043670654, "learning_rate": 2.2996678757289836e-07, "loss": 0.2574, "step": 51595 }, { "epoch": 0.9316837475353715, "grad_norm": 0.4522228538990021, "learning_rate": 2.2936242527906495e-07, "loss": 0.3507, "step": 51600 }, { "epoch": 0.9317740269682722, "grad_norm": 0.52217698097229, "learning_rate": 2.2875884896198519e-07, "loss": 0.259, "step": 51605 }, { "epoch": 0.9318643064011729, "grad_norm": 0.3688105642795563, "learning_rate": 2.281560586702125e-07, "loss": 0.2284, "step": 51610 }, { "epoch": 0.9319545858340736, "grad_norm": 1.195549488067627, "learning_rate": 2.27554054452237e-07, "loss": 0.2717, "step": 51615 }, { "epoch": 0.9320448652669744, "grad_norm": 0.687444269657135, "learning_rate": 2.2695283635648435e-07, "loss": 0.2383, "step": 51620 }, { "epoch": 0.932135144699875, "grad_norm": 0.44127342104911804, "learning_rate": 2.2635240443132034e-07, "loss": 0.264, "step": 51625 }, { "epoch": 0.9322254241327758, "grad_norm": 0.4385707974433899, "learning_rate": 2.25752758725043e-07, "loss": 0.255, "step": 51630 }, { "epoch": 0.9323157035656765, "grad_norm": 0.9365828037261963, "learning_rate": 2.2515389928589038e-07, "loss": 0.2694, "step": 51635 }, { "epoch": 0.9324059829985772, "grad_norm": 0.40874359011650085, "learning_rate": 2.2455582616203732e-07, "loss": 0.2996, "step": 51640 }, { "epoch": 0.9324962624314779, "grad_norm": 0.44071194529533386, "learning_rate": 2.2395853940159417e-07, "loss": 0.2285, "step": 51645 }, { "epoch": 0.9325865418643786, "grad_norm": 0.3842937648296356, "learning_rate": 2.233620390526059e-07, "loss": 0.156, "step": 51650 }, { "epoch": 0.9326768212972794, "grad_norm": 0.5506059527397156, "learning_rate": 2.2276632516306074e-07, "loss": 0.3147, "step": 51655 }, { "epoch": 0.9327671007301801, "grad_norm": 0.6792159080505371, "learning_rate": 2.2217139778087594e-07, "loss": 0.2612, "step": 51660 }, { "epoch": 0.9328573801630807, "grad_norm": 0.4426134526729584, "learning_rate": 2.2157725695391207e-07, "loss": 0.1906, "step": 51665 }, { "epoch": 0.9329476595959815, "grad_norm": 0.6154745221138, "learning_rate": 2.2098390272996095e-07, "loss": 0.3561, "step": 51670 }, { "epoch": 0.9330379390288822, "grad_norm": 0.696847677230835, "learning_rate": 2.2039133515675327e-07, "loss": 0.1871, "step": 51675 }, { "epoch": 0.933128218461783, "grad_norm": 0.41318702697753906, "learning_rate": 2.1979955428195976e-07, "loss": 0.2813, "step": 51680 }, { "epoch": 0.9332184978946836, "grad_norm": 0.5265457034111023, "learning_rate": 2.1920856015318237e-07, "loss": 0.1673, "step": 51685 }, { "epoch": 0.9333087773275843, "grad_norm": 0.1904546171426773, "learning_rate": 2.1861835281796417e-07, "loss": 0.1848, "step": 51690 }, { "epoch": 0.9333990567604851, "grad_norm": 0.3450382351875305, "learning_rate": 2.1802893232378053e-07, "loss": 0.2669, "step": 51695 }, { "epoch": 0.9334893361933858, "grad_norm": 0.5738722681999207, "learning_rate": 2.1744029871804795e-07, "loss": 0.2131, "step": 51700 }, { "epoch": 0.9335796156262864, "grad_norm": 0.5521915555000305, "learning_rate": 2.1685245204811744e-07, "loss": 0.2703, "step": 51705 }, { "epoch": 0.9336698950591872, "grad_norm": 0.2938435971736908, "learning_rate": 2.1626539236127676e-07, "loss": 0.2245, "step": 51710 }, { "epoch": 0.9337601744920879, "grad_norm": 0.4499363601207733, "learning_rate": 2.156791197047503e-07, "loss": 0.2809, "step": 51715 }, { "epoch": 0.9338504539249887, "grad_norm": 2.3798131942749023, "learning_rate": 2.1509363412570038e-07, "loss": 0.3425, "step": 51720 }, { "epoch": 0.9339407333578893, "grad_norm": 0.4902951121330261, "learning_rate": 2.1450893567122378e-07, "loss": 0.2242, "step": 51725 }, { "epoch": 0.93403101279079, "grad_norm": 0.28112733364105225, "learning_rate": 2.1392502438835726e-07, "loss": 0.2116, "step": 51730 }, { "epoch": 0.9341212922236908, "grad_norm": 0.4254879057407379, "learning_rate": 2.1334190032406886e-07, "loss": 0.256, "step": 51735 }, { "epoch": 0.9342115716565915, "grad_norm": 0.41759738326072693, "learning_rate": 2.1275956352526994e-07, "loss": 0.1774, "step": 51740 }, { "epoch": 0.9343018510894922, "grad_norm": 0.2610630393028259, "learning_rate": 2.1217801403880412e-07, "loss": 0.1912, "step": 51745 }, { "epoch": 0.9343921305223929, "grad_norm": 0.3194853961467743, "learning_rate": 2.1159725191145287e-07, "loss": 0.2587, "step": 51750 }, { "epoch": 0.9344824099552936, "grad_norm": 0.5504977107048035, "learning_rate": 2.1101727718993437e-07, "loss": 0.2209, "step": 51755 }, { "epoch": 0.9345726893881944, "grad_norm": 1.0065027475357056, "learning_rate": 2.1043808992090243e-07, "loss": 0.1515, "step": 51760 }, { "epoch": 0.934662968821095, "grad_norm": 0.24719925224781036, "learning_rate": 2.0985969015094864e-07, "loss": 0.1959, "step": 51765 }, { "epoch": 0.9347532482539957, "grad_norm": 0.23635120689868927, "learning_rate": 2.092820779266014e-07, "loss": 0.1884, "step": 51770 }, { "epoch": 0.9348435276868965, "grad_norm": 0.17379799485206604, "learning_rate": 2.0870525329432678e-07, "loss": 0.1901, "step": 51775 }, { "epoch": 0.9349338071197972, "grad_norm": 0.17449960112571716, "learning_rate": 2.0812921630052442e-07, "loss": 0.1689, "step": 51780 }, { "epoch": 0.9350240865526979, "grad_norm": 0.24730928242206573, "learning_rate": 2.0755396699153385e-07, "loss": 0.192, "step": 51785 }, { "epoch": 0.9351143659855986, "grad_norm": 0.24990248680114746, "learning_rate": 2.06979505413627e-07, "loss": 0.1725, "step": 51790 }, { "epoch": 0.9352046454184993, "grad_norm": 0.37935662269592285, "learning_rate": 2.0640583161301796e-07, "loss": 0.2294, "step": 51795 }, { "epoch": 0.9352949248514001, "grad_norm": 0.5428740382194519, "learning_rate": 2.058329456358521e-07, "loss": 0.2519, "step": 51800 }, { "epoch": 0.9353852042843007, "grad_norm": 0.3672719895839691, "learning_rate": 2.0526084752821695e-07, "loss": 0.1027, "step": 51805 }, { "epoch": 0.9354754837172015, "grad_norm": 0.43215203285217285, "learning_rate": 2.046895373361313e-07, "loss": 0.2256, "step": 51810 }, { "epoch": 0.9355657631501022, "grad_norm": 0.5134435892105103, "learning_rate": 2.0411901510555386e-07, "loss": 0.3509, "step": 51815 }, { "epoch": 0.935656042583003, "grad_norm": 0.5209430456161499, "learning_rate": 2.0354928088237802e-07, "loss": 0.1723, "step": 51820 }, { "epoch": 0.9357463220159036, "grad_norm": 0.33558085560798645, "learning_rate": 2.0298033471243595e-07, "loss": 0.2411, "step": 51825 }, { "epoch": 0.9358366014488043, "grad_norm": 0.27937522530555725, "learning_rate": 2.0241217664149437e-07, "loss": 0.1683, "step": 51830 }, { "epoch": 0.9359268808817051, "grad_norm": 0.3353129029273987, "learning_rate": 2.0184480671525786e-07, "loss": 0.2219, "step": 51835 }, { "epoch": 0.9360171603146058, "grad_norm": 0.8231786489486694, "learning_rate": 2.0127822497936655e-07, "loss": 0.2422, "step": 51840 }, { "epoch": 0.9361074397475064, "grad_norm": 0.696200430393219, "learning_rate": 2.0071243147940067e-07, "loss": 0.3133, "step": 51845 }, { "epoch": 0.9361977191804072, "grad_norm": 0.3378933072090149, "learning_rate": 2.0014742626087046e-07, "loss": 0.1784, "step": 51850 }, { "epoch": 0.9362879986133079, "grad_norm": 0.2539663016796112, "learning_rate": 1.9958320936922848e-07, "loss": 0.1111, "step": 51855 }, { "epoch": 0.9363782780462087, "grad_norm": 0.19143852591514587, "learning_rate": 1.9901978084986062e-07, "loss": 0.1522, "step": 51860 }, { "epoch": 0.9364685574791093, "grad_norm": 0.4747665226459503, "learning_rate": 1.9845714074809062e-07, "loss": 0.1578, "step": 51865 }, { "epoch": 0.93655883691201, "grad_norm": 0.29267823696136475, "learning_rate": 1.978952891091812e-07, "loss": 0.1915, "step": 51870 }, { "epoch": 0.9366491163449108, "grad_norm": 0.4691658616065979, "learning_rate": 1.973342259783273e-07, "loss": 0.258, "step": 51875 }, { "epoch": 0.9367393957778115, "grad_norm": 1.3541239500045776, "learning_rate": 1.967739514006628e-07, "loss": 0.2346, "step": 51880 }, { "epoch": 0.9368296752107121, "grad_norm": 0.4629501700401306, "learning_rate": 1.962144654212572e-07, "loss": 0.2451, "step": 51885 }, { "epoch": 0.9369199546436129, "grad_norm": 0.5224490761756897, "learning_rate": 1.9565576808511788e-07, "loss": 0.2326, "step": 51890 }, { "epoch": 0.9370102340765136, "grad_norm": 0.40172770619392395, "learning_rate": 1.9509785943718662e-07, "loss": 0.2084, "step": 51895 }, { "epoch": 0.9371005135094144, "grad_norm": 0.22561655938625336, "learning_rate": 1.945407395223442e-07, "loss": 0.2111, "step": 51900 }, { "epoch": 0.937190792942315, "grad_norm": 0.5190891027450562, "learning_rate": 1.93984408385407e-07, "loss": 0.3468, "step": 51905 }, { "epoch": 0.9372810723752157, "grad_norm": 0.9866564869880676, "learning_rate": 1.934288660711281e-07, "loss": 0.2527, "step": 51910 }, { "epoch": 0.9373713518081165, "grad_norm": 0.8047907948493958, "learning_rate": 1.928741126241951e-07, "loss": 0.1663, "step": 51915 }, { "epoch": 0.9374616312410172, "grad_norm": 0.6488373875617981, "learning_rate": 1.9232014808923672e-07, "loss": 0.2403, "step": 51920 }, { "epoch": 0.937551910673918, "grad_norm": 0.36024126410484314, "learning_rate": 1.9176697251081177e-07, "loss": 0.1215, "step": 51925 }, { "epoch": 0.9376421901068186, "grad_norm": 0.5580694675445557, "learning_rate": 1.9121458593342246e-07, "loss": 0.2512, "step": 51930 }, { "epoch": 0.9377324695397193, "grad_norm": 0.3380827307701111, "learning_rate": 1.9066298840150211e-07, "loss": 0.2354, "step": 51935 }, { "epoch": 0.9378227489726201, "grad_norm": 0.4437563419342041, "learning_rate": 1.901121799594241e-07, "loss": 0.3053, "step": 51940 }, { "epoch": 0.9379130284055208, "grad_norm": 0.2973403334617615, "learning_rate": 1.8956216065149636e-07, "loss": 0.2504, "step": 51945 }, { "epoch": 0.9380033078384215, "grad_norm": 0.33137649297714233, "learning_rate": 1.8901293052196456e-07, "loss": 0.1983, "step": 51950 }, { "epoch": 0.9380935872713222, "grad_norm": 0.24417562782764435, "learning_rate": 1.8846448961500896e-07, "loss": 0.12, "step": 51955 }, { "epoch": 0.9381838667042229, "grad_norm": 0.5119628310203552, "learning_rate": 1.879168379747487e-07, "loss": 0.1649, "step": 51960 }, { "epoch": 0.9382741461371237, "grad_norm": 0.4930247366428375, "learning_rate": 1.873699756452374e-07, "loss": 0.2816, "step": 51965 }, { "epoch": 0.9383644255700243, "grad_norm": 0.19329699873924255, "learning_rate": 1.8682390267046768e-07, "loss": 0.2226, "step": 51970 }, { "epoch": 0.938454705002925, "grad_norm": 0.5004608035087585, "learning_rate": 1.862786190943666e-07, "loss": 0.2517, "step": 51975 }, { "epoch": 0.9385449844358258, "grad_norm": 0.401940256357193, "learning_rate": 1.857341249607969e-07, "loss": 0.1777, "step": 51980 }, { "epoch": 0.9386352638687265, "grad_norm": 0.3844544291496277, "learning_rate": 1.8519042031356127e-07, "loss": 0.2107, "step": 51985 }, { "epoch": 0.9387255433016272, "grad_norm": 0.48071059584617615, "learning_rate": 1.8464750519639473e-07, "loss": 0.2061, "step": 51990 }, { "epoch": 0.9388158227345279, "grad_norm": 0.395354300737381, "learning_rate": 1.8410537965297237e-07, "loss": 0.1423, "step": 51995 }, { "epoch": 0.9389061021674286, "grad_norm": 0.22170841693878174, "learning_rate": 1.8356404372690262e-07, "loss": 0.1431, "step": 52000 }, { "epoch": 0.9389963816003294, "grad_norm": 0.31132036447525024, "learning_rate": 1.830234974617351e-07, "loss": 0.1529, "step": 52005 }, { "epoch": 0.93908666103323, "grad_norm": 0.4811148941516876, "learning_rate": 1.8248374090094944e-07, "loss": 0.1695, "step": 52010 }, { "epoch": 0.9391769404661308, "grad_norm": 0.5122590661048889, "learning_rate": 1.8194477408796875e-07, "loss": 0.2564, "step": 52015 }, { "epoch": 0.9392672198990315, "grad_norm": 0.5909358859062195, "learning_rate": 1.8140659706614493e-07, "loss": 0.1825, "step": 52020 }, { "epoch": 0.9393574993319322, "grad_norm": 0.34511274099349976, "learning_rate": 1.8086920987877344e-07, "loss": 0.2724, "step": 52025 }, { "epoch": 0.9394477787648329, "grad_norm": 0.7616056799888611, "learning_rate": 1.8033261256908075e-07, "loss": 0.2216, "step": 52030 }, { "epoch": 0.9395380581977336, "grad_norm": 0.4209730625152588, "learning_rate": 1.7979680518023346e-07, "loss": 0.2234, "step": 52035 }, { "epoch": 0.9396283376306344, "grad_norm": 0.5408231019973755, "learning_rate": 1.7926178775533488e-07, "loss": 0.1385, "step": 52040 }, { "epoch": 0.9397186170635351, "grad_norm": 0.4944550693035126, "learning_rate": 1.7872756033742055e-07, "loss": 0.2979, "step": 52045 }, { "epoch": 0.9398088964964357, "grad_norm": 0.30177101492881775, "learning_rate": 1.7819412296946725e-07, "loss": 0.2299, "step": 52050 }, { "epoch": 0.9398991759293365, "grad_norm": 0.23153111338615417, "learning_rate": 1.7766147569438397e-07, "loss": 0.192, "step": 52055 }, { "epoch": 0.9399894553622372, "grad_norm": 0.4247056841850281, "learning_rate": 1.7712961855502087e-07, "loss": 0.3326, "step": 52060 }, { "epoch": 0.940079734795138, "grad_norm": 0.3396017849445343, "learning_rate": 1.7659855159416038e-07, "loss": 0.2892, "step": 52065 }, { "epoch": 0.9401700142280386, "grad_norm": 0.5537087321281433, "learning_rate": 1.7606827485452395e-07, "loss": 0.2346, "step": 52070 }, { "epoch": 0.9402602936609393, "grad_norm": 0.6220576167106628, "learning_rate": 1.7553878837876737e-07, "loss": 0.2804, "step": 52075 }, { "epoch": 0.9403505730938401, "grad_norm": 0.5737428069114685, "learning_rate": 1.750100922094844e-07, "loss": 0.1685, "step": 52080 }, { "epoch": 0.9404408525267408, "grad_norm": 0.507656455039978, "learning_rate": 1.7448218638920545e-07, "loss": 0.2028, "step": 52085 }, { "epoch": 0.9405311319596414, "grad_norm": 0.642570972442627, "learning_rate": 1.7395507096039654e-07, "loss": 0.1771, "step": 52090 }, { "epoch": 0.9406214113925422, "grad_norm": 0.56353759765625, "learning_rate": 1.7342874596545822e-07, "loss": 0.1809, "step": 52095 }, { "epoch": 0.9407116908254429, "grad_norm": 0.2150426059961319, "learning_rate": 1.7290321144673216e-07, "loss": 0.2133, "step": 52100 }, { "epoch": 0.9408019702583437, "grad_norm": 0.30578625202178955, "learning_rate": 1.7237846744649344e-07, "loss": 0.2366, "step": 52105 }, { "epoch": 0.9408922496912443, "grad_norm": 0.7995226979255676, "learning_rate": 1.7185451400695275e-07, "loss": 0.2499, "step": 52110 }, { "epoch": 0.940982529124145, "grad_norm": 0.4063202142715454, "learning_rate": 1.7133135117025856e-07, "loss": 0.2984, "step": 52115 }, { "epoch": 0.9410728085570458, "grad_norm": 0.5872689485549927, "learning_rate": 1.7080897897849723e-07, "loss": 0.2491, "step": 52120 }, { "epoch": 0.9411630879899465, "grad_norm": 1.0243349075317383, "learning_rate": 1.7028739747368738e-07, "loss": 0.1595, "step": 52125 }, { "epoch": 0.9412533674228472, "grad_norm": 0.46754711866378784, "learning_rate": 1.6976660669778655e-07, "loss": 0.243, "step": 52130 }, { "epoch": 0.9413436468557479, "grad_norm": 0.2676316797733307, "learning_rate": 1.6924660669269122e-07, "loss": 0.1927, "step": 52135 }, { "epoch": 0.9414339262886486, "grad_norm": 0.4048113226890564, "learning_rate": 1.6872739750022903e-07, "loss": 0.2179, "step": 52140 }, { "epoch": 0.9415242057215494, "grad_norm": 0.4328083395957947, "learning_rate": 1.682089791621677e-07, "loss": 0.2187, "step": 52145 }, { "epoch": 0.94161448515445, "grad_norm": 0.48077961802482605, "learning_rate": 1.6769135172020945e-07, "loss": 0.122, "step": 52150 }, { "epoch": 0.9417047645873508, "grad_norm": 0.5391274094581604, "learning_rate": 1.6717451521599536e-07, "loss": 0.1919, "step": 52155 }, { "epoch": 0.9417950440202515, "grad_norm": 0.4807625114917755, "learning_rate": 1.6665846969109888e-07, "loss": 0.2772, "step": 52160 }, { "epoch": 0.9418853234531522, "grad_norm": 1.064616322517395, "learning_rate": 1.6614321518703347e-07, "loss": 0.2024, "step": 52165 }, { "epoch": 0.9419756028860529, "grad_norm": 0.35680803656578064, "learning_rate": 1.6562875174524707e-07, "loss": 0.2104, "step": 52170 }, { "epoch": 0.9420658823189536, "grad_norm": 0.32618042826652527, "learning_rate": 1.6511507940712546e-07, "loss": 0.2215, "step": 52175 }, { "epoch": 0.9421561617518543, "grad_norm": 0.42294156551361084, "learning_rate": 1.6460219821398894e-07, "loss": 0.248, "step": 52180 }, { "epoch": 0.9422464411847551, "grad_norm": 0.3701134920120239, "learning_rate": 1.640901082070956e-07, "loss": 0.2675, "step": 52185 }, { "epoch": 0.9423367206176557, "grad_norm": 1.1385633945465088, "learning_rate": 1.6357880942763804e-07, "loss": 0.2164, "step": 52190 }, { "epoch": 0.9424270000505565, "grad_norm": 0.3978947103023529, "learning_rate": 1.6306830191674783e-07, "loss": 0.2493, "step": 52195 }, { "epoch": 0.9425172794834572, "grad_norm": 0.3072848320007324, "learning_rate": 1.62558585715491e-07, "loss": 0.1876, "step": 52200 }, { "epoch": 0.942607558916358, "grad_norm": 0.39237838983535767, "learning_rate": 1.620496608648725e-07, "loss": 0.2458, "step": 52205 }, { "epoch": 0.9426978383492586, "grad_norm": 0.49989035725593567, "learning_rate": 1.615415274058285e-07, "loss": 0.2544, "step": 52210 }, { "epoch": 0.9427881177821593, "grad_norm": 0.638972818851471, "learning_rate": 1.6103418537923742e-07, "loss": 0.1837, "step": 52215 }, { "epoch": 0.9428783972150601, "grad_norm": 0.32173827290534973, "learning_rate": 1.6052763482590995e-07, "loss": 0.1646, "step": 52220 }, { "epoch": 0.9429686766479608, "grad_norm": 0.14592932164669037, "learning_rate": 1.6002187578659344e-07, "loss": 0.1162, "step": 52225 }, { "epoch": 0.9430589560808614, "grad_norm": 0.47503066062927246, "learning_rate": 1.595169083019732e-07, "loss": 0.2758, "step": 52230 }, { "epoch": 0.9431492355137622, "grad_norm": 0.44942447543144226, "learning_rate": 1.5901273241267e-07, "loss": 0.349, "step": 52235 }, { "epoch": 0.9432395149466629, "grad_norm": 0.4225345551967621, "learning_rate": 1.5850934815924367e-07, "loss": 0.2135, "step": 52240 }, { "epoch": 0.9433297943795637, "grad_norm": 0.4064311981201172, "learning_rate": 1.5800675558218404e-07, "loss": 0.2737, "step": 52245 }, { "epoch": 0.9434200738124643, "grad_norm": 0.2946653366088867, "learning_rate": 1.575049547219232e-07, "loss": 0.1376, "step": 52250 }, { "epoch": 0.943510353245365, "grad_norm": 0.36680835485458374, "learning_rate": 1.5700394561882658e-07, "loss": 0.1992, "step": 52255 }, { "epoch": 0.9436006326782658, "grad_norm": 0.43676671385765076, "learning_rate": 1.565037283131976e-07, "loss": 0.1873, "step": 52260 }, { "epoch": 0.9436909121111665, "grad_norm": 0.4742371439933777, "learning_rate": 1.56004302845274e-07, "loss": 0.3535, "step": 52265 }, { "epoch": 0.9437811915440671, "grad_norm": 0.2743320167064667, "learning_rate": 1.5550566925523258e-07, "loss": 0.1865, "step": 52270 }, { "epoch": 0.9438714709769679, "grad_norm": 0.5959087014198303, "learning_rate": 1.5500782758318233e-07, "loss": 0.2989, "step": 52275 }, { "epoch": 0.9439617504098686, "grad_norm": 0.7350186705589294, "learning_rate": 1.5451077786917345e-07, "loss": 0.2391, "step": 52280 }, { "epoch": 0.9440520298427694, "grad_norm": 0.5229212045669556, "learning_rate": 1.5401452015318728e-07, "loss": 0.1791, "step": 52285 }, { "epoch": 0.94414230927567, "grad_norm": 0.39683276414871216, "learning_rate": 1.5351905447514747e-07, "loss": 0.323, "step": 52290 }, { "epoch": 0.9442325887085707, "grad_norm": 0.37934693694114685, "learning_rate": 1.5302438087490767e-07, "loss": 0.2551, "step": 52295 }, { "epoch": 0.9443228681414715, "grad_norm": 0.5581970810890198, "learning_rate": 1.5253049939226272e-07, "loss": 0.2526, "step": 52300 }, { "epoch": 0.9444131475743722, "grad_norm": 0.6557458639144897, "learning_rate": 1.5203741006694083e-07, "loss": 0.2037, "step": 52305 }, { "epoch": 0.944503427007273, "grad_norm": 0.5137622952461243, "learning_rate": 1.5154511293860807e-07, "loss": 0.2785, "step": 52310 }, { "epoch": 0.9445937064401736, "grad_norm": 0.47347983717918396, "learning_rate": 1.51053608046865e-07, "loss": 0.2556, "step": 52315 }, { "epoch": 0.9446839858730743, "grad_norm": 0.42270901799201965, "learning_rate": 1.50562895431251e-07, "loss": 0.2347, "step": 52320 }, { "epoch": 0.9447742653059751, "grad_norm": 0.44284293055534363, "learning_rate": 1.5007297513123908e-07, "loss": 0.314, "step": 52325 }, { "epoch": 0.9448645447388758, "grad_norm": 0.5739048719406128, "learning_rate": 1.4958384718624097e-07, "loss": 0.2303, "step": 52330 }, { "epoch": 0.9449548241717765, "grad_norm": 0.5385386943817139, "learning_rate": 1.49095511635603e-07, "loss": 0.2542, "step": 52335 }, { "epoch": 0.9450451036046772, "grad_norm": 0.6142498254776001, "learning_rate": 1.4860796851860814e-07, "loss": 0.3362, "step": 52340 }, { "epoch": 0.9451353830375779, "grad_norm": 0.28703126311302185, "learning_rate": 1.481212178744762e-07, "loss": 0.2327, "step": 52345 }, { "epoch": 0.9452256624704787, "grad_norm": 0.5515416264533997, "learning_rate": 1.4763525974236027e-07, "loss": 0.2608, "step": 52350 }, { "epoch": 0.9453159419033793, "grad_norm": 0.4463517963886261, "learning_rate": 1.4715009416135574e-07, "loss": 0.2052, "step": 52355 }, { "epoch": 0.94540622133628, "grad_norm": 0.36427173018455505, "learning_rate": 1.4666572117048917e-07, "loss": 0.3779, "step": 52360 }, { "epoch": 0.9454965007691808, "grad_norm": 0.3629922568798065, "learning_rate": 1.4618214080872272e-07, "loss": 0.2401, "step": 52365 }, { "epoch": 0.9455867802020815, "grad_norm": 0.3709052801132202, "learning_rate": 1.4569935311496087e-07, "loss": 0.1854, "step": 52370 }, { "epoch": 0.9456770596349822, "grad_norm": 0.698991596698761, "learning_rate": 1.4521735812803805e-07, "loss": 0.2725, "step": 52375 }, { "epoch": 0.9457673390678829, "grad_norm": 0.49911487102508545, "learning_rate": 1.4473615588672662e-07, "loss": 0.1953, "step": 52380 }, { "epoch": 0.9458576185007836, "grad_norm": 0.16943359375, "learning_rate": 1.4425574642973782e-07, "loss": 0.1517, "step": 52385 }, { "epoch": 0.9459478979336844, "grad_norm": 0.4964551329612732, "learning_rate": 1.437761297957152e-07, "loss": 0.2814, "step": 52390 }, { "epoch": 0.946038177366585, "grad_norm": 0.23414237797260284, "learning_rate": 1.4329730602324122e-07, "loss": 0.2067, "step": 52395 }, { "epoch": 0.9461284567994858, "grad_norm": 0.3901708722114563, "learning_rate": 1.4281927515083504e-07, "loss": 0.1551, "step": 52400 }, { "epoch": 0.9462187362323865, "grad_norm": 0.42421460151672363, "learning_rate": 1.4234203721694816e-07, "loss": 0.2296, "step": 52405 }, { "epoch": 0.9463090156652872, "grad_norm": 0.3116353750228882, "learning_rate": 1.4186559225997322e-07, "loss": 0.1827, "step": 52410 }, { "epoch": 0.9463992950981879, "grad_norm": 0.18112727999687195, "learning_rate": 1.4138994031823506e-07, "loss": 0.1922, "step": 52415 }, { "epoch": 0.9464895745310886, "grad_norm": 0.4041883945465088, "learning_rate": 1.4091508142999865e-07, "loss": 0.2435, "step": 52420 }, { "epoch": 0.9465798539639894, "grad_norm": 0.31055948138237, "learning_rate": 1.4044101563346014e-07, "loss": 0.2331, "step": 52425 }, { "epoch": 0.9466701333968901, "grad_norm": 0.2543698251247406, "learning_rate": 1.3996774296675564e-07, "loss": 0.2201, "step": 52430 }, { "epoch": 0.9467604128297907, "grad_norm": 0.37123754620552063, "learning_rate": 1.3949526346795695e-07, "loss": 0.1751, "step": 52435 }, { "epoch": 0.9468506922626915, "grad_norm": 0.38278284668922424, "learning_rate": 1.3902357717507254e-07, "loss": 0.3449, "step": 52440 }, { "epoch": 0.9469409716955922, "grad_norm": 0.6106435060501099, "learning_rate": 1.3855268412604427e-07, "loss": 0.2112, "step": 52445 }, { "epoch": 0.947031251128493, "grad_norm": 0.6555020213127136, "learning_rate": 1.3808258435875411e-07, "loss": 0.2682, "step": 52450 }, { "epoch": 0.9471215305613936, "grad_norm": 0.5614919066429138, "learning_rate": 1.376132779110151e-07, "loss": 0.2209, "step": 52455 }, { "epoch": 0.9472118099942943, "grad_norm": 0.22632837295532227, "learning_rate": 1.371447648205826e-07, "loss": 0.2477, "step": 52460 }, { "epoch": 0.9473020894271951, "grad_norm": 0.39801183342933655, "learning_rate": 1.3667704512514311e-07, "loss": 0.1773, "step": 52465 }, { "epoch": 0.9473923688600958, "grad_norm": 0.33802229166030884, "learning_rate": 1.3621011886232327e-07, "loss": 0.2323, "step": 52470 }, { "epoch": 0.9474826482929964, "grad_norm": 0.316730260848999, "learning_rate": 1.3574398606968186e-07, "loss": 0.2709, "step": 52475 }, { "epoch": 0.9475729277258972, "grad_norm": 0.6402896642684937, "learning_rate": 1.352786467847178e-07, "loss": 0.1968, "step": 52480 }, { "epoch": 0.9476632071587979, "grad_norm": 0.5211960673332214, "learning_rate": 1.3481410104486114e-07, "loss": 0.1975, "step": 52485 }, { "epoch": 0.9477534865916987, "grad_norm": 1.256142497062683, "learning_rate": 1.3435034888748534e-07, "loss": 0.2066, "step": 52490 }, { "epoch": 0.9478437660245993, "grad_norm": 0.3416600525379181, "learning_rate": 1.3388739034989274e-07, "loss": 0.1517, "step": 52495 }, { "epoch": 0.9479340454575, "grad_norm": 0.33880066871643066, "learning_rate": 1.334252254693258e-07, "loss": 0.2901, "step": 52500 }, { "epoch": 0.9480243248904008, "grad_norm": 0.46627411246299744, "learning_rate": 1.3296385428296255e-07, "loss": 0.3176, "step": 52505 }, { "epoch": 0.9481146043233015, "grad_norm": 0.42374828457832336, "learning_rate": 1.3250327682791663e-07, "loss": 0.2587, "step": 52510 }, { "epoch": 0.9482048837562022, "grad_norm": 0.4953526258468628, "learning_rate": 1.3204349314123954e-07, "loss": 0.2192, "step": 52515 }, { "epoch": 0.9482951631891029, "grad_norm": 0.2568039298057556, "learning_rate": 1.3158450325991613e-07, "loss": 0.1924, "step": 52520 }, { "epoch": 0.9483854426220036, "grad_norm": 0.22638952732086182, "learning_rate": 1.3112630722086795e-07, "loss": 0.1758, "step": 52525 }, { "epoch": 0.9484757220549044, "grad_norm": 0.37685781717300415, "learning_rate": 1.3066890506095552e-07, "loss": 0.1785, "step": 52530 }, { "epoch": 0.948566001487805, "grad_norm": 0.4343778192996979, "learning_rate": 1.3021229681697388e-07, "loss": 0.1827, "step": 52535 }, { "epoch": 0.9486562809207058, "grad_norm": 0.7477774024009705, "learning_rate": 1.2975648252565032e-07, "loss": 0.2522, "step": 52540 }, { "epoch": 0.9487465603536065, "grad_norm": 0.4742632806301117, "learning_rate": 1.2930146222365657e-07, "loss": 0.2197, "step": 52545 }, { "epoch": 0.9488368397865072, "grad_norm": 0.6767007112503052, "learning_rate": 1.2884723594759118e-07, "loss": 0.1603, "step": 52550 }, { "epoch": 0.9489271192194079, "grad_norm": 0.46887606382369995, "learning_rate": 1.2839380373399712e-07, "loss": 0.3118, "step": 52555 }, { "epoch": 0.9490173986523086, "grad_norm": 0.45550402998924255, "learning_rate": 1.2794116561934743e-07, "loss": 0.2259, "step": 52560 }, { "epoch": 0.9491076780852093, "grad_norm": 0.39493048191070557, "learning_rate": 1.2748932164005302e-07, "loss": 0.2386, "step": 52565 }, { "epoch": 0.9491979575181101, "grad_norm": 0.25530895590782166, "learning_rate": 1.2703827183246364e-07, "loss": 0.214, "step": 52570 }, { "epoch": 0.9492882369510107, "grad_norm": 0.2733272612094879, "learning_rate": 1.2658801623286254e-07, "loss": 0.1374, "step": 52575 }, { "epoch": 0.9493785163839115, "grad_norm": 0.43154987692832947, "learning_rate": 1.261385548774674e-07, "loss": 0.1989, "step": 52580 }, { "epoch": 0.9494687958168122, "grad_norm": 0.5476710796356201, "learning_rate": 1.2568988780243708e-07, "loss": 0.1594, "step": 52585 }, { "epoch": 0.949559075249713, "grad_norm": 0.7292976379394531, "learning_rate": 1.2524201504386157e-07, "loss": 0.2246, "step": 52590 }, { "epoch": 0.9496493546826136, "grad_norm": 0.30767586827278137, "learning_rate": 1.2479493663776875e-07, "loss": 0.2405, "step": 52595 }, { "epoch": 0.9497396341155143, "grad_norm": 0.5088165998458862, "learning_rate": 1.243486526201254e-07, "loss": 0.2151, "step": 52600 }, { "epoch": 0.9498299135484151, "grad_norm": 0.3825433552265167, "learning_rate": 1.2390316302682837e-07, "loss": 0.2879, "step": 52605 }, { "epoch": 0.9499201929813158, "grad_norm": 0.35412609577178955, "learning_rate": 1.2345846789371674e-07, "loss": 0.2937, "step": 52610 }, { "epoch": 0.9500104724142164, "grad_norm": 1.0277363061904907, "learning_rate": 1.230145672565608e-07, "loss": 0.2591, "step": 52615 }, { "epoch": 0.9501007518471172, "grad_norm": 1.2222204208374023, "learning_rate": 1.225714611510709e-07, "loss": 0.2046, "step": 52620 }, { "epoch": 0.9501910312800179, "grad_norm": 0.31255319714546204, "learning_rate": 1.221291496128918e-07, "loss": 0.2869, "step": 52625 }, { "epoch": 0.9502813107129187, "grad_norm": 0.5221478343009949, "learning_rate": 1.2168763267760175e-07, "loss": 0.2165, "step": 52630 }, { "epoch": 0.9503715901458193, "grad_norm": 0.2570783495903015, "learning_rate": 1.2124691038072124e-07, "loss": 0.2298, "step": 52635 }, { "epoch": 0.95046186957872, "grad_norm": 0.4935098886489868, "learning_rate": 1.2080698275770074e-07, "loss": 0.1582, "step": 52640 }, { "epoch": 0.9505521490116208, "grad_norm": 0.5720986723899841, "learning_rate": 1.203678498439287e-07, "loss": 0.1775, "step": 52645 }, { "epoch": 0.9506424284445215, "grad_norm": 0.4462292492389679, "learning_rate": 1.1992951167473344e-07, "loss": 0.231, "step": 52650 }, { "epoch": 0.9507327078774221, "grad_norm": 0.5290933847427368, "learning_rate": 1.1949196828537235e-07, "loss": 0.2116, "step": 52655 }, { "epoch": 0.9508229873103229, "grad_norm": 0.38296404480934143, "learning_rate": 1.1905521971104394e-07, "loss": 0.1249, "step": 52660 }, { "epoch": 0.9509132667432236, "grad_norm": 0.5403286814689636, "learning_rate": 1.1861926598688234e-07, "loss": 0.2489, "step": 52665 }, { "epoch": 0.9510035461761244, "grad_norm": 0.1880694478750229, "learning_rate": 1.1818410714795614e-07, "loss": 0.1901, "step": 52670 }, { "epoch": 0.951093825609025, "grad_norm": 0.4349796772003174, "learning_rate": 1.1774974322927068e-07, "loss": 0.2221, "step": 52675 }, { "epoch": 0.9511841050419257, "grad_norm": 0.3575138747692108, "learning_rate": 1.1731617426576802e-07, "loss": 0.1986, "step": 52680 }, { "epoch": 0.9512743844748265, "grad_norm": 0.39403751492500305, "learning_rate": 1.168834002923247e-07, "loss": 0.2165, "step": 52685 }, { "epoch": 0.9513646639077272, "grad_norm": 0.3441208302974701, "learning_rate": 1.164514213437551e-07, "loss": 0.1894, "step": 52690 }, { "epoch": 0.9514549433406279, "grad_norm": 0.3887169063091278, "learning_rate": 1.1602023745480694e-07, "loss": 0.1778, "step": 52695 }, { "epoch": 0.9515452227735286, "grad_norm": 0.6288618445396423, "learning_rate": 1.1558984866016809e-07, "loss": 0.25, "step": 52700 }, { "epoch": 0.9516355022064293, "grad_norm": 0.9938123226165771, "learning_rate": 1.1516025499445971e-07, "loss": 0.2619, "step": 52705 }, { "epoch": 0.9517257816393301, "grad_norm": 0.6329373121261597, "learning_rate": 1.147314564922386e-07, "loss": 0.1876, "step": 52710 }, { "epoch": 0.9518160610722308, "grad_norm": 0.38888344168663025, "learning_rate": 1.143034531879994e-07, "loss": 0.207, "step": 52715 }, { "epoch": 0.9519063405051315, "grad_norm": 0.44606176018714905, "learning_rate": 1.1387624511617013e-07, "loss": 0.3276, "step": 52720 }, { "epoch": 0.9519966199380322, "grad_norm": 0.4698202908039093, "learning_rate": 1.1344983231111884e-07, "loss": 0.2329, "step": 52725 }, { "epoch": 0.9520868993709329, "grad_norm": 0.34974223375320435, "learning_rate": 1.1302421480714588e-07, "loss": 0.2286, "step": 52730 }, { "epoch": 0.9521771788038337, "grad_norm": 0.48850712180137634, "learning_rate": 1.1259939263849052e-07, "loss": 0.225, "step": 52735 }, { "epoch": 0.9522674582367343, "grad_norm": 0.7180843353271484, "learning_rate": 1.1217536583932542e-07, "loss": 0.201, "step": 52740 }, { "epoch": 0.952357737669635, "grad_norm": 0.5990698337554932, "learning_rate": 1.117521344437611e-07, "loss": 0.2144, "step": 52745 }, { "epoch": 0.9524480171025358, "grad_norm": 0.41236624121665955, "learning_rate": 1.113296984858414e-07, "loss": 0.2034, "step": 52750 }, { "epoch": 0.9525382965354365, "grad_norm": 0.6211836338043213, "learning_rate": 1.109080579995514e-07, "loss": 0.2231, "step": 52755 }, { "epoch": 0.9526285759683372, "grad_norm": 0.6268085837364197, "learning_rate": 1.1048721301880728e-07, "loss": 0.2212, "step": 52760 }, { "epoch": 0.9527188554012379, "grad_norm": 0.3917376399040222, "learning_rate": 1.1006716357746194e-07, "loss": 0.2131, "step": 52765 }, { "epoch": 0.9528091348341386, "grad_norm": 0.8501490950584412, "learning_rate": 1.0964790970930839e-07, "loss": 0.1896, "step": 52770 }, { "epoch": 0.9528994142670394, "grad_norm": 0.34583914279937744, "learning_rate": 1.0922945144806963e-07, "loss": 0.254, "step": 52775 }, { "epoch": 0.95298969369994, "grad_norm": 0.22469790279865265, "learning_rate": 1.0881178882740872e-07, "loss": 0.1903, "step": 52780 }, { "epoch": 0.9530799731328408, "grad_norm": 0.44391101598739624, "learning_rate": 1.0839492188092327e-07, "loss": 0.2337, "step": 52785 }, { "epoch": 0.9531702525657415, "grad_norm": 0.4980946481227875, "learning_rate": 1.0797885064214752e-07, "loss": 0.2129, "step": 52790 }, { "epoch": 0.9532605319986422, "grad_norm": 0.35762354731559753, "learning_rate": 1.075635751445514e-07, "loss": 0.2233, "step": 52795 }, { "epoch": 0.9533508114315429, "grad_norm": 0.49305427074432373, "learning_rate": 1.071490954215415e-07, "loss": 0.2123, "step": 52800 }, { "epoch": 0.9534410908644436, "grad_norm": 0.41991478204727173, "learning_rate": 1.0673541150645783e-07, "loss": 0.1823, "step": 52805 }, { "epoch": 0.9535313702973444, "grad_norm": 0.3152017891407013, "learning_rate": 1.0632252343257931e-07, "loss": 0.1342, "step": 52810 }, { "epoch": 0.9536216497302451, "grad_norm": 0.2789387106895447, "learning_rate": 1.059104312331205e-07, "loss": 0.1647, "step": 52815 }, { "epoch": 0.9537119291631457, "grad_norm": 0.5095995664596558, "learning_rate": 1.0549913494123043e-07, "loss": 0.182, "step": 52820 }, { "epoch": 0.9538022085960465, "grad_norm": 0.6552473306655884, "learning_rate": 1.0508863458999485e-07, "loss": 0.232, "step": 52825 }, { "epoch": 0.9538924880289472, "grad_norm": 0.3909153938293457, "learning_rate": 1.0467893021243514e-07, "loss": 0.2768, "step": 52830 }, { "epoch": 0.953982767461848, "grad_norm": 0.37738755345344543, "learning_rate": 1.0427002184150936e-07, "loss": 0.2332, "step": 52835 }, { "epoch": 0.9540730468947486, "grad_norm": 0.2699531614780426, "learning_rate": 1.0386190951011233e-07, "loss": 0.2465, "step": 52840 }, { "epoch": 0.9541633263276493, "grad_norm": 0.3892914056777954, "learning_rate": 1.0345459325107221e-07, "loss": 0.2359, "step": 52845 }, { "epoch": 0.9542536057605501, "grad_norm": 0.12257574498653412, "learning_rate": 1.0304807309715615e-07, "loss": 0.1991, "step": 52850 }, { "epoch": 0.9543438851934508, "grad_norm": 0.4543693959712982, "learning_rate": 1.0264234908106352e-07, "loss": 0.1992, "step": 52855 }, { "epoch": 0.9544341646263514, "grad_norm": 0.5937663316726685, "learning_rate": 1.0223742123543378e-07, "loss": 0.2249, "step": 52860 }, { "epoch": 0.9545244440592522, "grad_norm": 0.32612088322639465, "learning_rate": 1.0183328959283977e-07, "loss": 0.2424, "step": 52865 }, { "epoch": 0.9546147234921529, "grad_norm": 0.5018953680992126, "learning_rate": 1.0142995418579105e-07, "loss": 0.1275, "step": 52870 }, { "epoch": 0.9547050029250537, "grad_norm": 0.5463442802429199, "learning_rate": 1.0102741504673386e-07, "loss": 0.2147, "step": 52875 }, { "epoch": 0.9547952823579543, "grad_norm": 0.32160958647727966, "learning_rate": 1.0062567220804676e-07, "loss": 0.1512, "step": 52880 }, { "epoch": 0.954885561790855, "grad_norm": 0.33108359575271606, "learning_rate": 1.0022472570205055e-07, "loss": 0.241, "step": 52885 }, { "epoch": 0.9549758412237558, "grad_norm": 0.2833552062511444, "learning_rate": 9.982457556099612e-08, "loss": 0.2393, "step": 52890 }, { "epoch": 0.9550661206566565, "grad_norm": 0.39436647295951843, "learning_rate": 9.942522181707326e-08, "loss": 0.3652, "step": 52895 }, { "epoch": 0.9551564000895572, "grad_norm": 0.7457765340805054, "learning_rate": 9.902666450240739e-08, "loss": 0.1964, "step": 52900 }, { "epoch": 0.9552466795224579, "grad_norm": 0.5097097754478455, "learning_rate": 9.862890364905953e-08, "loss": 0.1545, "step": 52905 }, { "epoch": 0.9553369589553586, "grad_norm": 0.3292642831802368, "learning_rate": 9.82319392890252e-08, "loss": 0.2628, "step": 52910 }, { "epoch": 0.9554272383882594, "grad_norm": 0.7061537504196167, "learning_rate": 9.783577145423995e-08, "loss": 0.1994, "step": 52915 }, { "epoch": 0.95551751782116, "grad_norm": 0.4803351163864136, "learning_rate": 9.744040017657053e-08, "loss": 0.237, "step": 52920 }, { "epoch": 0.9556077972540608, "grad_norm": 0.4237426519393921, "learning_rate": 9.70458254878226e-08, "loss": 0.2182, "step": 52925 }, { "epoch": 0.9556980766869615, "grad_norm": 0.7886397242546082, "learning_rate": 9.665204741973633e-08, "loss": 0.2221, "step": 52930 }, { "epoch": 0.9557883561198622, "grad_norm": 0.5053573846817017, "learning_rate": 9.62590660039886e-08, "loss": 0.1368, "step": 52935 }, { "epoch": 0.9558786355527629, "grad_norm": 0.5820841193199158, "learning_rate": 9.58668812721919e-08, "loss": 0.1859, "step": 52940 }, { "epoch": 0.9559689149856636, "grad_norm": 0.3846931457519531, "learning_rate": 9.547549325589544e-08, "loss": 0.214, "step": 52945 }, { "epoch": 0.9560591944185644, "grad_norm": 1.6069108247756958, "learning_rate": 9.508490198658183e-08, "loss": 0.3033, "step": 52950 }, { "epoch": 0.9561494738514651, "grad_norm": 0.403764009475708, "learning_rate": 9.469510749567257e-08, "loss": 0.2093, "step": 52955 }, { "epoch": 0.9562397532843657, "grad_norm": 0.22472268342971802, "learning_rate": 9.43061098145226e-08, "loss": 0.2622, "step": 52960 }, { "epoch": 0.9563300327172665, "grad_norm": 0.21604177355766296, "learning_rate": 9.391790897442577e-08, "loss": 0.2468, "step": 52965 }, { "epoch": 0.9564203121501672, "grad_norm": 0.3919534981250763, "learning_rate": 9.35305050066082e-08, "loss": 0.3053, "step": 52970 }, { "epoch": 0.956510591583068, "grad_norm": 0.6001983284950256, "learning_rate": 9.314389794223499e-08, "loss": 0.2177, "step": 52975 }, { "epoch": 0.9566008710159686, "grad_norm": 0.48056331276893616, "learning_rate": 9.27580878124057e-08, "loss": 0.2077, "step": 52980 }, { "epoch": 0.9566911504488693, "grad_norm": 0.31555619835853577, "learning_rate": 9.237307464815437e-08, "loss": 0.2056, "step": 52985 }, { "epoch": 0.9567814298817701, "grad_norm": 0.5026416182518005, "learning_rate": 9.198885848045402e-08, "loss": 0.2561, "step": 52990 }, { "epoch": 0.9568717093146708, "grad_norm": 1.641403079032898, "learning_rate": 9.160543934021216e-08, "loss": 0.1711, "step": 52995 }, { "epoch": 0.9569619887475714, "grad_norm": 0.5881418585777283, "learning_rate": 9.122281725827298e-08, "loss": 0.2398, "step": 53000 }, { "epoch": 0.9570522681804722, "grad_norm": 0.5745043158531189, "learning_rate": 9.084099226541298e-08, "loss": 0.1363, "step": 53005 }, { "epoch": 0.9571425476133729, "grad_norm": 0.3548424541950226, "learning_rate": 9.045996439234872e-08, "loss": 0.224, "step": 53010 }, { "epoch": 0.9572328270462737, "grad_norm": 0.8307169079780579, "learning_rate": 9.007973366973121e-08, "loss": 0.2867, "step": 53015 }, { "epoch": 0.9573231064791743, "grad_norm": 0.748156726360321, "learning_rate": 8.970030012814712e-08, "loss": 0.2731, "step": 53020 }, { "epoch": 0.957413385912075, "grad_norm": 0.29039686918258667, "learning_rate": 8.932166379811868e-08, "loss": 0.2667, "step": 53025 }, { "epoch": 0.9575036653449758, "grad_norm": 0.44262897968292236, "learning_rate": 8.894382471010487e-08, "loss": 0.1928, "step": 53030 }, { "epoch": 0.9575939447778765, "grad_norm": 0.35700151324272156, "learning_rate": 8.856678289450027e-08, "loss": 0.2164, "step": 53035 }, { "epoch": 0.9576842242107771, "grad_norm": 0.3340015411376953, "learning_rate": 8.819053838163505e-08, "loss": 0.1913, "step": 53040 }, { "epoch": 0.9577745036436779, "grad_norm": 0.4953787326812744, "learning_rate": 8.781509120177501e-08, "loss": 0.2271, "step": 53045 }, { "epoch": 0.9578647830765786, "grad_norm": 0.5200175642967224, "learning_rate": 8.744044138512265e-08, "loss": 0.2455, "step": 53050 }, { "epoch": 0.9579550625094794, "grad_norm": 0.46813035011291504, "learning_rate": 8.706658896181386e-08, "loss": 0.2572, "step": 53055 }, { "epoch": 0.95804534194238, "grad_norm": 0.31738176941871643, "learning_rate": 8.66935339619257e-08, "loss": 0.2258, "step": 53060 }, { "epoch": 0.9581356213752807, "grad_norm": 0.4683874249458313, "learning_rate": 8.632127641546528e-08, "loss": 0.2134, "step": 53065 }, { "epoch": 0.9582259008081815, "grad_norm": 0.40266793966293335, "learning_rate": 8.594981635237975e-08, "loss": 0.2604, "step": 53070 }, { "epoch": 0.9583161802410822, "grad_norm": 0.5166845321655273, "learning_rate": 8.557915380254855e-08, "loss": 0.2116, "step": 53075 }, { "epoch": 0.9584064596739829, "grad_norm": 0.31063374876976013, "learning_rate": 8.520928879579005e-08, "loss": 0.263, "step": 53080 }, { "epoch": 0.9584967391068836, "grad_norm": 0.31503283977508545, "learning_rate": 8.48402213618571e-08, "loss": 0.2032, "step": 53085 }, { "epoch": 0.9585870185397843, "grad_norm": 1.0543475151062012, "learning_rate": 8.447195153043708e-08, "loss": 0.1469, "step": 53090 }, { "epoch": 0.9586772979726851, "grad_norm": 0.45366400480270386, "learning_rate": 8.410447933115518e-08, "loss": 0.1851, "step": 53095 }, { "epoch": 0.9587675774055857, "grad_norm": 1.8845783472061157, "learning_rate": 8.373780479357329e-08, "loss": 0.231, "step": 53100 }, { "epoch": 0.9588578568384865, "grad_norm": 0.47017449140548706, "learning_rate": 8.337192794718784e-08, "loss": 0.2515, "step": 53105 }, { "epoch": 0.9589481362713872, "grad_norm": 0.439563512802124, "learning_rate": 8.300684882142862e-08, "loss": 0.1526, "step": 53110 }, { "epoch": 0.9590384157042879, "grad_norm": 0.7139844298362732, "learning_rate": 8.264256744566546e-08, "loss": 0.2517, "step": 53115 }, { "epoch": 0.9591286951371887, "grad_norm": 0.17191582918167114, "learning_rate": 8.227908384920158e-08, "loss": 0.297, "step": 53120 }, { "epoch": 0.9592189745700893, "grad_norm": 0.30800798535346985, "learning_rate": 8.191639806127582e-08, "loss": 0.1924, "step": 53125 }, { "epoch": 0.95930925400299, "grad_norm": 0.32242152094841003, "learning_rate": 8.155451011106485e-08, "loss": 0.1799, "step": 53130 }, { "epoch": 0.9593995334358908, "grad_norm": 0.4862722158432007, "learning_rate": 8.119342002767983e-08, "loss": 0.241, "step": 53135 }, { "epoch": 0.9594898128687915, "grad_norm": 0.5938839316368103, "learning_rate": 8.08331278401675e-08, "loss": 0.1877, "step": 53140 }, { "epoch": 0.9595800923016922, "grad_norm": 0.28677666187286377, "learning_rate": 8.047363357751137e-08, "loss": 0.1845, "step": 53145 }, { "epoch": 0.9596703717345929, "grad_norm": 0.2129906713962555, "learning_rate": 8.011493726862829e-08, "loss": 0.1233, "step": 53150 }, { "epoch": 0.9597606511674937, "grad_norm": 0.4362054169178009, "learning_rate": 7.975703894237407e-08, "loss": 0.2467, "step": 53155 }, { "epoch": 0.9598509306003944, "grad_norm": 0.6214504837989807, "learning_rate": 7.939993862753903e-08, "loss": 0.2055, "step": 53160 }, { "epoch": 0.959941210033295, "grad_norm": 0.4493671953678131, "learning_rate": 7.904363635284906e-08, "loss": 0.239, "step": 53165 }, { "epoch": 0.9600314894661958, "grad_norm": 0.4206450581550598, "learning_rate": 7.86881321469668e-08, "loss": 0.2206, "step": 53170 }, { "epoch": 0.9601217688990965, "grad_norm": 0.4071847200393677, "learning_rate": 7.833342603848936e-08, "loss": 0.2473, "step": 53175 }, { "epoch": 0.9602120483319972, "grad_norm": 1.0784540176391602, "learning_rate": 7.797951805595062e-08, "loss": 0.2786, "step": 53180 }, { "epoch": 0.9603023277648979, "grad_norm": 0.3914712071418762, "learning_rate": 7.762640822781886e-08, "loss": 0.1983, "step": 53185 }, { "epoch": 0.9603926071977986, "grad_norm": 0.4481930136680603, "learning_rate": 7.727409658249918e-08, "loss": 0.222, "step": 53190 }, { "epoch": 0.9604828866306994, "grad_norm": 0.3223609924316406, "learning_rate": 7.692258314833445e-08, "loss": 0.1913, "step": 53195 }, { "epoch": 0.9605731660636001, "grad_norm": 0.44718512892723083, "learning_rate": 7.657186795359983e-08, "loss": 0.245, "step": 53200 }, { "epoch": 0.9606634454965007, "grad_norm": 0.9813588261604309, "learning_rate": 7.62219510265083e-08, "loss": 0.3685, "step": 53205 }, { "epoch": 0.9607537249294015, "grad_norm": 0.4945777952671051, "learning_rate": 7.587283239520848e-08, "loss": 0.2221, "step": 53210 }, { "epoch": 0.9608440043623022, "grad_norm": 0.6341715455055237, "learning_rate": 7.552451208778233e-08, "loss": 0.1329, "step": 53215 }, { "epoch": 0.960934283795203, "grad_norm": 0.5160859227180481, "learning_rate": 7.517699013225299e-08, "loss": 0.2517, "step": 53220 }, { "epoch": 0.9610245632281036, "grad_norm": 0.495863676071167, "learning_rate": 7.483026655657255e-08, "loss": 0.2899, "step": 53225 }, { "epoch": 0.9611148426610043, "grad_norm": 0.5978807210922241, "learning_rate": 7.448434138863537e-08, "loss": 0.2264, "step": 53230 }, { "epoch": 0.9612051220939051, "grad_norm": 0.3395076394081116, "learning_rate": 7.413921465626806e-08, "loss": 0.2343, "step": 53235 }, { "epoch": 0.9612954015268058, "grad_norm": 0.3495229482650757, "learning_rate": 7.379488638723176e-08, "loss": 0.1622, "step": 53240 }, { "epoch": 0.9613856809597064, "grad_norm": 0.2799985706806183, "learning_rate": 7.345135660922765e-08, "loss": 0.2223, "step": 53245 }, { "epoch": 0.9614759603926072, "grad_norm": 0.6174278259277344, "learning_rate": 7.310862534988805e-08, "loss": 0.2237, "step": 53250 }, { "epoch": 0.9615662398255079, "grad_norm": 0.5111470222473145, "learning_rate": 7.276669263678316e-08, "loss": 0.2449, "step": 53255 }, { "epoch": 0.9616565192584087, "grad_norm": 0.20767705142498016, "learning_rate": 7.242555849742095e-08, "loss": 0.1808, "step": 53260 }, { "epoch": 0.9617467986913093, "grad_norm": 0.4902808964252472, "learning_rate": 7.20852229592417e-08, "loss": 0.1326, "step": 53265 }, { "epoch": 0.96183707812421, "grad_norm": 0.6132269501686096, "learning_rate": 7.17456860496235e-08, "loss": 0.1469, "step": 53270 }, { "epoch": 0.9619273575571108, "grad_norm": 0.3463001847267151, "learning_rate": 7.140694779588009e-08, "loss": 0.2011, "step": 53275 }, { "epoch": 0.9620176369900115, "grad_norm": 0.6098634004592896, "learning_rate": 7.106900822525964e-08, "loss": 0.1988, "step": 53280 }, { "epoch": 0.9621079164229122, "grad_norm": 0.2960973381996155, "learning_rate": 7.073186736494708e-08, "loss": 0.2279, "step": 53285 }, { "epoch": 0.9621981958558129, "grad_norm": 0.5420827269554138, "learning_rate": 7.039552524206183e-08, "loss": 0.2263, "step": 53290 }, { "epoch": 0.9622884752887136, "grad_norm": 0.5859010219573975, "learning_rate": 7.005998188366337e-08, "loss": 0.2199, "step": 53295 }, { "epoch": 0.9623787547216144, "grad_norm": 0.4214431941509247, "learning_rate": 6.972523731674008e-08, "loss": 0.2139, "step": 53300 }, { "epoch": 0.962469034154515, "grad_norm": 0.5375816226005554, "learning_rate": 6.939129156822267e-08, "loss": 0.2319, "step": 53305 }, { "epoch": 0.9625593135874158, "grad_norm": 0.3661133348941803, "learning_rate": 6.905814466497296e-08, "loss": 0.2255, "step": 53310 }, { "epoch": 0.9626495930203165, "grad_norm": 0.6833767890930176, "learning_rate": 6.872579663379065e-08, "loss": 0.2102, "step": 53315 }, { "epoch": 0.9627398724532172, "grad_norm": 0.34302306175231934, "learning_rate": 6.839424750140988e-08, "loss": 0.1961, "step": 53320 }, { "epoch": 0.9628301518861179, "grad_norm": 0.4643937051296234, "learning_rate": 6.806349729450268e-08, "loss": 0.1428, "step": 53325 }, { "epoch": 0.9629204313190186, "grad_norm": 0.890001654624939, "learning_rate": 6.773354603967552e-08, "loss": 0.2088, "step": 53330 }, { "epoch": 0.9630107107519194, "grad_norm": 0.37487348914146423, "learning_rate": 6.74043937634683e-08, "loss": 0.3113, "step": 53335 }, { "epoch": 0.9631009901848201, "grad_norm": 0.4764295220375061, "learning_rate": 6.707604049236316e-08, "loss": 0.1891, "step": 53340 }, { "epoch": 0.9631912696177207, "grad_norm": 0.16798852384090424, "learning_rate": 6.674848625276898e-08, "loss": 0.2206, "step": 53345 }, { "epoch": 0.9632815490506215, "grad_norm": 0.4167994260787964, "learning_rate": 6.64217310710391e-08, "loss": 0.1883, "step": 53350 }, { "epoch": 0.9633718284835222, "grad_norm": 0.5369573831558228, "learning_rate": 6.609577497345476e-08, "loss": 0.2549, "step": 53355 }, { "epoch": 0.963462107916423, "grad_norm": 0.4055411219596863, "learning_rate": 6.577061798624051e-08, "loss": 0.1853, "step": 53360 }, { "epoch": 0.9635523873493236, "grad_norm": 0.3651867210865021, "learning_rate": 6.544626013555099e-08, "loss": 0.1254, "step": 53365 }, { "epoch": 0.9636426667822243, "grad_norm": 0.2772580683231354, "learning_rate": 6.512270144747867e-08, "loss": 0.1921, "step": 53370 }, { "epoch": 0.9637329462151251, "grad_norm": 0.3850838840007782, "learning_rate": 6.47999419480505e-08, "loss": 0.2453, "step": 53375 }, { "epoch": 0.9638232256480258, "grad_norm": 0.4329755902290344, "learning_rate": 6.447798166323238e-08, "loss": 0.2601, "step": 53380 }, { "epoch": 0.9639135050809264, "grad_norm": 0.2568431496620178, "learning_rate": 6.415682061892248e-08, "loss": 0.1775, "step": 53385 }, { "epoch": 0.9640037845138272, "grad_norm": 0.7551835179328918, "learning_rate": 6.383645884095457e-08, "loss": 0.2182, "step": 53390 }, { "epoch": 0.9640940639467279, "grad_norm": 0.3586956560611725, "learning_rate": 6.351689635510139e-08, "loss": 0.2476, "step": 53395 }, { "epoch": 0.9641843433796287, "grad_norm": 0.48093101382255554, "learning_rate": 6.319813318706903e-08, "loss": 0.2034, "step": 53400 }, { "epoch": 0.9642746228125293, "grad_norm": 0.6688908338546753, "learning_rate": 6.288016936249808e-08, "loss": 0.256, "step": 53405 }, { "epoch": 0.96436490224543, "grad_norm": 0.26767441630363464, "learning_rate": 6.25630049069692e-08, "loss": 0.1174, "step": 53410 }, { "epoch": 0.9644551816783308, "grad_norm": 0.5681080222129822, "learning_rate": 6.224663984599422e-08, "loss": 0.2648, "step": 53415 }, { "epoch": 0.9645454611112315, "grad_norm": 0.19477705657482147, "learning_rate": 6.193107420502164e-08, "loss": 0.2603, "step": 53420 }, { "epoch": 0.9646357405441321, "grad_norm": 0.4034990668296814, "learning_rate": 6.161630800943786e-08, "loss": 0.2282, "step": 53425 }, { "epoch": 0.9647260199770329, "grad_norm": 0.49816352128982544, "learning_rate": 6.130234128456258e-08, "loss": 0.2564, "step": 53430 }, { "epoch": 0.9648162994099336, "grad_norm": 0.3927150070667267, "learning_rate": 6.098917405565341e-08, "loss": 0.1817, "step": 53435 }, { "epoch": 0.9649065788428344, "grad_norm": 0.512175977230072, "learning_rate": 6.067680634790129e-08, "loss": 0.1719, "step": 53440 }, { "epoch": 0.964996858275735, "grad_norm": 0.30420270562171936, "learning_rate": 6.036523818643502e-08, "loss": 0.2328, "step": 53445 }, { "epoch": 0.9650871377086357, "grad_norm": 0.8397376537322998, "learning_rate": 6.005446959631567e-08, "loss": 0.1231, "step": 53450 }, { "epoch": 0.9651774171415365, "grad_norm": 0.18549306690692902, "learning_rate": 5.974450060254433e-08, "loss": 0.1716, "step": 53455 }, { "epoch": 0.9652676965744372, "grad_norm": 0.35602420568466187, "learning_rate": 5.943533123005552e-08, "loss": 0.1806, "step": 53460 }, { "epoch": 0.9653579760073379, "grad_norm": 0.5836049318313599, "learning_rate": 5.9126961503720437e-08, "loss": 0.2974, "step": 53465 }, { "epoch": 0.9654482554402386, "grad_norm": 0.5807859897613525, "learning_rate": 5.881939144834259e-08, "loss": 0.2645, "step": 53470 }, { "epoch": 0.9655385348731393, "grad_norm": 0.4430500268936157, "learning_rate": 5.851262108866662e-08, "loss": 0.2112, "step": 53475 }, { "epoch": 0.9656288143060401, "grad_norm": 0.39009523391723633, "learning_rate": 5.820665044936724e-08, "loss": 0.1803, "step": 53480 }, { "epoch": 0.9657190937389407, "grad_norm": 0.44665607810020447, "learning_rate": 5.7901479555060314e-08, "loss": 0.2182, "step": 53485 }, { "epoch": 0.9658093731718415, "grad_norm": 0.3570907711982727, "learning_rate": 5.759710843029287e-08, "loss": 0.2087, "step": 53490 }, { "epoch": 0.9658996526047422, "grad_norm": 0.49240604043006897, "learning_rate": 5.729353709954977e-08, "loss": 0.2469, "step": 53495 }, { "epoch": 0.9659899320376429, "grad_norm": 0.744106113910675, "learning_rate": 5.699076558725036e-08, "loss": 0.3114, "step": 53500 }, { "epoch": 0.9660802114705437, "grad_norm": 0.5872280597686768, "learning_rate": 5.668879391775295e-08, "loss": 0.2079, "step": 53505 }, { "epoch": 0.9661704909034443, "grad_norm": 0.641364574432373, "learning_rate": 5.6387622115345877e-08, "loss": 0.2465, "step": 53510 }, { "epoch": 0.966260770336345, "grad_norm": 0.38412678241729736, "learning_rate": 5.608725020425754e-08, "loss": 0.2284, "step": 53515 }, { "epoch": 0.9663510497692458, "grad_norm": 0.3273337483406067, "learning_rate": 5.578767820865083e-08, "loss": 0.2677, "step": 53520 }, { "epoch": 0.9664413292021465, "grad_norm": 0.3032999634742737, "learning_rate": 5.548890615262426e-08, "loss": 0.2144, "step": 53525 }, { "epoch": 0.9665316086350472, "grad_norm": 0.40187644958496094, "learning_rate": 5.519093406021192e-08, "loss": 0.2244, "step": 53530 }, { "epoch": 0.9666218880679479, "grad_norm": 0.2785539925098419, "learning_rate": 5.489376195538243e-08, "loss": 0.2216, "step": 53535 }, { "epoch": 0.9667121675008487, "grad_norm": 0.38451331853866577, "learning_rate": 5.4597389862042216e-08, "loss": 0.2483, "step": 53540 }, { "epoch": 0.9668024469337494, "grad_norm": 0.41696181893348694, "learning_rate": 5.430181780403221e-08, "loss": 0.2812, "step": 53545 }, { "epoch": 0.96689272636665, "grad_norm": 0.3749529719352722, "learning_rate": 5.400704580512894e-08, "loss": 0.1698, "step": 53550 }, { "epoch": 0.9669830057995508, "grad_norm": 0.5485641956329346, "learning_rate": 5.371307388904345e-08, "loss": 0.299, "step": 53555 }, { "epoch": 0.9670732852324515, "grad_norm": 0.6841853857040405, "learning_rate": 5.341990207942571e-08, "loss": 0.2233, "step": 53560 }, { "epoch": 0.9671635646653522, "grad_norm": 0.6185765862464905, "learning_rate": 5.312753039985685e-08, "loss": 0.1981, "step": 53565 }, { "epoch": 0.9672538440982529, "grad_norm": 0.25791651010513306, "learning_rate": 5.283595887385917e-08, "loss": 0.1868, "step": 53570 }, { "epoch": 0.9673441235311536, "grad_norm": 0.34983909130096436, "learning_rate": 5.2545187524885024e-08, "loss": 0.25, "step": 53575 }, { "epoch": 0.9674344029640544, "grad_norm": 0.5966994166374207, "learning_rate": 5.225521637632569e-08, "loss": 0.3569, "step": 53580 }, { "epoch": 0.9675246823969551, "grad_norm": 0.2861376404762268, "learning_rate": 5.196604545150585e-08, "loss": 0.1452, "step": 53585 }, { "epoch": 0.9676149618298557, "grad_norm": 0.4372887909412384, "learning_rate": 5.1677674773690234e-08, "loss": 0.1444, "step": 53590 }, { "epoch": 0.9677052412627565, "grad_norm": 0.18914885818958282, "learning_rate": 5.1390104366072505e-08, "loss": 0.2534, "step": 53595 }, { "epoch": 0.9677955206956572, "grad_norm": 0.45735007524490356, "learning_rate": 5.110333425178971e-08, "loss": 0.2267, "step": 53600 }, { "epoch": 0.967885800128558, "grad_norm": 0.2701546251773834, "learning_rate": 5.0817364453906727e-08, "loss": 0.2206, "step": 53605 }, { "epoch": 0.9679760795614586, "grad_norm": 0.41877853870391846, "learning_rate": 5.053219499542961e-08, "loss": 0.183, "step": 53610 }, { "epoch": 0.9680663589943593, "grad_norm": 0.5866236686706543, "learning_rate": 5.0247825899297773e-08, "loss": 0.2052, "step": 53615 }, { "epoch": 0.9681566384272601, "grad_norm": 0.5392400622367859, "learning_rate": 4.9964257188386264e-08, "loss": 0.1449, "step": 53620 }, { "epoch": 0.9682469178601608, "grad_norm": 0.5776108503341675, "learning_rate": 4.9681488885506835e-08, "loss": 0.2987, "step": 53625 }, { "epoch": 0.9683371972930614, "grad_norm": 0.5677412748336792, "learning_rate": 4.939952101340573e-08, "loss": 0.2401, "step": 53630 }, { "epoch": 0.9684274767259622, "grad_norm": 0.5053970813751221, "learning_rate": 4.911835359476591e-08, "loss": 0.2752, "step": 53635 }, { "epoch": 0.9685177561588629, "grad_norm": 0.5509980916976929, "learning_rate": 4.883798665220374e-08, "loss": 0.2565, "step": 53640 }, { "epoch": 0.9686080355917637, "grad_norm": 0.30780690908432007, "learning_rate": 4.85584202082745e-08, "loss": 0.3142, "step": 53645 }, { "epoch": 0.9686983150246643, "grad_norm": 0.3872641623020172, "learning_rate": 4.827965428546577e-08, "loss": 0.1649, "step": 53650 }, { "epoch": 0.968788594457565, "grad_norm": 0.3725968301296234, "learning_rate": 4.800168890620294e-08, "loss": 0.2596, "step": 53655 }, { "epoch": 0.9688788738904658, "grad_norm": 0.6582515835762024, "learning_rate": 4.7724524092845895e-08, "loss": 0.2926, "step": 53660 }, { "epoch": 0.9689691533233665, "grad_norm": 0.48744910955429077, "learning_rate": 4.7448159867691244e-08, "loss": 0.1601, "step": 53665 }, { "epoch": 0.9690594327562672, "grad_norm": 0.3776699900627136, "learning_rate": 4.717259625297011e-08, "loss": 0.2596, "step": 53670 }, { "epoch": 0.9691497121891679, "grad_norm": 0.5383025407791138, "learning_rate": 4.689783327085029e-08, "loss": 0.2028, "step": 53675 }, { "epoch": 0.9692399916220686, "grad_norm": 0.48457786440849304, "learning_rate": 4.662387094343301e-08, "loss": 0.2108, "step": 53680 }, { "epoch": 0.9693302710549694, "grad_norm": 0.9042235016822815, "learning_rate": 4.6350709292756205e-08, "loss": 0.2879, "step": 53685 }, { "epoch": 0.96942055048787, "grad_norm": 0.5203196406364441, "learning_rate": 4.607834834079672e-08, "loss": 0.2075, "step": 53690 }, { "epoch": 0.9695108299207708, "grad_norm": 0.42074522376060486, "learning_rate": 4.5806788109460373e-08, "loss": 0.1693, "step": 53695 }, { "epoch": 0.9696011093536715, "grad_norm": 0.8473619818687439, "learning_rate": 4.553602862059525e-08, "loss": 0.2932, "step": 53700 }, { "epoch": 0.9696913887865722, "grad_norm": 0.6807908415794373, "learning_rate": 4.5266069895980594e-08, "loss": 0.2788, "step": 53705 }, { "epoch": 0.9697816682194729, "grad_norm": 0.30918651819229126, "learning_rate": 4.499691195733236e-08, "loss": 0.2647, "step": 53710 }, { "epoch": 0.9698719476523736, "grad_norm": 0.4880746603012085, "learning_rate": 4.472855482630212e-08, "loss": 0.2247, "step": 53715 }, { "epoch": 0.9699622270852744, "grad_norm": 0.30650821328163147, "learning_rate": 4.446099852447816e-08, "loss": 0.2894, "step": 53720 }, { "epoch": 0.9700525065181751, "grad_norm": 0.35868415236473083, "learning_rate": 4.419424307338327e-08, "loss": 0.2829, "step": 53725 }, { "epoch": 0.9701427859510757, "grad_norm": 0.6965757608413696, "learning_rate": 4.392828849447583e-08, "loss": 0.2363, "step": 53730 }, { "epoch": 0.9702330653839765, "grad_norm": 0.5765778422355652, "learning_rate": 4.366313480914986e-08, "loss": 0.2097, "step": 53735 }, { "epoch": 0.9703233448168772, "grad_norm": 0.3773828446865082, "learning_rate": 4.339878203873604e-08, "loss": 0.2333, "step": 53740 }, { "epoch": 0.970413624249778, "grad_norm": 0.457940012216568, "learning_rate": 4.3135230204497394e-08, "loss": 0.2028, "step": 53745 }, { "epoch": 0.9705039036826786, "grad_norm": 0.471177339553833, "learning_rate": 4.287247932763694e-08, "loss": 0.1695, "step": 53750 }, { "epoch": 0.9705941831155793, "grad_norm": 0.5329426527023315, "learning_rate": 4.2610529429290005e-08, "loss": 0.2217, "step": 53755 }, { "epoch": 0.9706844625484801, "grad_norm": 0.45549896359443665, "learning_rate": 4.234938053052862e-08, "loss": 0.2349, "step": 53760 }, { "epoch": 0.9707747419813808, "grad_norm": 0.532945990562439, "learning_rate": 4.2089032652360415e-08, "loss": 0.2345, "step": 53765 }, { "epoch": 0.9708650214142814, "grad_norm": 0.41036728024482727, "learning_rate": 4.182948581572866e-08, "loss": 0.2363, "step": 53770 }, { "epoch": 0.9709553008471822, "grad_norm": 0.4287985861301422, "learning_rate": 4.157074004151107e-08, "loss": 0.1736, "step": 53775 }, { "epoch": 0.9710455802800829, "grad_norm": 0.4294351637363434, "learning_rate": 4.131279535052435e-08, "loss": 0.1674, "step": 53780 }, { "epoch": 0.9711358597129837, "grad_norm": 0.3271455764770508, "learning_rate": 4.1055651763515226e-08, "loss": 0.2346, "step": 53785 }, { "epoch": 0.9712261391458843, "grad_norm": 0.44987601041793823, "learning_rate": 4.079930930117049e-08, "loss": 0.2933, "step": 53790 }, { "epoch": 0.971316418578785, "grad_norm": 0.5628604888916016, "learning_rate": 4.0543767984111417e-08, "loss": 0.1721, "step": 53795 }, { "epoch": 0.9714066980116858, "grad_norm": 0.46557164192199707, "learning_rate": 4.028902783289379e-08, "loss": 0.237, "step": 53800 }, { "epoch": 0.9714969774445865, "grad_norm": 0.4409957230091095, "learning_rate": 4.003508886801011e-08, "loss": 0.2053, "step": 53805 }, { "epoch": 0.9715872568774871, "grad_norm": 0.5898836851119995, "learning_rate": 3.9781951109887364e-08, "loss": 0.1727, "step": 53810 }, { "epoch": 0.9716775363103879, "grad_norm": 0.47835275530815125, "learning_rate": 3.952961457888815e-08, "loss": 0.1725, "step": 53815 }, { "epoch": 0.9717678157432886, "grad_norm": 0.46880072355270386, "learning_rate": 3.92780792953118e-08, "loss": 0.2866, "step": 53820 }, { "epoch": 0.9718580951761894, "grad_norm": 0.14378659427165985, "learning_rate": 3.902734527939434e-08, "loss": 0.1799, "step": 53825 }, { "epoch": 0.97194837460909, "grad_norm": 0.4346258342266083, "learning_rate": 3.877741255130185e-08, "loss": 0.2799, "step": 53830 }, { "epoch": 0.9720386540419907, "grad_norm": 0.2553173303604126, "learning_rate": 3.852828113114271e-08, "loss": 0.1726, "step": 53835 }, { "epoch": 0.9721289334748915, "grad_norm": 0.33480632305145264, "learning_rate": 3.827995103895532e-08, "loss": 0.2129, "step": 53840 }, { "epoch": 0.9722192129077922, "grad_norm": 0.5246515870094299, "learning_rate": 3.8032422294718154e-08, "loss": 0.2672, "step": 53845 }, { "epoch": 0.9723094923406929, "grad_norm": 0.12980012595653534, "learning_rate": 3.778569491834083e-08, "loss": 0.1648, "step": 53850 }, { "epoch": 0.9723997717735936, "grad_norm": 0.38328951597213745, "learning_rate": 3.753976892967304e-08, "loss": 0.3238, "step": 53855 }, { "epoch": 0.9724900512064943, "grad_norm": 0.5784655809402466, "learning_rate": 3.729464434849672e-08, "loss": 0.1323, "step": 53860 }, { "epoch": 0.9725803306393951, "grad_norm": 0.38006168603897095, "learning_rate": 3.705032119453056e-08, "loss": 0.2283, "step": 53865 }, { "epoch": 0.9726706100722957, "grad_norm": 0.6768224239349365, "learning_rate": 3.6806799487427716e-08, "loss": 0.1879, "step": 53870 }, { "epoch": 0.9727608895051965, "grad_norm": 0.42994022369384766, "learning_rate": 3.656407924677918e-08, "loss": 0.1891, "step": 53875 }, { "epoch": 0.9728511689380972, "grad_norm": 0.824230968952179, "learning_rate": 3.632216049210824e-08, "loss": 0.2687, "step": 53880 }, { "epoch": 0.9729414483709979, "grad_norm": 0.1999301314353943, "learning_rate": 3.608104324287709e-08, "loss": 0.2878, "step": 53885 }, { "epoch": 0.9730317278038986, "grad_norm": 0.14496801793575287, "learning_rate": 3.584072751848133e-08, "loss": 0.2171, "step": 53890 }, { "epoch": 0.9731220072367993, "grad_norm": 0.4517415761947632, "learning_rate": 3.5601213338253284e-08, "loss": 0.2225, "step": 53895 }, { "epoch": 0.9732122866697, "grad_norm": 0.32180681824684143, "learning_rate": 3.5362500721458635e-08, "loss": 0.294, "step": 53900 }, { "epoch": 0.9733025661026008, "grad_norm": 0.3482528328895569, "learning_rate": 3.5124589687300926e-08, "loss": 0.2145, "step": 53905 }, { "epoch": 0.9733928455355015, "grad_norm": 0.2886830270290375, "learning_rate": 3.4887480254919285e-08, "loss": 0.1946, "step": 53910 }, { "epoch": 0.9734831249684022, "grad_norm": 0.6015817523002625, "learning_rate": 3.465117244338512e-08, "loss": 0.1654, "step": 53915 }, { "epoch": 0.9735734044013029, "grad_norm": 0.5138819813728333, "learning_rate": 3.4415666271709893e-08, "loss": 0.2839, "step": 53920 }, { "epoch": 0.9736636838342037, "grad_norm": 0.39462965726852417, "learning_rate": 3.418096175883734e-08, "loss": 0.165, "step": 53925 }, { "epoch": 0.9737539632671044, "grad_norm": 0.2814905643463135, "learning_rate": 3.3947058923647916e-08, "loss": 0.1518, "step": 53930 }, { "epoch": 0.973844242700005, "grad_norm": 0.556002140045166, "learning_rate": 3.3713957784956566e-08, "loss": 0.238, "step": 53935 }, { "epoch": 0.9739345221329058, "grad_norm": 0.4952412247657776, "learning_rate": 3.3481658361517174e-08, "loss": 0.1439, "step": 53940 }, { "epoch": 0.9740248015658065, "grad_norm": 0.3634929656982422, "learning_rate": 3.32501606720137e-08, "loss": 0.1923, "step": 53945 }, { "epoch": 0.9741150809987073, "grad_norm": 0.5335572361946106, "learning_rate": 3.3019464735069004e-08, "loss": 0.2511, "step": 53950 }, { "epoch": 0.9742053604316079, "grad_norm": 0.732477605342865, "learning_rate": 3.278957056924159e-08, "loss": 0.1523, "step": 53955 }, { "epoch": 0.9742956398645086, "grad_norm": 0.823394238948822, "learning_rate": 3.256047819302555e-08, "loss": 0.1512, "step": 53960 }, { "epoch": 0.9743859192974094, "grad_norm": 0.2745742201805115, "learning_rate": 3.2332187624848355e-08, "loss": 0.2163, "step": 53965 }, { "epoch": 0.9744761987303101, "grad_norm": 0.20988920331001282, "learning_rate": 3.210469888307422e-08, "loss": 0.2594, "step": 53970 }, { "epoch": 0.9745664781632107, "grad_norm": 0.43002596497535706, "learning_rate": 3.1878011986004044e-08, "loss": 0.1908, "step": 53975 }, { "epoch": 0.9746567575961115, "grad_norm": 0.2920987904071808, "learning_rate": 3.165212695187214e-08, "loss": 0.2134, "step": 53980 }, { "epoch": 0.9747470370290122, "grad_norm": 0.4996824860572815, "learning_rate": 3.1427043798849536e-08, "loss": 0.2194, "step": 53985 }, { "epoch": 0.974837316461913, "grad_norm": 0.35674232244491577, "learning_rate": 3.120276254504395e-08, "loss": 0.2013, "step": 53990 }, { "epoch": 0.9749275958948136, "grad_norm": 0.9280580878257751, "learning_rate": 3.0979283208495416e-08, "loss": 0.2114, "step": 53995 }, { "epoch": 0.9750178753277143, "grad_norm": 0.6595668792724609, "learning_rate": 3.075660580718065e-08, "loss": 0.2647, "step": 54000 }, { "epoch": 0.9751081547606151, "grad_norm": 0.45414796471595764, "learning_rate": 3.053473035901422e-08, "loss": 0.2197, "step": 54005 }, { "epoch": 0.9751984341935158, "grad_norm": 0.702100932598114, "learning_rate": 3.031365688184407e-08, "loss": 0.3289, "step": 54010 }, { "epoch": 0.9752887136264164, "grad_norm": 0.2315141260623932, "learning_rate": 3.009338539345374e-08, "loss": 0.1687, "step": 54015 }, { "epoch": 0.9753789930593172, "grad_norm": 0.4079658091068268, "learning_rate": 2.98739159115613e-08, "loss": 0.1738, "step": 54020 }, { "epoch": 0.9754692724922179, "grad_norm": 0.35546764731407166, "learning_rate": 2.9655248453823726e-08, "loss": 0.1492, "step": 54025 }, { "epoch": 0.9755595519251187, "grad_norm": 0.3246750831604004, "learning_rate": 2.943738303782917e-08, "loss": 0.1771, "step": 54030 }, { "epoch": 0.9756498313580193, "grad_norm": 0.5031123161315918, "learning_rate": 2.9220319681104724e-08, "loss": 0.1374, "step": 54035 }, { "epoch": 0.97574011079092, "grad_norm": 0.34943029284477234, "learning_rate": 2.900405840111087e-08, "loss": 0.2455, "step": 54040 }, { "epoch": 0.9758303902238208, "grad_norm": 0.2979680895805359, "learning_rate": 2.8788599215243683e-08, "loss": 0.1988, "step": 54045 }, { "epoch": 0.9759206696567215, "grad_norm": 0.838577151298523, "learning_rate": 2.8573942140837084e-08, "loss": 0.25, "step": 54050 }, { "epoch": 0.9760109490896222, "grad_norm": 0.37349367141723633, "learning_rate": 2.8360087195156148e-08, "loss": 0.1397, "step": 54055 }, { "epoch": 0.9761012285225229, "grad_norm": 0.2899876534938812, "learning_rate": 2.8147034395406003e-08, "loss": 0.2271, "step": 54060 }, { "epoch": 0.9761915079554236, "grad_norm": 0.7934709787368774, "learning_rate": 2.793478375872516e-08, "loss": 0.2183, "step": 54065 }, { "epoch": 0.9762817873883244, "grad_norm": 0.4726581871509552, "learning_rate": 2.7723335302186628e-08, "loss": 0.1587, "step": 54070 }, { "epoch": 0.976372066821225, "grad_norm": 0.39323365688323975, "learning_rate": 2.7512689042800134e-08, "loss": 0.3155, "step": 54075 }, { "epoch": 0.9764623462541258, "grad_norm": 0.4772615134716034, "learning_rate": 2.7302844997511013e-08, "loss": 0.2656, "step": 54080 }, { "epoch": 0.9765526256870265, "grad_norm": 0.29938217997550964, "learning_rate": 2.7093803183199096e-08, "loss": 0.2542, "step": 54085 }, { "epoch": 0.9766429051199272, "grad_norm": 0.5492769479751587, "learning_rate": 2.6885563616682042e-08, "loss": 0.2459, "step": 54090 }, { "epoch": 0.9767331845528279, "grad_norm": 0.7767098546028137, "learning_rate": 2.6678126314708675e-08, "loss": 0.2211, "step": 54095 }, { "epoch": 0.9768234639857286, "grad_norm": 0.5957434177398682, "learning_rate": 2.6471491293966755e-08, "loss": 0.2162, "step": 54100 }, { "epoch": 0.9769137434186294, "grad_norm": 0.4204747676849365, "learning_rate": 2.6265658571079654e-08, "loss": 0.2596, "step": 54105 }, { "epoch": 0.9770040228515301, "grad_norm": 0.5886346101760864, "learning_rate": 2.606062816260413e-08, "loss": 0.3572, "step": 54110 }, { "epoch": 0.9770943022844307, "grad_norm": 0.45081228017807007, "learning_rate": 2.585640008503254e-08, "loss": 0.2742, "step": 54115 }, { "epoch": 0.9771845817173315, "grad_norm": 0.6001715064048767, "learning_rate": 2.565297435479508e-08, "loss": 0.1913, "step": 54120 }, { "epoch": 0.9772748611502322, "grad_norm": 0.479575514793396, "learning_rate": 2.5450350988255325e-08, "loss": 0.2184, "step": 54125 }, { "epoch": 0.977365140583133, "grad_norm": 1.1889973878860474, "learning_rate": 2.5248530001713567e-08, "loss": 0.2626, "step": 54130 }, { "epoch": 0.9774554200160336, "grad_norm": 0.4684038758277893, "learning_rate": 2.5047511411403492e-08, "loss": 0.3636, "step": 54135 }, { "epoch": 0.9775456994489343, "grad_norm": 0.6237125992774963, "learning_rate": 2.4847295233495493e-08, "loss": 0.2402, "step": 54140 }, { "epoch": 0.9776359788818351, "grad_norm": 0.4272877871990204, "learning_rate": 2.46478814840978e-08, "loss": 0.2284, "step": 54145 }, { "epoch": 0.9777262583147358, "grad_norm": 0.5578311681747437, "learning_rate": 2.4449270179248695e-08, "loss": 0.2594, "step": 54150 }, { "epoch": 0.9778165377476364, "grad_norm": 0.9206238389015198, "learning_rate": 2.425146133492762e-08, "loss": 0.2056, "step": 54155 }, { "epoch": 0.9779068171805372, "grad_norm": 0.6731491088867188, "learning_rate": 2.4054454967045172e-08, "loss": 0.3118, "step": 54160 }, { "epoch": 0.9779970966134379, "grad_norm": 0.5745200514793396, "learning_rate": 2.385825109144979e-08, "loss": 0.1717, "step": 54165 }, { "epoch": 0.9780873760463387, "grad_norm": 0.25734376907348633, "learning_rate": 2.3662849723924407e-08, "loss": 0.2262, "step": 54170 }, { "epoch": 0.9781776554792393, "grad_norm": 0.39289623498916626, "learning_rate": 2.346825088018867e-08, "loss": 0.2184, "step": 54175 }, { "epoch": 0.97826793491214, "grad_norm": 0.43563422560691833, "learning_rate": 2.32744545758945e-08, "loss": 0.2393, "step": 54180 }, { "epoch": 0.9783582143450408, "grad_norm": 0.34247323870658875, "learning_rate": 2.3081460826633874e-08, "loss": 0.2528, "step": 54185 }, { "epoch": 0.9784484937779415, "grad_norm": 0.25040972232818604, "learning_rate": 2.2889269647929922e-08, "loss": 0.2516, "step": 54190 }, { "epoch": 0.9785387732108421, "grad_norm": 0.3073778450489044, "learning_rate": 2.269788105524362e-08, "loss": 0.2036, "step": 54195 }, { "epoch": 0.9786290526437429, "grad_norm": 0.29352474212646484, "learning_rate": 2.2507295063970425e-08, "loss": 0.1861, "step": 54200 }, { "epoch": 0.9787193320766436, "grad_norm": 0.4617655575275421, "learning_rate": 2.231751168944252e-08, "loss": 0.2416, "step": 54205 }, { "epoch": 0.9788096115095444, "grad_norm": 0.32537949085235596, "learning_rate": 2.2128530946925463e-08, "loss": 0.2118, "step": 54210 }, { "epoch": 0.978899890942445, "grad_norm": 0.3557182252407074, "learning_rate": 2.1940352851622658e-08, "loss": 0.2294, "step": 54215 }, { "epoch": 0.9789901703753457, "grad_norm": 0.7561355829238892, "learning_rate": 2.1752977418670885e-08, "loss": 0.2576, "step": 54220 }, { "epoch": 0.9790804498082465, "grad_norm": 0.23972244560718536, "learning_rate": 2.1566404663142527e-08, "loss": 0.2196, "step": 54225 }, { "epoch": 0.9791707292411472, "grad_norm": 0.32039886713027954, "learning_rate": 2.138063460004669e-08, "loss": 0.2188, "step": 54230 }, { "epoch": 0.9792610086740479, "grad_norm": 0.5600818395614624, "learning_rate": 2.119566724432809e-08, "loss": 0.2569, "step": 54235 }, { "epoch": 0.9793512881069486, "grad_norm": 0.6730336546897888, "learning_rate": 2.101150261086482e-08, "loss": 0.2534, "step": 54240 }, { "epoch": 0.9794415675398493, "grad_norm": 0.3314119577407837, "learning_rate": 2.08281407144717e-08, "loss": 0.2092, "step": 54245 }, { "epoch": 0.9795318469727501, "grad_norm": 0.36345618963241577, "learning_rate": 2.0645581569899153e-08, "loss": 0.1609, "step": 54250 }, { "epoch": 0.9796221264056507, "grad_norm": 0.5022575259208679, "learning_rate": 2.0463825191833207e-08, "loss": 0.2596, "step": 54255 }, { "epoch": 0.9797124058385515, "grad_norm": 0.45627039670944214, "learning_rate": 2.0282871594893284e-08, "loss": 0.2048, "step": 54260 }, { "epoch": 0.9798026852714522, "grad_norm": 0.37281811237335205, "learning_rate": 2.0102720793637732e-08, "loss": 0.1054, "step": 54265 }, { "epoch": 0.9798929647043529, "grad_norm": 0.4168417751789093, "learning_rate": 1.9923372802556073e-08, "loss": 0.2717, "step": 54270 }, { "epoch": 0.9799832441372536, "grad_norm": 0.5583621263504028, "learning_rate": 1.974482763607788e-08, "loss": 0.176, "step": 54275 }, { "epoch": 0.9800735235701543, "grad_norm": 0.6229156255722046, "learning_rate": 1.9567085308564994e-08, "loss": 0.2551, "step": 54280 }, { "epoch": 0.9801638030030551, "grad_norm": 0.5340616106987, "learning_rate": 1.9390145834314867e-08, "loss": 0.2462, "step": 54285 }, { "epoch": 0.9802540824359558, "grad_norm": 0.5986381769180298, "learning_rate": 1.921400922756167e-08, "loss": 0.2074, "step": 54290 }, { "epoch": 0.9803443618688565, "grad_norm": 0.6956779360771179, "learning_rate": 1.9038675502474068e-08, "loss": 0.2119, "step": 54295 }, { "epoch": 0.9804346413017572, "grad_norm": 0.5290546417236328, "learning_rate": 1.8864144673156336e-08, "loss": 0.2377, "step": 54300 }, { "epoch": 0.9805249207346579, "grad_norm": 0.6634647846221924, "learning_rate": 1.8690416753648356e-08, "loss": 0.1975, "step": 54305 }, { "epoch": 0.9806152001675587, "grad_norm": 0.5973348617553711, "learning_rate": 1.851749175792561e-08, "loss": 0.28, "step": 54310 }, { "epoch": 0.9807054796004594, "grad_norm": 0.3736438453197479, "learning_rate": 1.8345369699898085e-08, "loss": 0.2068, "step": 54315 }, { "epoch": 0.98079575903336, "grad_norm": 0.5507653951644897, "learning_rate": 1.8174050593411375e-08, "loss": 0.1457, "step": 54320 }, { "epoch": 0.9808860384662608, "grad_norm": 0.3628789484500885, "learning_rate": 1.8003534452247786e-08, "loss": 0.2868, "step": 54325 }, { "epoch": 0.9809763178991615, "grad_norm": 0.39264750480651855, "learning_rate": 1.783382129012412e-08, "loss": 0.1769, "step": 54330 }, { "epoch": 0.9810665973320623, "grad_norm": 0.4039103090763092, "learning_rate": 1.7664911120691684e-08, "loss": 0.2148, "step": 54335 }, { "epoch": 0.9811568767649629, "grad_norm": 0.19867925345897675, "learning_rate": 1.74968039575385e-08, "loss": 0.2144, "step": 54340 }, { "epoch": 0.9812471561978636, "grad_norm": 0.3533666431903839, "learning_rate": 1.7329499814187077e-08, "loss": 0.2703, "step": 54345 }, { "epoch": 0.9813374356307644, "grad_norm": 0.5271742939949036, "learning_rate": 1.7162998704097768e-08, "loss": 0.2183, "step": 54350 }, { "epoch": 0.9814277150636651, "grad_norm": 0.2934906780719757, "learning_rate": 1.6997300640662075e-08, "loss": 0.1751, "step": 54355 }, { "epoch": 0.9815179944965657, "grad_norm": 0.3343862295150757, "learning_rate": 1.6832405637210447e-08, "loss": 0.262, "step": 54360 }, { "epoch": 0.9816082739294665, "grad_norm": 0.1872626692056656, "learning_rate": 1.6668313707006723e-08, "loss": 0.2004, "step": 54365 }, { "epoch": 0.9816985533623672, "grad_norm": 0.4585855305194855, "learning_rate": 1.650502486325145e-08, "loss": 0.2252, "step": 54370 }, { "epoch": 0.981788832795268, "grad_norm": 0.6149754524230957, "learning_rate": 1.634253911908079e-08, "loss": 0.2997, "step": 54375 }, { "epoch": 0.9818791122281686, "grad_norm": 0.3708350956439972, "learning_rate": 1.618085648756318e-08, "loss": 0.2633, "step": 54380 }, { "epoch": 0.9819693916610693, "grad_norm": 0.43999844789505005, "learning_rate": 1.6019976981707096e-08, "loss": 0.3044, "step": 54385 }, { "epoch": 0.9820596710939701, "grad_norm": 0.4915013909339905, "learning_rate": 1.5859900614453306e-08, "loss": 0.1663, "step": 54390 }, { "epoch": 0.9821499505268708, "grad_norm": 0.6268812417984009, "learning_rate": 1.570062739867817e-08, "loss": 0.357, "step": 54395 }, { "epoch": 0.9822402299597714, "grad_norm": 0.6899821162223816, "learning_rate": 1.5542157347195885e-08, "loss": 0.2998, "step": 54400 }, { "epoch": 0.9823305093926722, "grad_norm": 0.2525346577167511, "learning_rate": 1.5384490472751813e-08, "loss": 0.2131, "step": 54405 }, { "epoch": 0.9824207888255729, "grad_norm": 0.601879358291626, "learning_rate": 1.5227626788031358e-08, "loss": 0.1923, "step": 54410 }, { "epoch": 0.9825110682584737, "grad_norm": 0.6370570659637451, "learning_rate": 1.5071566305651096e-08, "loss": 0.1979, "step": 54415 }, { "epoch": 0.9826013476913743, "grad_norm": 0.44733312726020813, "learning_rate": 1.4916309038166542e-08, "loss": 0.2806, "step": 54420 }, { "epoch": 0.982691627124275, "grad_norm": 0.4717170000076294, "learning_rate": 1.4761854998065483e-08, "loss": 0.1165, "step": 54425 }, { "epoch": 0.9827819065571758, "grad_norm": 0.39846286177635193, "learning_rate": 1.4608204197774644e-08, "loss": 0.2344, "step": 54430 }, { "epoch": 0.9828721859900765, "grad_norm": 0.760108232498169, "learning_rate": 1.4455356649651919e-08, "loss": 0.2154, "step": 54435 }, { "epoch": 0.9829624654229772, "grad_norm": 0.3395760655403137, "learning_rate": 1.4303312365995248e-08, "loss": 0.1984, "step": 54440 }, { "epoch": 0.9830527448558779, "grad_norm": 0.5740180611610413, "learning_rate": 1.4152071359032626e-08, "loss": 0.0967, "step": 54445 }, { "epoch": 0.9831430242887786, "grad_norm": 0.21744437515735626, "learning_rate": 1.40016336409321e-08, "loss": 0.2766, "step": 54450 }, { "epoch": 0.9832333037216794, "grad_norm": 0.4723064601421356, "learning_rate": 1.3851999223795098e-08, "loss": 0.253, "step": 54455 }, { "epoch": 0.98332358315458, "grad_norm": 0.6886090636253357, "learning_rate": 1.3703168119658661e-08, "loss": 0.1891, "step": 54460 }, { "epoch": 0.9834138625874808, "grad_norm": 0.6787614226341248, "learning_rate": 1.3555140340494322e-08, "loss": 0.1867, "step": 54465 }, { "epoch": 0.9835041420203815, "grad_norm": 0.42908579111099243, "learning_rate": 1.3407915898211443e-08, "loss": 0.1815, "step": 54470 }, { "epoch": 0.9835944214532822, "grad_norm": 0.4250125288963318, "learning_rate": 1.3261494804651664e-08, "loss": 0.2078, "step": 54475 }, { "epoch": 0.9836847008861829, "grad_norm": 0.5903279185295105, "learning_rate": 1.3115877071593341e-08, "loss": 0.2514, "step": 54480 }, { "epoch": 0.9837749803190836, "grad_norm": 0.3856804072856903, "learning_rate": 1.2971062710751547e-08, "loss": 0.2237, "step": 54485 }, { "epoch": 0.9838652597519844, "grad_norm": 0.3865649104118347, "learning_rate": 1.2827051733775852e-08, "loss": 0.2022, "step": 54490 }, { "epoch": 0.9839555391848851, "grad_norm": 0.4912815988063812, "learning_rate": 1.2683844152249214e-08, "loss": 0.1831, "step": 54495 }, { "epoch": 0.9840458186177857, "grad_norm": 0.3872782289981842, "learning_rate": 1.2541439977692415e-08, "loss": 0.2566, "step": 54500 }, { "epoch": 0.9841360980506865, "grad_norm": 0.6470474600791931, "learning_rate": 1.239983922156185e-08, "loss": 0.2493, "step": 54505 }, { "epoch": 0.9842263774835872, "grad_norm": 0.3223435580730438, "learning_rate": 1.2259041895246182e-08, "loss": 0.1579, "step": 54510 }, { "epoch": 0.984316656916488, "grad_norm": 0.07871793955564499, "learning_rate": 1.211904801007302e-08, "loss": 0.182, "step": 54515 }, { "epoch": 0.9844069363493886, "grad_norm": 0.6256330013275146, "learning_rate": 1.1979857577304465e-08, "loss": 0.2576, "step": 54520 }, { "epoch": 0.9844972157822893, "grad_norm": 0.44302675127983093, "learning_rate": 1.1841470608134897e-08, "loss": 0.1846, "step": 54525 }, { "epoch": 0.9845874952151901, "grad_norm": 0.5845201015472412, "learning_rate": 1.1703887113698742e-08, "loss": 0.2834, "step": 54530 }, { "epoch": 0.9846777746480908, "grad_norm": 0.3106643557548523, "learning_rate": 1.1567107105063812e-08, "loss": 0.1771, "step": 54535 }, { "epoch": 0.9847680540809914, "grad_norm": 0.656623899936676, "learning_rate": 1.143113059323131e-08, "loss": 0.1233, "step": 54540 }, { "epoch": 0.9848583335138922, "grad_norm": 0.6997153162956238, "learning_rate": 1.129595758914026e-08, "loss": 0.346, "step": 54545 }, { "epoch": 0.9849486129467929, "grad_norm": 0.3024485409259796, "learning_rate": 1.1161588103665299e-08, "loss": 0.2499, "step": 54550 }, { "epoch": 0.9850388923796937, "grad_norm": 0.49105653166770935, "learning_rate": 1.1028022147614448e-08, "loss": 0.2373, "step": 54555 }, { "epoch": 0.9851291718125943, "grad_norm": 0.3709699213504791, "learning_rate": 1.0895259731731334e-08, "loss": 0.1944, "step": 54560 }, { "epoch": 0.985219451245495, "grad_norm": 0.4455859661102295, "learning_rate": 1.0763300866698523e-08, "loss": 0.2043, "step": 54565 }, { "epoch": 0.9853097306783958, "grad_norm": 0.5328179597854614, "learning_rate": 1.0632145563128638e-08, "loss": 0.2008, "step": 54570 }, { "epoch": 0.9854000101112965, "grad_norm": 0.5087750554084778, "learning_rate": 1.0501793831572127e-08, "loss": 0.2165, "step": 54575 }, { "epoch": 0.9854902895441972, "grad_norm": 0.4027363657951355, "learning_rate": 1.0372245682516157e-08, "loss": 0.1802, "step": 54580 }, { "epoch": 0.9855805689770979, "grad_norm": 0.47149139642715454, "learning_rate": 1.0243501126381283e-08, "loss": 0.2732, "step": 54585 }, { "epoch": 0.9856708484099986, "grad_norm": 0.4707593619823456, "learning_rate": 1.0115560173524774e-08, "loss": 0.1656, "step": 54590 }, { "epoch": 0.9857611278428994, "grad_norm": 1.3330811262130737, "learning_rate": 9.988422834237287e-09, "loss": 0.2148, "step": 54595 }, { "epoch": 0.9858514072758, "grad_norm": 0.5004041194915771, "learning_rate": 9.862089118747309e-09, "loss": 0.2962, "step": 54600 }, { "epoch": 0.9859416867087007, "grad_norm": 0.3979488015174866, "learning_rate": 9.736559037216708e-09, "loss": 0.2138, "step": 54605 }, { "epoch": 0.9860319661416015, "grad_norm": 0.41310417652130127, "learning_rate": 9.611832599744075e-09, "loss": 0.1875, "step": 54610 }, { "epoch": 0.9861222455745022, "grad_norm": 0.39090973138809204, "learning_rate": 9.487909816361384e-09, "loss": 0.181, "step": 54615 }, { "epoch": 0.9862125250074029, "grad_norm": 0.6974276900291443, "learning_rate": 9.36479069703955e-09, "loss": 0.173, "step": 54620 }, { "epoch": 0.9863028044403036, "grad_norm": 0.3585265874862671, "learning_rate": 9.24247525168065e-09, "loss": 0.2758, "step": 54625 }, { "epoch": 0.9863930838732043, "grad_norm": 0.6234022974967957, "learning_rate": 9.12096349012459e-09, "loss": 0.3286, "step": 54630 }, { "epoch": 0.9864833633061051, "grad_norm": 0.3395076394081116, "learning_rate": 9.000255422146886e-09, "loss": 0.201, "step": 54635 }, { "epoch": 0.9865736427390057, "grad_norm": 0.24648065865039825, "learning_rate": 8.880351057456437e-09, "loss": 0.1741, "step": 54640 }, { "epoch": 0.9866639221719065, "grad_norm": 0.3816898763179779, "learning_rate": 8.761250405699973e-09, "loss": 0.1558, "step": 54645 }, { "epoch": 0.9867542016048072, "grad_norm": 0.6216617226600647, "learning_rate": 8.642953476457604e-09, "loss": 0.228, "step": 54650 }, { "epoch": 0.9868444810377079, "grad_norm": 0.4171766936779022, "learning_rate": 8.525460279245057e-09, "loss": 0.2411, "step": 54655 }, { "epoch": 0.9869347604706086, "grad_norm": 0.17013856768608093, "learning_rate": 8.408770823513657e-09, "loss": 0.1842, "step": 54660 }, { "epoch": 0.9870250399035093, "grad_norm": 0.7724552154541016, "learning_rate": 8.29288511865145e-09, "loss": 0.1417, "step": 54665 }, { "epoch": 0.9871153193364101, "grad_norm": 0.2612602412700653, "learning_rate": 8.17780317397987e-09, "loss": 0.2186, "step": 54670 }, { "epoch": 0.9872055987693108, "grad_norm": 0.43372562527656555, "learning_rate": 8.063524998755956e-09, "loss": 0.2414, "step": 54675 }, { "epoch": 0.9872958782022114, "grad_norm": 0.6091335415840149, "learning_rate": 7.950050602173465e-09, "loss": 0.2543, "step": 54680 }, { "epoch": 0.9873861576351122, "grad_norm": 0.4017021059989929, "learning_rate": 7.83737999336065e-09, "loss": 0.2791, "step": 54685 }, { "epoch": 0.9874764370680129, "grad_norm": 0.584742546081543, "learning_rate": 7.725513181380262e-09, "loss": 0.274, "step": 54690 }, { "epoch": 0.9875667165009137, "grad_norm": 0.38122114539146423, "learning_rate": 7.614450175230658e-09, "loss": 0.1558, "step": 54695 }, { "epoch": 0.9876569959338144, "grad_norm": 0.4123266637325287, "learning_rate": 7.504190983848025e-09, "loss": 0.2442, "step": 54700 }, { "epoch": 0.987747275366715, "grad_norm": 0.29697564244270325, "learning_rate": 7.394735616099713e-09, "loss": 0.2092, "step": 54705 }, { "epoch": 0.9878375547996158, "grad_norm": 0.3298875391483307, "learning_rate": 7.286084080793121e-09, "loss": 0.2783, "step": 54710 }, { "epoch": 0.9879278342325165, "grad_norm": 0.6056180596351624, "learning_rate": 7.1782363866657045e-09, "loss": 0.201, "step": 54715 }, { "epoch": 0.9880181136654173, "grad_norm": 0.8074268102645874, "learning_rate": 7.071192542394967e-09, "loss": 0.2275, "step": 54720 }, { "epoch": 0.9881083930983179, "grad_norm": 0.6272867918014526, "learning_rate": 6.964952556590687e-09, "loss": 0.2298, "step": 54725 }, { "epoch": 0.9881986725312186, "grad_norm": 0.3642462491989136, "learning_rate": 6.859516437799363e-09, "loss": 0.2761, "step": 54730 }, { "epoch": 0.9882889519641194, "grad_norm": 0.52139812707901, "learning_rate": 6.754884194503097e-09, "loss": 0.2101, "step": 54735 }, { "epoch": 0.9883792313970201, "grad_norm": 0.5058034062385559, "learning_rate": 6.651055835118492e-09, "loss": 0.1981, "step": 54740 }, { "epoch": 0.9884695108299207, "grad_norm": 0.38746005296707153, "learning_rate": 6.548031367998864e-09, "loss": 0.188, "step": 54745 }, { "epoch": 0.9885597902628215, "grad_norm": 0.2977273762226105, "learning_rate": 6.445810801428698e-09, "loss": 0.1936, "step": 54750 }, { "epoch": 0.9886500696957222, "grad_norm": 0.47060200572013855, "learning_rate": 6.344394143634747e-09, "loss": 0.1698, "step": 54755 }, { "epoch": 0.988740349128623, "grad_norm": 0.5865183472633362, "learning_rate": 6.243781402772709e-09, "loss": 0.2928, "step": 54760 }, { "epoch": 0.9888306285615236, "grad_norm": 0.414611279964447, "learning_rate": 6.143972586936109e-09, "loss": 0.2012, "step": 54765 }, { "epoch": 0.9889209079944243, "grad_norm": 0.49423834681510925, "learning_rate": 6.0449677041551914e-09, "loss": 0.2292, "step": 54770 }, { "epoch": 0.9890111874273251, "grad_norm": 0.4946470260620117, "learning_rate": 5.9467667623935854e-09, "loss": 0.2609, "step": 54775 }, { "epoch": 0.9891014668602258, "grad_norm": 0.4100695848464966, "learning_rate": 5.849369769550528e-09, "loss": 0.1228, "step": 54780 }, { "epoch": 0.9891917462931265, "grad_norm": 0.45060092210769653, "learning_rate": 5.752776733461973e-09, "loss": 0.2696, "step": 54785 }, { "epoch": 0.9892820257260272, "grad_norm": 0.4754217565059662, "learning_rate": 5.656987661896152e-09, "loss": 0.201, "step": 54790 }, { "epoch": 0.9893723051589279, "grad_norm": 0.3832783102989197, "learning_rate": 5.562002562560232e-09, "loss": 0.2685, "step": 54795 }, { "epoch": 0.9894625845918287, "grad_norm": 0.3240828216075897, "learning_rate": 5.467821443095877e-09, "loss": 0.2837, "step": 54800 }, { "epoch": 0.9895528640247293, "grad_norm": 0.5363389849662781, "learning_rate": 5.374444311075921e-09, "loss": 0.2026, "step": 54805 }, { "epoch": 0.98964314345763, "grad_norm": 0.5036905407905579, "learning_rate": 5.281871174015463e-09, "loss": 0.2955, "step": 54810 }, { "epoch": 0.9897334228905308, "grad_norm": 0.39396196603775024, "learning_rate": 5.190102039358546e-09, "loss": 0.2446, "step": 54815 }, { "epoch": 0.9898237023234315, "grad_norm": 0.5660658478736877, "learning_rate": 5.099136914490377e-09, "loss": 0.1853, "step": 54820 }, { "epoch": 0.9899139817563322, "grad_norm": 0.4224463105201721, "learning_rate": 5.0089758067251026e-09, "loss": 0.1955, "step": 54825 }, { "epoch": 0.9900042611892329, "grad_norm": 0.8481043577194214, "learning_rate": 4.919618723318032e-09, "loss": 0.1669, "step": 54830 }, { "epoch": 0.9900945406221336, "grad_norm": 0.3382450342178345, "learning_rate": 4.831065671455637e-09, "loss": 0.2177, "step": 54835 }, { "epoch": 0.9901848200550344, "grad_norm": 0.38149264454841614, "learning_rate": 4.743316658263331e-09, "loss": 0.24, "step": 54840 }, { "epoch": 0.990275099487935, "grad_norm": 0.36693939566612244, "learning_rate": 4.65637169079769e-09, "loss": 0.1988, "step": 54845 }, { "epoch": 0.9903653789208358, "grad_norm": 1.147173523902893, "learning_rate": 4.570230776053119e-09, "loss": 0.2925, "step": 54850 }, { "epoch": 0.9904556583537365, "grad_norm": 0.6585434079170227, "learning_rate": 4.484893920960742e-09, "loss": 0.219, "step": 54855 }, { "epoch": 0.9905459377866372, "grad_norm": 0.4392758905887604, "learning_rate": 4.400361132383957e-09, "loss": 0.2885, "step": 54860 }, { "epoch": 0.9906362172195379, "grad_norm": 0.37770336866378784, "learning_rate": 4.316632417122879e-09, "loss": 0.2936, "step": 54865 }, { "epoch": 0.9907264966524386, "grad_norm": 0.5151588916778564, "learning_rate": 4.233707781914342e-09, "loss": 0.1824, "step": 54870 }, { "epoch": 0.9908167760853394, "grad_norm": 0.45082736015319824, "learning_rate": 4.151587233426346e-09, "loss": 0.2648, "step": 54875 }, { "epoch": 0.9909070555182401, "grad_norm": 0.6206878423690796, "learning_rate": 4.070270778266938e-09, "loss": 0.2074, "step": 54880 }, { "epoch": 0.9909973349511407, "grad_norm": 0.7082839608192444, "learning_rate": 3.989758422976442e-09, "loss": 0.205, "step": 54885 }, { "epoch": 0.9910876143840415, "grad_norm": 0.5329179763793945, "learning_rate": 3.910050174031898e-09, "loss": 0.1578, "step": 54890 }, { "epoch": 0.9911778938169422, "grad_norm": 0.5654190182685852, "learning_rate": 3.8311460378459564e-09, "loss": 0.1589, "step": 54895 }, { "epoch": 0.991268173249843, "grad_norm": 0.5214343070983887, "learning_rate": 3.75304602076354e-09, "loss": 0.1954, "step": 54900 }, { "epoch": 0.9913584526827436, "grad_norm": 0.5556568503379822, "learning_rate": 3.675750129070732e-09, "loss": 0.2024, "step": 54905 }, { "epoch": 0.9914487321156443, "grad_norm": 0.4347390830516815, "learning_rate": 3.599258368982561e-09, "loss": 0.2587, "step": 54910 }, { "epoch": 0.9915390115485451, "grad_norm": 0.7272732853889465, "learning_rate": 3.5235707466529935e-09, "loss": 0.1688, "step": 54915 }, { "epoch": 0.9916292909814458, "grad_norm": 0.3668297231197357, "learning_rate": 3.448687268170492e-09, "loss": 0.2342, "step": 54920 }, { "epoch": 0.9917195704143464, "grad_norm": 0.26720502972602844, "learning_rate": 3.374607939560237e-09, "loss": 0.1538, "step": 54925 }, { "epoch": 0.9918098498472472, "grad_norm": 0.5334031581878662, "learning_rate": 3.3013327667796857e-09, "loss": 0.1935, "step": 54930 }, { "epoch": 0.9919001292801479, "grad_norm": 1.0811445713043213, "learning_rate": 3.2288617557241218e-09, "loss": 0.333, "step": 54935 }, { "epoch": 0.9919904087130487, "grad_norm": 0.4220045804977417, "learning_rate": 3.1571949122222167e-09, "loss": 0.1772, "step": 54940 }, { "epoch": 0.9920806881459493, "grad_norm": 0.41664108633995056, "learning_rate": 3.0863322420404685e-09, "loss": 0.2029, "step": 54945 }, { "epoch": 0.99217096757885, "grad_norm": 0.353105366230011, "learning_rate": 3.016273750878762e-09, "loss": 0.1825, "step": 54950 }, { "epoch": 0.9922612470117508, "grad_norm": 0.35080665349960327, "learning_rate": 2.9470194443736997e-09, "loss": 0.1995, "step": 54955 }, { "epoch": 0.9923515264446515, "grad_norm": 0.31684285402297974, "learning_rate": 2.8785693280941606e-09, "loss": 0.1536, "step": 54960 }, { "epoch": 0.9924418058775522, "grad_norm": 0.5460395216941833, "learning_rate": 2.810923407547961e-09, "loss": 0.2148, "step": 54965 }, { "epoch": 0.9925320853104529, "grad_norm": 0.5535667538642883, "learning_rate": 2.744081688176303e-09, "loss": 0.2236, "step": 54970 }, { "epoch": 0.9926223647433536, "grad_norm": 0.39629554748535156, "learning_rate": 2.6780441753559983e-09, "loss": 0.1486, "step": 54975 }, { "epoch": 0.9927126441762544, "grad_norm": 0.9714711308479309, "learning_rate": 2.612810874399463e-09, "loss": 0.2227, "step": 54980 }, { "epoch": 0.992802923609155, "grad_norm": 0.7687849402427673, "learning_rate": 2.5483817905547215e-09, "loss": 0.2099, "step": 54985 }, { "epoch": 0.9928932030420557, "grad_norm": 0.4090625047683716, "learning_rate": 2.4847569290031847e-09, "loss": 0.2424, "step": 54990 }, { "epoch": 0.9929834824749565, "grad_norm": 0.2441577911376953, "learning_rate": 2.4219362948652016e-09, "loss": 0.1599, "step": 54995 }, { "epoch": 0.9930737619078572, "grad_norm": 0.3369239866733551, "learning_rate": 2.359919893192286e-09, "loss": 0.1663, "step": 55000 }, { "epoch": 0.9931640413407579, "grad_norm": 0.5537863373756409, "learning_rate": 2.2987077289748914e-09, "loss": 0.1604, "step": 55005 }, { "epoch": 0.9932543207736586, "grad_norm": 0.4537431597709656, "learning_rate": 2.2382998071346362e-09, "loss": 0.1988, "step": 55010 }, { "epoch": 0.9933446002065593, "grad_norm": 0.3401337265968323, "learning_rate": 2.1786961325331867e-09, "loss": 0.14, "step": 55015 }, { "epoch": 0.9934348796394601, "grad_norm": 0.38408929109573364, "learning_rate": 2.1198967099644864e-09, "loss": 0.2203, "step": 55020 }, { "epoch": 0.9935251590723607, "grad_norm": 0.17645908892154694, "learning_rate": 2.061901544159195e-09, "loss": 0.2402, "step": 55025 }, { "epoch": 0.9936154385052615, "grad_norm": 0.8458005785942078, "learning_rate": 2.0047106397813598e-09, "loss": 0.2522, "step": 55030 }, { "epoch": 0.9937057179381622, "grad_norm": 0.4414557218551636, "learning_rate": 1.948324001431745e-09, "loss": 0.1801, "step": 55035 }, { "epoch": 0.993795997371063, "grad_norm": 0.29727786779403687, "learning_rate": 1.892741633646722e-09, "loss": 0.1798, "step": 55040 }, { "epoch": 0.9938862768039636, "grad_norm": 0.2330741137266159, "learning_rate": 1.8379635408971586e-09, "loss": 0.1889, "step": 55045 }, { "epoch": 0.9939765562368643, "grad_norm": 0.7773925065994263, "learning_rate": 1.7839897275895301e-09, "loss": 0.2301, "step": 55050 }, { "epoch": 0.9940668356697651, "grad_norm": 0.48192688822746277, "learning_rate": 1.7308201980670291e-09, "loss": 0.1987, "step": 55055 }, { "epoch": 0.9941571151026658, "grad_norm": 0.23969459533691406, "learning_rate": 1.6784549566051245e-09, "loss": 0.2059, "step": 55060 }, { "epoch": 0.9942473945355664, "grad_norm": 1.0368404388427734, "learning_rate": 1.6268940074160022e-09, "loss": 0.2315, "step": 55065 }, { "epoch": 0.9943376739684672, "grad_norm": 0.4173521101474762, "learning_rate": 1.576137354648566e-09, "loss": 0.2434, "step": 55070 }, { "epoch": 0.9944279534013679, "grad_norm": 0.5417553186416626, "learning_rate": 1.5261850023851055e-09, "loss": 0.1798, "step": 55075 }, { "epoch": 0.9945182328342687, "grad_norm": 0.7902389168739319, "learning_rate": 1.4770369546446284e-09, "loss": 0.2487, "step": 55080 }, { "epoch": 0.9946085122671694, "grad_norm": 0.6232701539993286, "learning_rate": 1.4286932153806388e-09, "loss": 0.2372, "step": 55085 }, { "epoch": 0.99469879170007, "grad_norm": 0.7035704851150513, "learning_rate": 1.3811537884800274e-09, "loss": 0.2957, "step": 55090 }, { "epoch": 0.9947890711329708, "grad_norm": 0.37746915221214294, "learning_rate": 1.3344186777697332e-09, "loss": 0.1739, "step": 55095 }, { "epoch": 0.9948793505658715, "grad_norm": 0.566070556640625, "learning_rate": 1.288487887007861e-09, "loss": 0.2797, "step": 55100 }, { "epoch": 0.9949696299987723, "grad_norm": 0.4011896252632141, "learning_rate": 1.2433614198903432e-09, "loss": 0.1882, "step": 55105 }, { "epoch": 0.9950599094316729, "grad_norm": 0.5778722763061523, "learning_rate": 1.1990392800453888e-09, "loss": 0.2647, "step": 55110 }, { "epoch": 0.9951501888645736, "grad_norm": 0.42765867710113525, "learning_rate": 1.1555214710390339e-09, "loss": 0.238, "step": 55115 }, { "epoch": 0.9952404682974744, "grad_norm": 0.4244074523448944, "learning_rate": 1.1128079963729222e-09, "loss": 0.0957, "step": 55120 }, { "epoch": 0.9953307477303751, "grad_norm": 0.36028406023979187, "learning_rate": 1.0708988594831937e-09, "loss": 0.2058, "step": 55125 }, { "epoch": 0.9954210271632757, "grad_norm": 0.30583226680755615, "learning_rate": 1.0297940637393755e-09, "loss": 0.2262, "step": 55130 }, { "epoch": 0.9955113065961765, "grad_norm": 0.6024458408355713, "learning_rate": 9.894936124499322e-10, "loss": 0.2973, "step": 55135 }, { "epoch": 0.9956015860290772, "grad_norm": 0.4497954845428467, "learning_rate": 9.499975088556046e-10, "loss": 0.2808, "step": 55140 }, { "epoch": 0.995691865461978, "grad_norm": 0.5909044146537781, "learning_rate": 9.11305756132741e-10, "loss": 0.2521, "step": 55145 }, { "epoch": 0.9957821448948786, "grad_norm": 0.40942302346229553, "learning_rate": 8.734183573966271e-10, "loss": 0.2495, "step": 55150 }, { "epoch": 0.9958724243277793, "grad_norm": 0.18043410778045654, "learning_rate": 8.363353156926046e-10, "loss": 0.2246, "step": 55155 }, { "epoch": 0.9959627037606801, "grad_norm": 0.6366596817970276, "learning_rate": 8.00056634003843e-10, "loss": 0.2331, "step": 55160 }, { "epoch": 0.9960529831935808, "grad_norm": 0.4459744095802307, "learning_rate": 7.645823152502285e-10, "loss": 0.2308, "step": 55165 }, { "epoch": 0.9961432626264815, "grad_norm": 0.64663165807724, "learning_rate": 7.299123622839243e-10, "loss": 0.2971, "step": 55170 }, { "epoch": 0.9962335420593822, "grad_norm": 0.4321961998939514, "learning_rate": 6.960467778949209e-10, "loss": 0.262, "step": 55175 }, { "epoch": 0.9963238214922829, "grad_norm": 0.5614007115364075, "learning_rate": 6.629855648065952e-10, "loss": 0.2206, "step": 55180 }, { "epoch": 0.9964141009251837, "grad_norm": 0.3029267489910126, "learning_rate": 6.307287256790417e-10, "loss": 0.3297, "step": 55185 }, { "epoch": 0.9965043803580843, "grad_norm": 0.42381399869918823, "learning_rate": 5.992762631068516e-10, "loss": 0.2677, "step": 55190 }, { "epoch": 0.996594659790985, "grad_norm": 0.864939272403717, "learning_rate": 5.686281796202231e-10, "loss": 0.225, "step": 55195 }, { "epoch": 0.9966849392238858, "grad_norm": 0.5262264013290405, "learning_rate": 5.387844776849615e-10, "loss": 0.3182, "step": 55200 }, { "epoch": 0.9967752186567865, "grad_norm": 0.1654919683933258, "learning_rate": 5.09745159700259e-10, "loss": 0.1415, "step": 55205 }, { "epoch": 0.9968654980896872, "grad_norm": 0.39461642503738403, "learning_rate": 4.815102280042449e-10, "loss": 0.116, "step": 55210 }, { "epoch": 0.9969557775225879, "grad_norm": 0.37544846534729004, "learning_rate": 4.540796848673257e-10, "loss": 0.1863, "step": 55215 }, { "epoch": 0.9970460569554886, "grad_norm": 0.46736714243888855, "learning_rate": 4.2745353249551423e-10, "loss": 0.2557, "step": 55220 }, { "epoch": 0.9971363363883894, "grad_norm": 0.49816399812698364, "learning_rate": 4.016317730315411e-10, "loss": 0.3284, "step": 55225 }, { "epoch": 0.99722661582129, "grad_norm": 0.658227264881134, "learning_rate": 3.766144085515233e-10, "loss": 0.2312, "step": 55230 }, { "epoch": 0.9973168952541908, "grad_norm": 0.62874436378479, "learning_rate": 3.5240144106940546e-10, "loss": 0.2392, "step": 55235 }, { "epoch": 0.9974071746870915, "grad_norm": 0.5400856137275696, "learning_rate": 3.2899287253140846e-10, "loss": 0.2038, "step": 55240 }, { "epoch": 0.9974974541199922, "grad_norm": 0.5790485739707947, "learning_rate": 3.063887048215808e-10, "loss": 0.2685, "step": 55245 }, { "epoch": 0.9975877335528929, "grad_norm": 0.4721094071865082, "learning_rate": 2.8458893975735756e-10, "loss": 0.1797, "step": 55250 }, { "epoch": 0.9976780129857936, "grad_norm": 0.47890564799308777, "learning_rate": 2.635935790940014e-10, "loss": 0.2093, "step": 55255 }, { "epoch": 0.9977682924186944, "grad_norm": 0.7733480334281921, "learning_rate": 2.4340262451905126e-10, "loss": 0.2791, "step": 55260 }, { "epoch": 0.9978585718515951, "grad_norm": 0.4151161015033722, "learning_rate": 2.2401607765676346e-10, "loss": 0.2732, "step": 55265 }, { "epoch": 0.9979488512844957, "grad_norm": 1.0067189931869507, "learning_rate": 2.0543394006811156e-10, "loss": 0.2369, "step": 55270 }, { "epoch": 0.9980391307173965, "grad_norm": 0.37664952874183655, "learning_rate": 1.876562132452353e-10, "loss": 0.2181, "step": 55275 }, { "epoch": 0.9981294101502972, "grad_norm": 0.2857028841972351, "learning_rate": 1.706828986203224e-10, "loss": 0.1915, "step": 55280 }, { "epoch": 0.998219689583198, "grad_norm": 0.4453427493572235, "learning_rate": 1.5451399755894713e-10, "loss": 0.2189, "step": 55285 }, { "epoch": 0.9983099690160986, "grad_norm": 0.3707076609134674, "learning_rate": 1.391495113600705e-10, "loss": 0.2552, "step": 55290 }, { "epoch": 0.9984002484489993, "grad_norm": 0.45158451795578003, "learning_rate": 1.2458944126159113e-10, "loss": 0.2085, "step": 55295 }, { "epoch": 0.9984905278819001, "grad_norm": 0.49200868606567383, "learning_rate": 1.1083378843257386e-10, "loss": 0.2165, "step": 55300 }, { "epoch": 0.9985808073148008, "grad_norm": 0.5954471230506897, "learning_rate": 9.788255398102131e-11, "loss": 0.2107, "step": 55305 }, { "epoch": 0.9986710867477014, "grad_norm": 0.6071249842643738, "learning_rate": 8.573573894943288e-11, "loss": 0.2585, "step": 55310 }, { "epoch": 0.9987613661806022, "grad_norm": 0.17548207938671112, "learning_rate": 7.43933443136946e-11, "loss": 0.1225, "step": 55315 }, { "epoch": 0.9988516456135029, "grad_norm": 0.5218843817710876, "learning_rate": 6.385537098529959e-11, "loss": 0.1846, "step": 55320 }, { "epoch": 0.9989419250464037, "grad_norm": 0.5301579236984253, "learning_rate": 5.4121819814678675e-11, "loss": 0.2038, "step": 55325 }, { "epoch": 0.9990322044793043, "grad_norm": 0.38880106806755066, "learning_rate": 4.519269158231865e-11, "loss": 0.3044, "step": 55330 }, { "epoch": 0.999122483912205, "grad_norm": 0.41869696974754333, "learning_rate": 3.7067987007644023e-11, "loss": 0.3242, "step": 55335 }, { "epoch": 0.9992127633451058, "grad_norm": 0.19082844257354736, "learning_rate": 2.974770674457617e-11, "loss": 0.1967, "step": 55340 }, { "epoch": 0.9993030427780065, "grad_norm": 0.6308541297912598, "learning_rate": 2.3231851381533276e-11, "loss": 0.2056, "step": 55345 }, { "epoch": 0.9993933222109072, "grad_norm": 0.7585052251815796, "learning_rate": 1.7520421442540625e-11, "loss": 0.2667, "step": 55350 }, { "epoch": 0.9994836016438079, "grad_norm": 0.32018110156059265, "learning_rate": 1.2613417387230542e-11, "loss": 0.1958, "step": 55355 }, { "epoch": 0.9995738810767086, "grad_norm": 0.4806329309940338, "learning_rate": 8.510839609732201e-12, "loss": 0.2796, "step": 55360 }, { "epoch": 0.9996641605096094, "grad_norm": 0.6360764503479004, "learning_rate": 5.212688440892066e-12, "loss": 0.2305, "step": 55365 }, { "epoch": 0.99975443994251, "grad_norm": 0.5003007054328918, "learning_rate": 2.7189641449432146e-12, "loss": 0.2016, "step": 55370 }, { "epoch": 0.9998447193754108, "grad_norm": 0.30162450671195984, "learning_rate": 1.0296669239462376e-12, "loss": 0.2986, "step": 55375 }, { "epoch": 0.9999349988083115, "grad_norm": 0.2668619751930237, "learning_rate": 1.44796913348344e-13, "loss": 0.2414, "step": 55380 }, { "epoch": 0.9999891664680519, "step": 55383, "total_flos": 6.312971135569623e+19, "train_loss": 0.2639624638028189, "train_runtime": 248662.3087, "train_samples_per_second": 4.455, "train_steps_per_second": 0.223 } ], "logging_steps": 5, "max_steps": 55383, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 15000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.312971135569623e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }