{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 101745, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.948547840188707e-05, "grad_norm": 30.9423866904618, "learning_rate": 7.371007371007371e-10, "loss": 2.8702, "step": 1 }, { "epoch": 0.00014742739200943535, "grad_norm": 37.94247283776182, "learning_rate": 3.6855036855036855e-09, "loss": 3.2686, "step": 5 }, { "epoch": 0.0002948547840188707, "grad_norm": 38.655163544568026, "learning_rate": 7.371007371007371e-09, "loss": 3.448, "step": 10 }, { "epoch": 0.0004422821760283061, "grad_norm": 32.2150378367789, "learning_rate": 1.1056511056511056e-08, "loss": 3.1967, "step": 15 }, { "epoch": 0.0005897095680377414, "grad_norm": 57.50521475604607, "learning_rate": 1.4742014742014742e-08, "loss": 3.4316, "step": 20 }, { "epoch": 0.0007371369600471767, "grad_norm": 30.487307231454224, "learning_rate": 1.8427518427518426e-08, "loss": 3.3402, "step": 25 }, { "epoch": 0.0008845643520566122, "grad_norm": 26.391730461190253, "learning_rate": 2.2113022113022112e-08, "loss": 3.0672, "step": 30 }, { "epoch": 0.0010319917440660474, "grad_norm": 32.620102196364826, "learning_rate": 2.5798525798525798e-08, "loss": 3.2743, "step": 35 }, { "epoch": 0.0011794191360754828, "grad_norm": 47.37788114675061, "learning_rate": 2.9484029484029484e-08, "loss": 3.4071, "step": 40 }, { "epoch": 0.0013268465280849183, "grad_norm": 46.13686191420841, "learning_rate": 3.316953316953317e-08, "loss": 3.4588, "step": 45 }, { "epoch": 0.0014742739200943535, "grad_norm": 34.964634515693795, "learning_rate": 3.685503685503685e-08, "loss": 3.5287, "step": 50 }, { "epoch": 0.001621701312103789, "grad_norm": 30.490571306944762, "learning_rate": 4.0540540540540545e-08, "loss": 3.092, "step": 55 }, { "epoch": 0.0017691287041132243, "grad_norm": 30.033365911776297, "learning_rate": 4.4226044226044224e-08, "loss": 3.2747, "step": 60 }, { "epoch": 0.0019165560961226596, "grad_norm": 53.1302664875802, "learning_rate": 4.791154791154792e-08, "loss": 3.2486, "step": 65 }, { "epoch": 0.0020639834881320948, "grad_norm": 23.816793835683942, "learning_rate": 5.1597051597051596e-08, "loss": 3.3433, "step": 70 }, { "epoch": 0.0022114108801415304, "grad_norm": 56.99966916373158, "learning_rate": 5.528255528255529e-08, "loss": 3.1299, "step": 75 }, { "epoch": 0.0023588382721509656, "grad_norm": 19.757497439138493, "learning_rate": 5.896805896805897e-08, "loss": 3.1651, "step": 80 }, { "epoch": 0.002506265664160401, "grad_norm": 18.892171477849022, "learning_rate": 6.265356265356265e-08, "loss": 3.0892, "step": 85 }, { "epoch": 0.0026536930561698365, "grad_norm": 27.534821152187593, "learning_rate": 6.633906633906635e-08, "loss": 3.1851, "step": 90 }, { "epoch": 0.0028011204481792717, "grad_norm": 23.511604927556654, "learning_rate": 7.002457002457003e-08, "loss": 3.2401, "step": 95 }, { "epoch": 0.002948547840188707, "grad_norm": 28.076525419474038, "learning_rate": 7.37100737100737e-08, "loss": 3.0644, "step": 100 }, { "epoch": 0.0030959752321981426, "grad_norm": 15.252401433649776, "learning_rate": 7.73955773955774e-08, "loss": 3.0673, "step": 105 }, { "epoch": 0.003243402624207578, "grad_norm": 23.78306427255574, "learning_rate": 8.108108108108109e-08, "loss": 3.2094, "step": 110 }, { "epoch": 0.003390830016217013, "grad_norm": 19.042181321541598, "learning_rate": 8.476658476658477e-08, "loss": 3.2051, "step": 115 }, { "epoch": 0.0035382574082264487, "grad_norm": 18.813032361570052, "learning_rate": 8.845208845208845e-08, "loss": 2.9533, "step": 120 }, { "epoch": 0.003685684800235884, "grad_norm": 17.25343183484757, "learning_rate": 9.213759213759213e-08, "loss": 3.0111, "step": 125 }, { "epoch": 0.003833112192245319, "grad_norm": 21.330112960407455, "learning_rate": 9.582309582309583e-08, "loss": 2.9906, "step": 130 }, { "epoch": 0.003980539584254755, "grad_norm": 24.12673226413775, "learning_rate": 9.950859950859951e-08, "loss": 3.1183, "step": 135 }, { "epoch": 0.0041279669762641896, "grad_norm": 11.470982921737553, "learning_rate": 1.0319410319410319e-07, "loss": 2.8709, "step": 140 }, { "epoch": 0.004275394368273625, "grad_norm": 18.209558990875575, "learning_rate": 1.0687960687960687e-07, "loss": 2.8862, "step": 145 }, { "epoch": 0.004422821760283061, "grad_norm": 19.38394544137869, "learning_rate": 1.1056511056511058e-07, "loss": 2.7689, "step": 150 }, { "epoch": 0.004570249152292496, "grad_norm": 14.582834306239926, "learning_rate": 1.1425061425061426e-07, "loss": 2.8648, "step": 155 }, { "epoch": 0.004717676544301931, "grad_norm": 13.411353684277527, "learning_rate": 1.1793611793611794e-07, "loss": 2.8186, "step": 160 }, { "epoch": 0.004865103936311367, "grad_norm": 17.606193372440046, "learning_rate": 1.2162162162162163e-07, "loss": 2.8202, "step": 165 }, { "epoch": 0.005012531328320802, "grad_norm": 10.681759252853546, "learning_rate": 1.253071253071253e-07, "loss": 2.9367, "step": 170 }, { "epoch": 0.005159958720330237, "grad_norm": 23.428430377336817, "learning_rate": 1.28992628992629e-07, "loss": 2.9612, "step": 175 }, { "epoch": 0.005307386112339673, "grad_norm": 9.483139855394048, "learning_rate": 1.326781326781327e-07, "loss": 2.898, "step": 180 }, { "epoch": 0.005454813504349108, "grad_norm": 9.334385646754015, "learning_rate": 1.3636363636363637e-07, "loss": 2.8413, "step": 185 }, { "epoch": 0.0056022408963585435, "grad_norm": 9.869123008279015, "learning_rate": 1.4004914004914005e-07, "loss": 2.8748, "step": 190 }, { "epoch": 0.005749668288367979, "grad_norm": 9.66669341282971, "learning_rate": 1.4373464373464376e-07, "loss": 2.9272, "step": 195 }, { "epoch": 0.005897095680377414, "grad_norm": 7.198923904982146, "learning_rate": 1.474201474201474e-07, "loss": 2.6328, "step": 200 }, { "epoch": 0.0060445230723868495, "grad_norm": 8.115718200800014, "learning_rate": 1.5110565110565112e-07, "loss": 2.5704, "step": 205 }, { "epoch": 0.006191950464396285, "grad_norm": 8.01855134040426, "learning_rate": 1.547911547911548e-07, "loss": 2.8053, "step": 210 }, { "epoch": 0.00633937785640572, "grad_norm": 7.677375423456987, "learning_rate": 1.5847665847665847e-07, "loss": 2.661, "step": 215 }, { "epoch": 0.006486805248415156, "grad_norm": 9.981821687398693, "learning_rate": 1.6216216216216218e-07, "loss": 2.7104, "step": 220 }, { "epoch": 0.006634232640424591, "grad_norm": 8.476963502166862, "learning_rate": 1.6584766584766583e-07, "loss": 2.8028, "step": 225 }, { "epoch": 0.006781660032434026, "grad_norm": 7.846974072603778, "learning_rate": 1.6953316953316954e-07, "loss": 2.6724, "step": 230 }, { "epoch": 0.006929087424443462, "grad_norm": 8.688433839298972, "learning_rate": 1.7321867321867324e-07, "loss": 2.6323, "step": 235 }, { "epoch": 0.007076514816452897, "grad_norm": 7.752874794037548, "learning_rate": 1.769041769041769e-07, "loss": 2.6274, "step": 240 }, { "epoch": 0.007223942208462332, "grad_norm": 6.96765672435493, "learning_rate": 1.805896805896806e-07, "loss": 2.4706, "step": 245 }, { "epoch": 0.007371369600471768, "grad_norm": 7.690507060593333, "learning_rate": 1.8427518427518426e-07, "loss": 2.6568, "step": 250 }, { "epoch": 0.007518796992481203, "grad_norm": 8.667997684106794, "learning_rate": 1.8796068796068796e-07, "loss": 2.5343, "step": 255 }, { "epoch": 0.007666224384490638, "grad_norm": 7.406800498911982, "learning_rate": 1.9164619164619167e-07, "loss": 2.6612, "step": 260 }, { "epoch": 0.007813651776500074, "grad_norm": 8.00415183836285, "learning_rate": 1.9533169533169532e-07, "loss": 2.6175, "step": 265 }, { "epoch": 0.00796107916850951, "grad_norm": 8.180390454395255, "learning_rate": 1.9901719901719903e-07, "loss": 2.6868, "step": 270 }, { "epoch": 0.008108506560518945, "grad_norm": 8.18373435694988, "learning_rate": 2.0270270270270273e-07, "loss": 2.5649, "step": 275 }, { "epoch": 0.008255933952528379, "grad_norm": 7.811064615781247, "learning_rate": 2.0638820638820638e-07, "loss": 2.4854, "step": 280 }, { "epoch": 0.008403361344537815, "grad_norm": 6.290197750889664, "learning_rate": 2.100737100737101e-07, "loss": 2.4887, "step": 285 }, { "epoch": 0.00855078873654725, "grad_norm": 8.499701856149139, "learning_rate": 2.1375921375921374e-07, "loss": 2.6444, "step": 290 }, { "epoch": 0.008698216128556686, "grad_norm": 8.183452175405042, "learning_rate": 2.1744471744471745e-07, "loss": 2.5165, "step": 295 }, { "epoch": 0.008845643520566122, "grad_norm": 7.947862315552764, "learning_rate": 2.2113022113022115e-07, "loss": 2.5701, "step": 300 }, { "epoch": 0.008993070912575557, "grad_norm": 7.432790894665844, "learning_rate": 2.248157248157248e-07, "loss": 2.5363, "step": 305 }, { "epoch": 0.009140498304584991, "grad_norm": 7.258936816630882, "learning_rate": 2.285012285012285e-07, "loss": 2.4149, "step": 310 }, { "epoch": 0.009287925696594427, "grad_norm": 8.74558911918674, "learning_rate": 2.321867321867322e-07, "loss": 2.4467, "step": 315 }, { "epoch": 0.009435353088603863, "grad_norm": 9.450740164193094, "learning_rate": 2.3587223587223587e-07, "loss": 2.5334, "step": 320 }, { "epoch": 0.009582780480613298, "grad_norm": 7.4963603395695095, "learning_rate": 2.3955773955773955e-07, "loss": 2.4235, "step": 325 }, { "epoch": 0.009730207872622734, "grad_norm": 8.49457985769388, "learning_rate": 2.4324324324324326e-07, "loss": 2.5202, "step": 330 }, { "epoch": 0.009877635264632168, "grad_norm": 8.919108859728025, "learning_rate": 2.4692874692874696e-07, "loss": 2.4189, "step": 335 }, { "epoch": 0.010025062656641603, "grad_norm": 6.692091695362736, "learning_rate": 2.506142506142506e-07, "loss": 2.2887, "step": 340 }, { "epoch": 0.010172490048651039, "grad_norm": 8.552166641508457, "learning_rate": 2.542997542997543e-07, "loss": 2.5543, "step": 345 }, { "epoch": 0.010319917440660475, "grad_norm": 8.767602443231015, "learning_rate": 2.57985257985258e-07, "loss": 2.3898, "step": 350 }, { "epoch": 0.01046734483266991, "grad_norm": 8.066827512427823, "learning_rate": 2.616707616707617e-07, "loss": 2.3772, "step": 355 }, { "epoch": 0.010614772224679346, "grad_norm": 7.504763656550345, "learning_rate": 2.653562653562654e-07, "loss": 2.4167, "step": 360 }, { "epoch": 0.01076219961668878, "grad_norm": 7.662270519150018, "learning_rate": 2.6904176904176904e-07, "loss": 2.425, "step": 365 }, { "epoch": 0.010909627008698216, "grad_norm": 7.810510426076602, "learning_rate": 2.7272727272727274e-07, "loss": 2.3223, "step": 370 }, { "epoch": 0.011057054400707651, "grad_norm": 8.196326444626031, "learning_rate": 2.764127764127764e-07, "loss": 2.2426, "step": 375 }, { "epoch": 0.011204481792717087, "grad_norm": 7.919620348933291, "learning_rate": 2.800982800982801e-07, "loss": 2.2106, "step": 380 }, { "epoch": 0.011351909184726523, "grad_norm": 8.454184864162201, "learning_rate": 2.837837837837838e-07, "loss": 2.3529, "step": 385 }, { "epoch": 0.011499336576735958, "grad_norm": 7.543448099998184, "learning_rate": 2.874692874692875e-07, "loss": 2.3493, "step": 390 }, { "epoch": 0.011646763968745392, "grad_norm": 6.98076420273146, "learning_rate": 2.9115479115479117e-07, "loss": 2.2824, "step": 395 }, { "epoch": 0.011794191360754828, "grad_norm": 6.745591825013372, "learning_rate": 2.948402948402948e-07, "loss": 2.2382, "step": 400 }, { "epoch": 0.011941618752764263, "grad_norm": 7.375176860696024, "learning_rate": 2.985257985257985e-07, "loss": 2.2941, "step": 405 }, { "epoch": 0.012089046144773699, "grad_norm": 7.942995926499134, "learning_rate": 3.0221130221130223e-07, "loss": 2.2197, "step": 410 }, { "epoch": 0.012236473536783135, "grad_norm": 7.811424859126737, "learning_rate": 3.0589680589680594e-07, "loss": 2.2346, "step": 415 }, { "epoch": 0.01238390092879257, "grad_norm": 7.782140683177709, "learning_rate": 3.095823095823096e-07, "loss": 2.2287, "step": 420 }, { "epoch": 0.012531328320802004, "grad_norm": 7.77296877032919, "learning_rate": 3.1326781326781324e-07, "loss": 2.1994, "step": 425 }, { "epoch": 0.01267875571281144, "grad_norm": 9.319481845100938, "learning_rate": 3.1695331695331695e-07, "loss": 2.3411, "step": 430 }, { "epoch": 0.012826183104820876, "grad_norm": 8.256365971833413, "learning_rate": 3.2063882063882065e-07, "loss": 2.2705, "step": 435 }, { "epoch": 0.012973610496830311, "grad_norm": 9.636156006675648, "learning_rate": 3.2432432432432436e-07, "loss": 2.0821, "step": 440 }, { "epoch": 0.013121037888839747, "grad_norm": 7.983680494084509, "learning_rate": 3.28009828009828e-07, "loss": 2.2833, "step": 445 }, { "epoch": 0.013268465280849183, "grad_norm": 10.247751599134356, "learning_rate": 3.3169533169533167e-07, "loss": 2.2721, "step": 450 }, { "epoch": 0.013415892672858616, "grad_norm": 7.656683985296896, "learning_rate": 3.3538083538083537e-07, "loss": 2.1859, "step": 455 }, { "epoch": 0.013563320064868052, "grad_norm": 7.532705968407519, "learning_rate": 3.390663390663391e-07, "loss": 2.2112, "step": 460 }, { "epoch": 0.013710747456877488, "grad_norm": 7.902517996741907, "learning_rate": 3.427518427518428e-07, "loss": 2.1779, "step": 465 }, { "epoch": 0.013858174848886923, "grad_norm": 7.986884976223534, "learning_rate": 3.464373464373465e-07, "loss": 2.1862, "step": 470 }, { "epoch": 0.014005602240896359, "grad_norm": 8.969726748505904, "learning_rate": 3.501228501228501e-07, "loss": 2.1419, "step": 475 }, { "epoch": 0.014153029632905795, "grad_norm": 8.57488941756744, "learning_rate": 3.538083538083538e-07, "loss": 2.2078, "step": 480 }, { "epoch": 0.014300457024915229, "grad_norm": 8.073120619979687, "learning_rate": 3.574938574938575e-07, "loss": 2.1343, "step": 485 }, { "epoch": 0.014447884416924664, "grad_norm": 8.048246387328213, "learning_rate": 3.611793611793612e-07, "loss": 2.1276, "step": 490 }, { "epoch": 0.0145953118089341, "grad_norm": 9.700467498092559, "learning_rate": 3.648648648648649e-07, "loss": 2.2528, "step": 495 }, { "epoch": 0.014742739200943536, "grad_norm": 9.200829698172315, "learning_rate": 3.685503685503685e-07, "loss": 2.0964, "step": 500 }, { "epoch": 0.014742739200943536, "eval_loss": 2.0849533081054688, "eval_runtime": 4.2075, "eval_samples_per_second": 94.117, "eval_steps_per_second": 3.09, "step": 500 }, { "epoch": 0.014890166592952971, "grad_norm": 8.224035318002366, "learning_rate": 3.722358722358722e-07, "loss": 2.1641, "step": 505 }, { "epoch": 0.015037593984962405, "grad_norm": 8.628770191413626, "learning_rate": 3.759213759213759e-07, "loss": 2.1306, "step": 510 }, { "epoch": 0.01518502137697184, "grad_norm": 7.18743523577171, "learning_rate": 3.7960687960687963e-07, "loss": 2.1526, "step": 515 }, { "epoch": 0.015332448768981276, "grad_norm": 8.64066190091137, "learning_rate": 3.8329238329238333e-07, "loss": 2.1228, "step": 520 }, { "epoch": 0.015479876160990712, "grad_norm": 7.576278336772019, "learning_rate": 3.86977886977887e-07, "loss": 2.0544, "step": 525 }, { "epoch": 0.015627303553000148, "grad_norm": 8.119714361003552, "learning_rate": 3.9066339066339064e-07, "loss": 2.0757, "step": 530 }, { "epoch": 0.01577473094500958, "grad_norm": 7.893110963915209, "learning_rate": 3.9434889434889435e-07, "loss": 2.0915, "step": 535 }, { "epoch": 0.01592215833701902, "grad_norm": 7.589584255687732, "learning_rate": 3.9803439803439805e-07, "loss": 2.0578, "step": 540 }, { "epoch": 0.016069585729028453, "grad_norm": 7.844041328401866, "learning_rate": 4.0171990171990176e-07, "loss": 2.0601, "step": 545 }, { "epoch": 0.01621701312103789, "grad_norm": 8.4287304332959, "learning_rate": 4.0540540540540546e-07, "loss": 2.0964, "step": 550 }, { "epoch": 0.016364440513047324, "grad_norm": 8.467664612744807, "learning_rate": 4.0909090909090906e-07, "loss": 2.1195, "step": 555 }, { "epoch": 0.016511867905056758, "grad_norm": 8.712396022458508, "learning_rate": 4.1277641277641277e-07, "loss": 2.0425, "step": 560 }, { "epoch": 0.016659295297066196, "grad_norm": 8.438363157694067, "learning_rate": 4.164619164619165e-07, "loss": 2.0386, "step": 565 }, { "epoch": 0.01680672268907563, "grad_norm": 8.406151470831311, "learning_rate": 4.201474201474202e-07, "loss": 2.1302, "step": 570 }, { "epoch": 0.016954150081085067, "grad_norm": 7.317705319994849, "learning_rate": 4.238329238329239e-07, "loss": 2.0128, "step": 575 }, { "epoch": 0.0171015774730945, "grad_norm": 8.168942631309628, "learning_rate": 4.275184275184275e-07, "loss": 2.0046, "step": 580 }, { "epoch": 0.017249004865103935, "grad_norm": 9.878324062758924, "learning_rate": 4.312039312039312e-07, "loss": 2.0647, "step": 585 }, { "epoch": 0.017396432257113372, "grad_norm": 7.554957379960365, "learning_rate": 4.348894348894349e-07, "loss": 1.9964, "step": 590 }, { "epoch": 0.017543859649122806, "grad_norm": 7.194014350926216, "learning_rate": 4.385749385749386e-07, "loss": 2.0818, "step": 595 }, { "epoch": 0.017691287041132243, "grad_norm": 9.145919838331386, "learning_rate": 4.422604422604423e-07, "loss": 2.0723, "step": 600 }, { "epoch": 0.017838714433141677, "grad_norm": 8.56722482835929, "learning_rate": 4.4594594594594596e-07, "loss": 2.0511, "step": 605 }, { "epoch": 0.017986141825151115, "grad_norm": 7.374550662930165, "learning_rate": 4.496314496314496e-07, "loss": 1.9875, "step": 610 }, { "epoch": 0.01813356921716055, "grad_norm": 8.288585894620851, "learning_rate": 4.533169533169533e-07, "loss": 2.0913, "step": 615 }, { "epoch": 0.018280996609169983, "grad_norm": 8.256852276878954, "learning_rate": 4.57002457002457e-07, "loss": 2.0076, "step": 620 }, { "epoch": 0.01842842400117942, "grad_norm": 8.106659785387263, "learning_rate": 4.6068796068796073e-07, "loss": 2.0183, "step": 625 }, { "epoch": 0.018575851393188854, "grad_norm": 8.397321876841133, "learning_rate": 4.643734643734644e-07, "loss": 2.0334, "step": 630 }, { "epoch": 0.01872327878519829, "grad_norm": 7.971633634594784, "learning_rate": 4.6805896805896804e-07, "loss": 2.028, "step": 635 }, { "epoch": 0.018870706177207725, "grad_norm": 7.84466382803252, "learning_rate": 4.7174447174447174e-07, "loss": 1.975, "step": 640 }, { "epoch": 0.01901813356921716, "grad_norm": 7.964683490226853, "learning_rate": 4.7542997542997545e-07, "loss": 1.9245, "step": 645 }, { "epoch": 0.019165560961226596, "grad_norm": 8.23702669712302, "learning_rate": 4.791154791154791e-07, "loss": 1.9275, "step": 650 }, { "epoch": 0.01931298835323603, "grad_norm": 7.590722795352279, "learning_rate": 4.828009828009829e-07, "loss": 1.9209, "step": 655 }, { "epoch": 0.019460415745245468, "grad_norm": 7.4598243148616366, "learning_rate": 4.864864864864865e-07, "loss": 1.9861, "step": 660 }, { "epoch": 0.0196078431372549, "grad_norm": 8.340109466195159, "learning_rate": 4.901719901719903e-07, "loss": 1.9849, "step": 665 }, { "epoch": 0.019755270529264336, "grad_norm": 8.184611683033337, "learning_rate": 4.938574938574939e-07, "loss": 1.9661, "step": 670 }, { "epoch": 0.019902697921273773, "grad_norm": 8.460974971942406, "learning_rate": 4.975429975429975e-07, "loss": 1.9855, "step": 675 }, { "epoch": 0.020050125313283207, "grad_norm": 8.277840122808943, "learning_rate": 5.012285012285012e-07, "loss": 1.9472, "step": 680 }, { "epoch": 0.020197552705292644, "grad_norm": 8.240125283367751, "learning_rate": 5.049140049140049e-07, "loss": 1.9284, "step": 685 }, { "epoch": 0.020344980097302078, "grad_norm": 8.385979220532558, "learning_rate": 5.085995085995086e-07, "loss": 1.9557, "step": 690 }, { "epoch": 0.020492407489311516, "grad_norm": 10.962955841674026, "learning_rate": 5.122850122850123e-07, "loss": 1.9756, "step": 695 }, { "epoch": 0.02063983488132095, "grad_norm": 7.2691204208510936, "learning_rate": 5.15970515970516e-07, "loss": 1.9446, "step": 700 }, { "epoch": 0.020787262273330383, "grad_norm": 8.183946892858604, "learning_rate": 5.196560196560197e-07, "loss": 1.9962, "step": 705 }, { "epoch": 0.02093468966533982, "grad_norm": 8.225950087438225, "learning_rate": 5.233415233415234e-07, "loss": 1.9847, "step": 710 }, { "epoch": 0.021082117057349255, "grad_norm": 8.815060063336569, "learning_rate": 5.270270270270271e-07, "loss": 1.9319, "step": 715 }, { "epoch": 0.021229544449358692, "grad_norm": 8.602166078480584, "learning_rate": 5.307125307125308e-07, "loss": 1.9853, "step": 720 }, { "epoch": 0.021376971841368126, "grad_norm": 7.88740382142766, "learning_rate": 5.343980343980344e-07, "loss": 1.9472, "step": 725 }, { "epoch": 0.02152439923337756, "grad_norm": 8.295602990689945, "learning_rate": 5.380835380835381e-07, "loss": 1.918, "step": 730 }, { "epoch": 0.021671826625386997, "grad_norm": 7.727713537555281, "learning_rate": 5.417690417690417e-07, "loss": 1.9149, "step": 735 }, { "epoch": 0.02181925401739643, "grad_norm": 7.560516628281533, "learning_rate": 5.454545454545455e-07, "loss": 1.9143, "step": 740 }, { "epoch": 0.02196668140940587, "grad_norm": 6.095977896929418, "learning_rate": 5.491400491400491e-07, "loss": 1.8497, "step": 745 }, { "epoch": 0.022114108801415303, "grad_norm": 9.604098719771008, "learning_rate": 5.528255528255528e-07, "loss": 1.8979, "step": 750 }, { "epoch": 0.02226153619342474, "grad_norm": 8.191960973934634, "learning_rate": 5.565110565110566e-07, "loss": 1.8915, "step": 755 }, { "epoch": 0.022408963585434174, "grad_norm": 7.793051678519245, "learning_rate": 5.601965601965602e-07, "loss": 1.89, "step": 760 }, { "epoch": 0.022556390977443608, "grad_norm": 7.445001983996761, "learning_rate": 5.63882063882064e-07, "loss": 1.9454, "step": 765 }, { "epoch": 0.022703818369453045, "grad_norm": 9.065093305889208, "learning_rate": 5.675675675675676e-07, "loss": 1.8869, "step": 770 }, { "epoch": 0.02285124576146248, "grad_norm": 8.529734546497714, "learning_rate": 5.712530712530713e-07, "loss": 1.9438, "step": 775 }, { "epoch": 0.022998673153471916, "grad_norm": 7.447300336977734, "learning_rate": 5.74938574938575e-07, "loss": 1.8353, "step": 780 }, { "epoch": 0.02314610054548135, "grad_norm": 9.29144418572723, "learning_rate": 5.786240786240786e-07, "loss": 1.8275, "step": 785 }, { "epoch": 0.023293527937490784, "grad_norm": 9.39866463178264, "learning_rate": 5.823095823095823e-07, "loss": 1.8416, "step": 790 }, { "epoch": 0.02344095532950022, "grad_norm": 8.924639993643297, "learning_rate": 5.85995085995086e-07, "loss": 1.9419, "step": 795 }, { "epoch": 0.023588382721509656, "grad_norm": 7.325194350864726, "learning_rate": 5.896805896805896e-07, "loss": 1.8519, "step": 800 }, { "epoch": 0.023735810113519093, "grad_norm": 9.880646877460231, "learning_rate": 5.933660933660934e-07, "loss": 1.9493, "step": 805 }, { "epoch": 0.023883237505528527, "grad_norm": 7.087448274281415, "learning_rate": 5.97051597051597e-07, "loss": 1.9422, "step": 810 }, { "epoch": 0.024030664897537964, "grad_norm": 8.162828187753863, "learning_rate": 6.007371007371008e-07, "loss": 1.857, "step": 815 }, { "epoch": 0.024178092289547398, "grad_norm": 8.806278294762134, "learning_rate": 6.044226044226045e-07, "loss": 1.8846, "step": 820 }, { "epoch": 0.024325519681556832, "grad_norm": 8.818979509747843, "learning_rate": 6.081081081081081e-07, "loss": 1.9086, "step": 825 }, { "epoch": 0.02447294707356627, "grad_norm": 8.40551760145513, "learning_rate": 6.117936117936119e-07, "loss": 1.8488, "step": 830 }, { "epoch": 0.024620374465575703, "grad_norm": 8.674573942826687, "learning_rate": 6.154791154791154e-07, "loss": 1.8549, "step": 835 }, { "epoch": 0.02476780185758514, "grad_norm": 9.324729645527269, "learning_rate": 6.191646191646192e-07, "loss": 1.845, "step": 840 }, { "epoch": 0.024915229249594575, "grad_norm": 9.204304864270657, "learning_rate": 6.228501228501228e-07, "loss": 1.9071, "step": 845 }, { "epoch": 0.02506265664160401, "grad_norm": 7.822760355568522, "learning_rate": 6.265356265356265e-07, "loss": 1.8891, "step": 850 }, { "epoch": 0.025210084033613446, "grad_norm": 9.978880757067229, "learning_rate": 6.302211302211302e-07, "loss": 1.8245, "step": 855 }, { "epoch": 0.02535751142562288, "grad_norm": 7.680107757359628, "learning_rate": 6.339066339066339e-07, "loss": 1.8131, "step": 860 }, { "epoch": 0.025504938817632317, "grad_norm": 8.457542739407163, "learning_rate": 6.375921375921377e-07, "loss": 1.9502, "step": 865 }, { "epoch": 0.02565236620964175, "grad_norm": 7.986661977643492, "learning_rate": 6.412776412776413e-07, "loss": 1.8288, "step": 870 }, { "epoch": 0.025799793601651185, "grad_norm": 9.189883661393205, "learning_rate": 6.44963144963145e-07, "loss": 1.8807, "step": 875 }, { "epoch": 0.025947220993660623, "grad_norm": 7.765272954011412, "learning_rate": 6.486486486486487e-07, "loss": 1.8668, "step": 880 }, { "epoch": 0.026094648385670056, "grad_norm": 8.093102140192281, "learning_rate": 6.523341523341524e-07, "loss": 1.9457, "step": 885 }, { "epoch": 0.026242075777679494, "grad_norm": 7.844647644931612, "learning_rate": 6.56019656019656e-07, "loss": 1.8904, "step": 890 }, { "epoch": 0.026389503169688928, "grad_norm": 8.155885937572936, "learning_rate": 6.597051597051597e-07, "loss": 1.8479, "step": 895 }, { "epoch": 0.026536930561698365, "grad_norm": 7.146697174714824, "learning_rate": 6.633906633906633e-07, "loss": 1.8817, "step": 900 }, { "epoch": 0.0266843579537078, "grad_norm": 8.191088776199848, "learning_rate": 6.670761670761671e-07, "loss": 1.8794, "step": 905 }, { "epoch": 0.026831785345717233, "grad_norm": 7.524852055624468, "learning_rate": 6.707616707616707e-07, "loss": 1.8666, "step": 910 }, { "epoch": 0.02697921273772667, "grad_norm": 7.7006243394346985, "learning_rate": 6.744471744471745e-07, "loss": 1.805, "step": 915 }, { "epoch": 0.027126640129736104, "grad_norm": 7.736182733496133, "learning_rate": 6.781326781326782e-07, "loss": 1.8399, "step": 920 }, { "epoch": 0.02727406752174554, "grad_norm": 18.47628582325446, "learning_rate": 6.818181818181818e-07, "loss": 1.8098, "step": 925 }, { "epoch": 0.027421494913754976, "grad_norm": 8.478924317129973, "learning_rate": 6.855036855036856e-07, "loss": 1.9288, "step": 930 }, { "epoch": 0.02756892230576441, "grad_norm": 7.1790573569093, "learning_rate": 6.891891891891892e-07, "loss": 1.8131, "step": 935 }, { "epoch": 0.027716349697773847, "grad_norm": 6.617505722154651, "learning_rate": 6.92874692874693e-07, "loss": 1.7993, "step": 940 }, { "epoch": 0.02786377708978328, "grad_norm": 6.9349119963919925, "learning_rate": 6.965601965601965e-07, "loss": 1.7778, "step": 945 }, { "epoch": 0.028011204481792718, "grad_norm": 7.599613287047876, "learning_rate": 7.002457002457002e-07, "loss": 1.8176, "step": 950 }, { "epoch": 0.028158631873802152, "grad_norm": 8.313132248896938, "learning_rate": 7.039312039312039e-07, "loss": 1.844, "step": 955 }, { "epoch": 0.02830605926581159, "grad_norm": 8.672076138140154, "learning_rate": 7.076167076167076e-07, "loss": 1.7972, "step": 960 }, { "epoch": 0.028453486657821023, "grad_norm": 8.751455123880493, "learning_rate": 7.113022113022113e-07, "loss": 1.7573, "step": 965 }, { "epoch": 0.028600914049830457, "grad_norm": 7.6278763989673, "learning_rate": 7.14987714987715e-07, "loss": 1.7472, "step": 970 }, { "epoch": 0.028748341441839895, "grad_norm": 6.917437467377782, "learning_rate": 7.186732186732187e-07, "loss": 1.756, "step": 975 }, { "epoch": 0.02889576883384933, "grad_norm": 8.808274298351789, "learning_rate": 7.223587223587224e-07, "loss": 1.7983, "step": 980 }, { "epoch": 0.029043196225858766, "grad_norm": 8.384543039928145, "learning_rate": 7.260442260442261e-07, "loss": 1.8312, "step": 985 }, { "epoch": 0.0291906236178682, "grad_norm": 8.079393892228287, "learning_rate": 7.297297297297298e-07, "loss": 1.8575, "step": 990 }, { "epoch": 0.029338051009877634, "grad_norm": 7.78031210843989, "learning_rate": 7.334152334152335e-07, "loss": 1.7676, "step": 995 }, { "epoch": 0.02948547840188707, "grad_norm": 7.2105182077245935, "learning_rate": 7.37100737100737e-07, "loss": 1.8225, "step": 1000 }, { "epoch": 0.02948547840188707, "eval_loss": 1.620565414428711, "eval_runtime": 4.207, "eval_samples_per_second": 94.13, "eval_steps_per_second": 3.09, "step": 1000 }, { "epoch": 0.029632905793896505, "grad_norm": 7.094139398578996, "learning_rate": 7.407862407862408e-07, "loss": 1.7495, "step": 1005 }, { "epoch": 0.029780333185905943, "grad_norm": 7.24262710153455, "learning_rate": 7.444717444717444e-07, "loss": 1.7896, "step": 1010 }, { "epoch": 0.029927760577915376, "grad_norm": 8.310397334205971, "learning_rate": 7.481572481572482e-07, "loss": 1.779, "step": 1015 }, { "epoch": 0.03007518796992481, "grad_norm": 7.365558968687287, "learning_rate": 7.518427518427518e-07, "loss": 1.7919, "step": 1020 }, { "epoch": 0.030222615361934248, "grad_norm": 9.41815875740604, "learning_rate": 7.555282555282556e-07, "loss": 1.7655, "step": 1025 }, { "epoch": 0.03037004275394368, "grad_norm": 8.252462060370522, "learning_rate": 7.592137592137593e-07, "loss": 1.732, "step": 1030 }, { "epoch": 0.03051747014595312, "grad_norm": 7.674211433755938, "learning_rate": 7.628992628992629e-07, "loss": 1.7759, "step": 1035 }, { "epoch": 0.030664897537962553, "grad_norm": 7.408801512525642, "learning_rate": 7.665847665847667e-07, "loss": 1.7738, "step": 1040 }, { "epoch": 0.03081232492997199, "grad_norm": 6.609349392213814, "learning_rate": 7.702702702702703e-07, "loss": 1.7496, "step": 1045 }, { "epoch": 0.030959752321981424, "grad_norm": 9.062171247758307, "learning_rate": 7.73955773955774e-07, "loss": 1.8244, "step": 1050 }, { "epoch": 0.031107179713990858, "grad_norm": 7.935532682543053, "learning_rate": 7.776412776412776e-07, "loss": 1.7652, "step": 1055 }, { "epoch": 0.031254607106000296, "grad_norm": 7.372949961341676, "learning_rate": 7.813267813267813e-07, "loss": 1.7454, "step": 1060 }, { "epoch": 0.03140203449800973, "grad_norm": 7.497961304661548, "learning_rate": 7.85012285012285e-07, "loss": 1.7812, "step": 1065 }, { "epoch": 0.03154946189001916, "grad_norm": 8.68529054282689, "learning_rate": 7.886977886977887e-07, "loss": 1.756, "step": 1070 }, { "epoch": 0.0316968892820286, "grad_norm": 8.70547547962537, "learning_rate": 7.923832923832924e-07, "loss": 1.7741, "step": 1075 }, { "epoch": 0.03184431667403804, "grad_norm": 7.384566967659614, "learning_rate": 7.960687960687961e-07, "loss": 1.7182, "step": 1080 }, { "epoch": 0.03199174406604747, "grad_norm": 7.4795643785276695, "learning_rate": 7.997542997542998e-07, "loss": 1.7136, "step": 1085 }, { "epoch": 0.032139171458056906, "grad_norm": 7.657183749174026, "learning_rate": 8.034398034398035e-07, "loss": 1.7888, "step": 1090 }, { "epoch": 0.03228659885006634, "grad_norm": 7.530536701445304, "learning_rate": 8.071253071253072e-07, "loss": 1.7745, "step": 1095 }, { "epoch": 0.03243402624207578, "grad_norm": 7.871968985725707, "learning_rate": 8.108108108108109e-07, "loss": 1.7465, "step": 1100 }, { "epoch": 0.03258145363408521, "grad_norm": 7.729328376606689, "learning_rate": 8.144963144963145e-07, "loss": 1.8073, "step": 1105 }, { "epoch": 0.03272888102609465, "grad_norm": 8.16229918141774, "learning_rate": 8.181818181818181e-07, "loss": 1.7644, "step": 1110 }, { "epoch": 0.032876308418104086, "grad_norm": 6.482326033668003, "learning_rate": 8.218673218673219e-07, "loss": 1.8361, "step": 1115 }, { "epoch": 0.033023735810113516, "grad_norm": 7.00005675356954, "learning_rate": 8.255528255528255e-07, "loss": 1.7529, "step": 1120 }, { "epoch": 0.033171163202122954, "grad_norm": 8.521725042852445, "learning_rate": 8.292383292383293e-07, "loss": 1.8068, "step": 1125 }, { "epoch": 0.03331859059413239, "grad_norm": 7.661438629997032, "learning_rate": 8.32923832923833e-07, "loss": 1.7599, "step": 1130 }, { "epoch": 0.03346601798614183, "grad_norm": 8.478788908128204, "learning_rate": 8.366093366093366e-07, "loss": 1.8101, "step": 1135 }, { "epoch": 0.03361344537815126, "grad_norm": 7.5660789319100346, "learning_rate": 8.402948402948404e-07, "loss": 1.7847, "step": 1140 }, { "epoch": 0.033760872770160696, "grad_norm": 6.769891642912962, "learning_rate": 8.43980343980344e-07, "loss": 1.744, "step": 1145 }, { "epoch": 0.033908300162170134, "grad_norm": 7.801072016590257, "learning_rate": 8.476658476658478e-07, "loss": 1.6801, "step": 1150 }, { "epoch": 0.034055727554179564, "grad_norm": 6.971457296671511, "learning_rate": 8.513513513513514e-07, "loss": 1.758, "step": 1155 }, { "epoch": 0.034203154946189, "grad_norm": 7.468920797520639, "learning_rate": 8.55036855036855e-07, "loss": 1.7687, "step": 1160 }, { "epoch": 0.03435058233819844, "grad_norm": 7.877697079942964, "learning_rate": 8.587223587223587e-07, "loss": 1.7831, "step": 1165 }, { "epoch": 0.03449800973020787, "grad_norm": 7.616502928373716, "learning_rate": 8.624078624078624e-07, "loss": 1.7931, "step": 1170 }, { "epoch": 0.03464543712221731, "grad_norm": 8.233992252385944, "learning_rate": 8.660933660933661e-07, "loss": 1.7503, "step": 1175 }, { "epoch": 0.034792864514226744, "grad_norm": 7.069620636011881, "learning_rate": 8.697788697788698e-07, "loss": 1.7546, "step": 1180 }, { "epoch": 0.03494029190623618, "grad_norm": 7.197216343933346, "learning_rate": 8.734643734643734e-07, "loss": 1.7701, "step": 1185 }, { "epoch": 0.03508771929824561, "grad_norm": 7.934403805069261, "learning_rate": 8.771498771498772e-07, "loss": 1.7785, "step": 1190 }, { "epoch": 0.03523514669025505, "grad_norm": 7.556500895674563, "learning_rate": 8.808353808353809e-07, "loss": 1.7489, "step": 1195 }, { "epoch": 0.03538257408226449, "grad_norm": 8.798600420536946, "learning_rate": 8.845208845208846e-07, "loss": 1.7875, "step": 1200 }, { "epoch": 0.03553000147427392, "grad_norm": 6.537801794239179, "learning_rate": 8.882063882063883e-07, "loss": 1.778, "step": 1205 }, { "epoch": 0.035677428866283355, "grad_norm": 6.534844154281495, "learning_rate": 8.918918918918919e-07, "loss": 1.7036, "step": 1210 }, { "epoch": 0.03582485625829279, "grad_norm": 8.239670750637549, "learning_rate": 8.955773955773956e-07, "loss": 1.7016, "step": 1215 }, { "epoch": 0.03597228365030223, "grad_norm": 7.318286338853891, "learning_rate": 8.992628992628992e-07, "loss": 1.7379, "step": 1220 }, { "epoch": 0.03611971104231166, "grad_norm": 6.644276293309055, "learning_rate": 9.02948402948403e-07, "loss": 1.6919, "step": 1225 }, { "epoch": 0.0362671384343211, "grad_norm": 8.054061770430122, "learning_rate": 9.066339066339066e-07, "loss": 1.7059, "step": 1230 }, { "epoch": 0.036414565826330535, "grad_norm": 8.273164356256782, "learning_rate": 9.103194103194103e-07, "loss": 1.737, "step": 1235 }, { "epoch": 0.036561993218339965, "grad_norm": 7.013432988159995, "learning_rate": 9.14004914004914e-07, "loss": 1.7285, "step": 1240 }, { "epoch": 0.0367094206103494, "grad_norm": 7.823658281853274, "learning_rate": 9.176904176904177e-07, "loss": 1.7601, "step": 1245 }, { "epoch": 0.03685684800235884, "grad_norm": 8.398152474530105, "learning_rate": 9.213759213759215e-07, "loss": 1.7483, "step": 1250 }, { "epoch": 0.03700427539436827, "grad_norm": 8.196802924493145, "learning_rate": 9.250614250614251e-07, "loss": 1.6862, "step": 1255 }, { "epoch": 0.03715170278637771, "grad_norm": 7.826894754780838, "learning_rate": 9.287469287469288e-07, "loss": 1.6974, "step": 1260 }, { "epoch": 0.037299130178387145, "grad_norm": 7.3985401936417166, "learning_rate": 9.324324324324325e-07, "loss": 1.6734, "step": 1265 }, { "epoch": 0.03744655757039658, "grad_norm": 7.576042896556688, "learning_rate": 9.361179361179361e-07, "loss": 1.7495, "step": 1270 }, { "epoch": 0.03759398496240601, "grad_norm": 7.443697204733762, "learning_rate": 9.398034398034399e-07, "loss": 1.7311, "step": 1275 }, { "epoch": 0.03774141235441545, "grad_norm": 8.139548167988407, "learning_rate": 9.434889434889435e-07, "loss": 1.7321, "step": 1280 }, { "epoch": 0.03788883974642489, "grad_norm": 6.51623956642375, "learning_rate": 9.471744471744472e-07, "loss": 1.6862, "step": 1285 }, { "epoch": 0.03803626713843432, "grad_norm": 7.464406677843607, "learning_rate": 9.508599508599509e-07, "loss": 1.7761, "step": 1290 }, { "epoch": 0.038183694530443756, "grad_norm": 7.612410616783569, "learning_rate": 9.545454545454546e-07, "loss": 1.7338, "step": 1295 }, { "epoch": 0.03833112192245319, "grad_norm": 6.067765043058683, "learning_rate": 9.582309582309582e-07, "loss": 1.7765, "step": 1300 }, { "epoch": 0.03847854931446263, "grad_norm": 7.00478341977447, "learning_rate": 9.619164619164619e-07, "loss": 1.7172, "step": 1305 }, { "epoch": 0.03862597670647206, "grad_norm": 8.420587433344945, "learning_rate": 9.656019656019657e-07, "loss": 1.685, "step": 1310 }, { "epoch": 0.0387734040984815, "grad_norm": 6.439362361022942, "learning_rate": 9.692874692874692e-07, "loss": 1.6571, "step": 1315 }, { "epoch": 0.038920831490490936, "grad_norm": 7.198826264216976, "learning_rate": 9.72972972972973e-07, "loss": 1.7282, "step": 1320 }, { "epoch": 0.039068258882500366, "grad_norm": 7.682266240719534, "learning_rate": 9.766584766584767e-07, "loss": 1.7254, "step": 1325 }, { "epoch": 0.0392156862745098, "grad_norm": 8.6188451464422, "learning_rate": 9.803439803439805e-07, "loss": 1.7196, "step": 1330 }, { "epoch": 0.03936311366651924, "grad_norm": 7.607667506221209, "learning_rate": 9.84029484029484e-07, "loss": 1.6936, "step": 1335 }, { "epoch": 0.03951054105852867, "grad_norm": 7.721691557278778, "learning_rate": 9.877149877149878e-07, "loss": 1.7608, "step": 1340 }, { "epoch": 0.03965796845053811, "grad_norm": 7.378602800936833, "learning_rate": 9.914004914004915e-07, "loss": 1.6681, "step": 1345 }, { "epoch": 0.039805395842547546, "grad_norm": 7.461163938537156, "learning_rate": 9.95085995085995e-07, "loss": 1.711, "step": 1350 }, { "epoch": 0.03995282323455698, "grad_norm": 7.622471571994985, "learning_rate": 9.987714987714988e-07, "loss": 1.6928, "step": 1355 }, { "epoch": 0.040100250626566414, "grad_norm": 7.435173670774585, "learning_rate": 1.0024570024570025e-06, "loss": 1.6652, "step": 1360 }, { "epoch": 0.04024767801857585, "grad_norm": 7.045257539100649, "learning_rate": 1.0061425061425061e-06, "loss": 1.708, "step": 1365 }, { "epoch": 0.04039510541058529, "grad_norm": 7.481815468081788, "learning_rate": 1.0098280098280098e-06, "loss": 1.6844, "step": 1370 }, { "epoch": 0.04054253280259472, "grad_norm": 8.037847348800536, "learning_rate": 1.0135135135135136e-06, "loss": 1.7313, "step": 1375 }, { "epoch": 0.040689960194604156, "grad_norm": 6.5883764992689615, "learning_rate": 1.0171990171990173e-06, "loss": 1.6642, "step": 1380 }, { "epoch": 0.040837387586613594, "grad_norm": 7.473463150122506, "learning_rate": 1.020884520884521e-06, "loss": 1.7026, "step": 1385 }, { "epoch": 0.04098481497862303, "grad_norm": 7.381376161249062, "learning_rate": 1.0245700245700246e-06, "loss": 1.6848, "step": 1390 }, { "epoch": 0.04113224237063246, "grad_norm": 7.4001277021010265, "learning_rate": 1.0282555282555285e-06, "loss": 1.6633, "step": 1395 }, { "epoch": 0.0412796697626419, "grad_norm": 9.854140502535385, "learning_rate": 1.031941031941032e-06, "loss": 1.6686, "step": 1400 }, { "epoch": 0.041427097154651336, "grad_norm": 7.617313216946999, "learning_rate": 1.0356265356265355e-06, "loss": 1.6583, "step": 1405 }, { "epoch": 0.04157452454666077, "grad_norm": 6.673689895830578, "learning_rate": 1.0393120393120394e-06, "loss": 1.6641, "step": 1410 }, { "epoch": 0.041721951938670204, "grad_norm": 6.87388679250766, "learning_rate": 1.0429975429975429e-06, "loss": 1.7202, "step": 1415 }, { "epoch": 0.04186937933067964, "grad_norm": 7.444361364117969, "learning_rate": 1.0466830466830467e-06, "loss": 1.6629, "step": 1420 }, { "epoch": 0.04201680672268908, "grad_norm": 7.004129559957426, "learning_rate": 1.0503685503685504e-06, "loss": 1.6906, "step": 1425 }, { "epoch": 0.04216423411469851, "grad_norm": 6.907902444582434, "learning_rate": 1.0540540540540542e-06, "loss": 1.6742, "step": 1430 }, { "epoch": 0.04231166150670795, "grad_norm": 6.649211050687975, "learning_rate": 1.0577395577395577e-06, "loss": 1.7013, "step": 1435 }, { "epoch": 0.042459088898717384, "grad_norm": 8.016705392365031, "learning_rate": 1.0614250614250615e-06, "loss": 1.6556, "step": 1440 }, { "epoch": 0.042606516290726815, "grad_norm": 6.820940345433916, "learning_rate": 1.0651105651105652e-06, "loss": 1.7227, "step": 1445 }, { "epoch": 0.04275394368273625, "grad_norm": 7.290223228185156, "learning_rate": 1.0687960687960688e-06, "loss": 1.6845, "step": 1450 }, { "epoch": 0.04290137107474569, "grad_norm": 6.561298499612379, "learning_rate": 1.0724815724815725e-06, "loss": 1.6968, "step": 1455 }, { "epoch": 0.04304879846675512, "grad_norm": 6.981516763892902, "learning_rate": 1.0761670761670762e-06, "loss": 1.653, "step": 1460 }, { "epoch": 0.04319622585876456, "grad_norm": 7.845611460511, "learning_rate": 1.0798525798525798e-06, "loss": 1.6762, "step": 1465 }, { "epoch": 0.043343653250773995, "grad_norm": 7.5409216158911, "learning_rate": 1.0835380835380835e-06, "loss": 1.72, "step": 1470 }, { "epoch": 0.04349108064278343, "grad_norm": 7.228781659950707, "learning_rate": 1.0872235872235873e-06, "loss": 1.6699, "step": 1475 }, { "epoch": 0.04363850803479286, "grad_norm": 6.679273688869983, "learning_rate": 1.090909090909091e-06, "loss": 1.6579, "step": 1480 }, { "epoch": 0.0437859354268023, "grad_norm": 7.325385525689727, "learning_rate": 1.0945945945945946e-06, "loss": 1.7261, "step": 1485 }, { "epoch": 0.04393336281881174, "grad_norm": 5.4626301052777135, "learning_rate": 1.0982800982800983e-06, "loss": 1.6601, "step": 1490 }, { "epoch": 0.04408079021082117, "grad_norm": 7.086701144160754, "learning_rate": 1.1019656019656021e-06, "loss": 1.7005, "step": 1495 }, { "epoch": 0.044228217602830605, "grad_norm": 7.924403720024907, "learning_rate": 1.1056511056511056e-06, "loss": 1.705, "step": 1500 }, { "epoch": 0.044228217602830605, "eval_loss": 1.4585223197937012, "eval_runtime": 4.2097, "eval_samples_per_second": 94.069, "eval_steps_per_second": 3.088, "step": 1500 }, { "epoch": 0.04437564499484004, "grad_norm": 7.690856614247496, "learning_rate": 1.1093366093366095e-06, "loss": 1.6992, "step": 1505 }, { "epoch": 0.04452307238684948, "grad_norm": 7.589136345204706, "learning_rate": 1.113022113022113e-06, "loss": 1.667, "step": 1510 }, { "epoch": 0.04467049977885891, "grad_norm": 6.8216389242353115, "learning_rate": 1.1167076167076165e-06, "loss": 1.6493, "step": 1515 }, { "epoch": 0.04481792717086835, "grad_norm": 6.630862108594751, "learning_rate": 1.1203931203931204e-06, "loss": 1.6841, "step": 1520 }, { "epoch": 0.044965354562877785, "grad_norm": 6.288857991547612, "learning_rate": 1.124078624078624e-06, "loss": 1.5829, "step": 1525 }, { "epoch": 0.045112781954887216, "grad_norm": 7.78401752217646, "learning_rate": 1.127764127764128e-06, "loss": 1.6766, "step": 1530 }, { "epoch": 0.04526020934689665, "grad_norm": 6.178786450643782, "learning_rate": 1.1314496314496314e-06, "loss": 1.6007, "step": 1535 }, { "epoch": 0.04540763673890609, "grad_norm": 7.076844507664084, "learning_rate": 1.1351351351351352e-06, "loss": 1.6584, "step": 1540 }, { "epoch": 0.04555506413091552, "grad_norm": 6.779609225318567, "learning_rate": 1.1388206388206389e-06, "loss": 1.7283, "step": 1545 }, { "epoch": 0.04570249152292496, "grad_norm": 6.585294038820602, "learning_rate": 1.1425061425061425e-06, "loss": 1.6748, "step": 1550 }, { "epoch": 0.045849918914934396, "grad_norm": 6.67330961482568, "learning_rate": 1.1461916461916462e-06, "loss": 1.6677, "step": 1555 }, { "epoch": 0.04599734630694383, "grad_norm": 5.962677871627909, "learning_rate": 1.14987714987715e-06, "loss": 1.6417, "step": 1560 }, { "epoch": 0.04614477369895326, "grad_norm": 7.240489406645415, "learning_rate": 1.1535626535626535e-06, "loss": 1.7132, "step": 1565 }, { "epoch": 0.0462922010909627, "grad_norm": 6.974034583631069, "learning_rate": 1.1572481572481571e-06, "loss": 1.6426, "step": 1570 }, { "epoch": 0.04643962848297214, "grad_norm": 6.954593452194493, "learning_rate": 1.160933660933661e-06, "loss": 1.6694, "step": 1575 }, { "epoch": 0.04658705587498157, "grad_norm": 7.062854202352342, "learning_rate": 1.1646191646191647e-06, "loss": 1.6176, "step": 1580 }, { "epoch": 0.046734483266991006, "grad_norm": 7.334076731539495, "learning_rate": 1.1683046683046683e-06, "loss": 1.6229, "step": 1585 }, { "epoch": 0.04688191065900044, "grad_norm": 6.221154901138304, "learning_rate": 1.171990171990172e-06, "loss": 1.6636, "step": 1590 }, { "epoch": 0.04702933805100988, "grad_norm": 7.6447609322996275, "learning_rate": 1.1756756756756758e-06, "loss": 1.704, "step": 1595 }, { "epoch": 0.04717676544301931, "grad_norm": 6.930170529371163, "learning_rate": 1.1793611793611793e-06, "loss": 1.681, "step": 1600 }, { "epoch": 0.04732419283502875, "grad_norm": 6.1166096586362055, "learning_rate": 1.1830466830466831e-06, "loss": 1.6436, "step": 1605 }, { "epoch": 0.047471620227038186, "grad_norm": 7.490144447097976, "learning_rate": 1.1867321867321868e-06, "loss": 1.6263, "step": 1610 }, { "epoch": 0.047619047619047616, "grad_norm": 6.363150253187046, "learning_rate": 1.1904176904176904e-06, "loss": 1.7108, "step": 1615 }, { "epoch": 0.047766475011057054, "grad_norm": 7.193952374831471, "learning_rate": 1.194103194103194e-06, "loss": 1.6958, "step": 1620 }, { "epoch": 0.04791390240306649, "grad_norm": 7.842030214294463, "learning_rate": 1.1977886977886978e-06, "loss": 1.6872, "step": 1625 }, { "epoch": 0.04806132979507593, "grad_norm": 7.623797889400072, "learning_rate": 1.2014742014742016e-06, "loss": 1.6869, "step": 1630 }, { "epoch": 0.04820875718708536, "grad_norm": 6.915695962499651, "learning_rate": 1.205159705159705e-06, "loss": 1.6115, "step": 1635 }, { "epoch": 0.048356184579094796, "grad_norm": 6.936298298317977, "learning_rate": 1.208845208845209e-06, "loss": 1.701, "step": 1640 }, { "epoch": 0.048503611971104234, "grad_norm": 6.26949477518621, "learning_rate": 1.2125307125307126e-06, "loss": 1.6317, "step": 1645 }, { "epoch": 0.048651039363113664, "grad_norm": 6.066048044116961, "learning_rate": 1.2162162162162162e-06, "loss": 1.6544, "step": 1650 }, { "epoch": 0.0487984667551231, "grad_norm": 6.8057600866197765, "learning_rate": 1.2199017199017199e-06, "loss": 1.6911, "step": 1655 }, { "epoch": 0.04894589414713254, "grad_norm": 5.793196262235378, "learning_rate": 1.2235872235872237e-06, "loss": 1.6449, "step": 1660 }, { "epoch": 0.04909332153914197, "grad_norm": 6.3341756034909755, "learning_rate": 1.2272727272727274e-06, "loss": 1.6473, "step": 1665 }, { "epoch": 0.04924074893115141, "grad_norm": 6.179137462700181, "learning_rate": 1.2309582309582308e-06, "loss": 1.6482, "step": 1670 }, { "epoch": 0.049388176323160844, "grad_norm": 7.228283506489298, "learning_rate": 1.2346437346437347e-06, "loss": 1.6111, "step": 1675 }, { "epoch": 0.04953560371517028, "grad_norm": 6.6611661965711235, "learning_rate": 1.2383292383292384e-06, "loss": 1.6415, "step": 1680 }, { "epoch": 0.04968303110717971, "grad_norm": 6.438123206666281, "learning_rate": 1.242014742014742e-06, "loss": 1.6807, "step": 1685 }, { "epoch": 0.04983045849918915, "grad_norm": 7.582558732441767, "learning_rate": 1.2457002457002457e-06, "loss": 1.708, "step": 1690 }, { "epoch": 0.04997788589119859, "grad_norm": 6.917747086428014, "learning_rate": 1.2493857493857495e-06, "loss": 1.6416, "step": 1695 }, { "epoch": 0.05012531328320802, "grad_norm": 6.851276562086603, "learning_rate": 1.253071253071253e-06, "loss": 1.6248, "step": 1700 }, { "epoch": 0.050272740675217455, "grad_norm": 7.5705294124409095, "learning_rate": 1.2567567567567568e-06, "loss": 1.6233, "step": 1705 }, { "epoch": 0.05042016806722689, "grad_norm": 6.908803100689672, "learning_rate": 1.2604422604422605e-06, "loss": 1.6644, "step": 1710 }, { "epoch": 0.05056759545923633, "grad_norm": 6.863255535867805, "learning_rate": 1.2641277641277644e-06, "loss": 1.6481, "step": 1715 }, { "epoch": 0.05071502285124576, "grad_norm": 6.990590765717037, "learning_rate": 1.2678132678132678e-06, "loss": 1.6598, "step": 1720 }, { "epoch": 0.0508624502432552, "grad_norm": 6.011456809045233, "learning_rate": 1.2714987714987714e-06, "loss": 1.6118, "step": 1725 }, { "epoch": 0.051009877635264635, "grad_norm": 7.06098893716798, "learning_rate": 1.2751842751842753e-06, "loss": 1.6699, "step": 1730 }, { "epoch": 0.051157305027274065, "grad_norm": 5.852306277830541, "learning_rate": 1.2788697788697788e-06, "loss": 1.5917, "step": 1735 }, { "epoch": 0.0513047324192835, "grad_norm": 6.868269045255142, "learning_rate": 1.2825552825552826e-06, "loss": 1.6268, "step": 1740 }, { "epoch": 0.05145215981129294, "grad_norm": 7.0839173029673494, "learning_rate": 1.2862407862407863e-06, "loss": 1.6605, "step": 1745 }, { "epoch": 0.05159958720330237, "grad_norm": 6.710050054987396, "learning_rate": 1.28992628992629e-06, "loss": 1.6436, "step": 1750 }, { "epoch": 0.05174701459531181, "grad_norm": 6.564481869609119, "learning_rate": 1.2936117936117936e-06, "loss": 1.6276, "step": 1755 }, { "epoch": 0.051894441987321245, "grad_norm": 5.953561678907276, "learning_rate": 1.2972972972972974e-06, "loss": 1.5982, "step": 1760 }, { "epoch": 0.05204186937933068, "grad_norm": 6.356708880536861, "learning_rate": 1.300982800982801e-06, "loss": 1.6677, "step": 1765 }, { "epoch": 0.05218929677134011, "grad_norm": 6.244249150447493, "learning_rate": 1.3046683046683047e-06, "loss": 1.6025, "step": 1770 }, { "epoch": 0.05233672416334955, "grad_norm": 6.570184416731645, "learning_rate": 1.3083538083538084e-06, "loss": 1.6507, "step": 1775 }, { "epoch": 0.05248415155535899, "grad_norm": 6.842452147267614, "learning_rate": 1.312039312039312e-06, "loss": 1.6499, "step": 1780 }, { "epoch": 0.05263157894736842, "grad_norm": 6.1478450877230575, "learning_rate": 1.3157248157248157e-06, "loss": 1.6571, "step": 1785 }, { "epoch": 0.052779006339377856, "grad_norm": 6.493981301770118, "learning_rate": 1.3194103194103194e-06, "loss": 1.644, "step": 1790 }, { "epoch": 0.05292643373138729, "grad_norm": 6.736386234571327, "learning_rate": 1.3230958230958232e-06, "loss": 1.6715, "step": 1795 }, { "epoch": 0.05307386112339673, "grad_norm": 6.692132166738226, "learning_rate": 1.3267813267813267e-06, "loss": 1.6393, "step": 1800 }, { "epoch": 0.05322128851540616, "grad_norm": 5.9384822045170065, "learning_rate": 1.3304668304668305e-06, "loss": 1.6142, "step": 1805 }, { "epoch": 0.0533687159074156, "grad_norm": 6.1229747939040005, "learning_rate": 1.3341523341523342e-06, "loss": 1.6075, "step": 1810 }, { "epoch": 0.053516143299425036, "grad_norm": 7.217751925815772, "learning_rate": 1.337837837837838e-06, "loss": 1.5923, "step": 1815 }, { "epoch": 0.053663570691434466, "grad_norm": 5.8950953264235375, "learning_rate": 1.3415233415233415e-06, "loss": 1.6168, "step": 1820 }, { "epoch": 0.0538109980834439, "grad_norm": 6.10816386819449, "learning_rate": 1.3452088452088453e-06, "loss": 1.5804, "step": 1825 }, { "epoch": 0.05395842547545334, "grad_norm": 6.672373166780856, "learning_rate": 1.348894348894349e-06, "loss": 1.63, "step": 1830 }, { "epoch": 0.05410585286746277, "grad_norm": 6.562553464555367, "learning_rate": 1.3525798525798524e-06, "loss": 1.6212, "step": 1835 }, { "epoch": 0.05425328025947221, "grad_norm": 6.217306577528632, "learning_rate": 1.3562653562653563e-06, "loss": 1.6562, "step": 1840 }, { "epoch": 0.054400707651481646, "grad_norm": 7.0560576335984155, "learning_rate": 1.35995085995086e-06, "loss": 1.6639, "step": 1845 }, { "epoch": 0.05454813504349108, "grad_norm": 6.190357917007809, "learning_rate": 1.3636363636363636e-06, "loss": 1.6631, "step": 1850 }, { "epoch": 0.054695562435500514, "grad_norm": 6.939284062412921, "learning_rate": 1.3673218673218673e-06, "loss": 1.6147, "step": 1855 }, { "epoch": 0.05484298982750995, "grad_norm": 6.613069915397667, "learning_rate": 1.3710073710073711e-06, "loss": 1.6332, "step": 1860 }, { "epoch": 0.05499041721951939, "grad_norm": 7.054725102673212, "learning_rate": 1.3746928746928748e-06, "loss": 1.6127, "step": 1865 }, { "epoch": 0.05513784461152882, "grad_norm": 6.372721556873654, "learning_rate": 1.3783783783783784e-06, "loss": 1.6205, "step": 1870 }, { "epoch": 0.055285272003538256, "grad_norm": 6.809611274060924, "learning_rate": 1.382063882063882e-06, "loss": 1.5914, "step": 1875 }, { "epoch": 0.055432699395547694, "grad_norm": 7.012492477152968, "learning_rate": 1.385749385749386e-06, "loss": 1.5854, "step": 1880 }, { "epoch": 0.05558012678755713, "grad_norm": 6.671274250880926, "learning_rate": 1.3894348894348894e-06, "loss": 1.6354, "step": 1885 }, { "epoch": 0.05572755417956656, "grad_norm": 7.682702435639918, "learning_rate": 1.393120393120393e-06, "loss": 1.6263, "step": 1890 }, { "epoch": 0.055874981571576, "grad_norm": 6.130072478986452, "learning_rate": 1.396805896805897e-06, "loss": 1.6094, "step": 1895 }, { "epoch": 0.056022408963585436, "grad_norm": 5.8768164524734345, "learning_rate": 1.4004914004914004e-06, "loss": 1.6348, "step": 1900 }, { "epoch": 0.05616983635559487, "grad_norm": 5.95041705155104, "learning_rate": 1.4041769041769042e-06, "loss": 1.6258, "step": 1905 }, { "epoch": 0.056317263747604304, "grad_norm": 6.368973433537462, "learning_rate": 1.4078624078624079e-06, "loss": 1.646, "step": 1910 }, { "epoch": 0.05646469113961374, "grad_norm": 6.7646477565924314, "learning_rate": 1.4115479115479117e-06, "loss": 1.6196, "step": 1915 }, { "epoch": 0.05661211853162318, "grad_norm": 6.624036077435706, "learning_rate": 1.4152334152334152e-06, "loss": 1.6099, "step": 1920 }, { "epoch": 0.05675954592363261, "grad_norm": 6.898654372608102, "learning_rate": 1.418918918918919e-06, "loss": 1.5624, "step": 1925 }, { "epoch": 0.05690697331564205, "grad_norm": 5.682169698784754, "learning_rate": 1.4226044226044227e-06, "loss": 1.6355, "step": 1930 }, { "epoch": 0.057054400707651484, "grad_norm": 7.132087021406356, "learning_rate": 1.4262899262899263e-06, "loss": 1.6179, "step": 1935 }, { "epoch": 0.057201828099660915, "grad_norm": 6.769063793636996, "learning_rate": 1.42997542997543e-06, "loss": 1.6174, "step": 1940 }, { "epoch": 0.05734925549167035, "grad_norm": 5.838932265015326, "learning_rate": 1.4336609336609337e-06, "loss": 1.58, "step": 1945 }, { "epoch": 0.05749668288367979, "grad_norm": 6.51537507140518, "learning_rate": 1.4373464373464373e-06, "loss": 1.644, "step": 1950 }, { "epoch": 0.05764411027568922, "grad_norm": 6.520152548756988, "learning_rate": 1.441031941031941e-06, "loss": 1.6299, "step": 1955 }, { "epoch": 0.05779153766769866, "grad_norm": 5.944985786311011, "learning_rate": 1.4447174447174448e-06, "loss": 1.6441, "step": 1960 }, { "epoch": 0.057938965059708095, "grad_norm": 7.17753226428166, "learning_rate": 1.4484029484029485e-06, "loss": 1.6132, "step": 1965 }, { "epoch": 0.05808639245171753, "grad_norm": 7.022896228128069, "learning_rate": 1.4520884520884521e-06, "loss": 1.7231, "step": 1970 }, { "epoch": 0.05823381984372696, "grad_norm": 6.402679592103105, "learning_rate": 1.4557739557739558e-06, "loss": 1.611, "step": 1975 }, { "epoch": 0.0583812472357364, "grad_norm": 6.2270913299768536, "learning_rate": 1.4594594594594596e-06, "loss": 1.6189, "step": 1980 }, { "epoch": 0.05852867462774584, "grad_norm": 6.0544530263134355, "learning_rate": 1.463144963144963e-06, "loss": 1.6035, "step": 1985 }, { "epoch": 0.05867610201975527, "grad_norm": 6.63056306227406, "learning_rate": 1.466830466830467e-06, "loss": 1.5878, "step": 1990 }, { "epoch": 0.058823529411764705, "grad_norm": 6.2448697383727225, "learning_rate": 1.4705159705159706e-06, "loss": 1.585, "step": 1995 }, { "epoch": 0.05897095680377414, "grad_norm": 7.979363781308324, "learning_rate": 1.474201474201474e-06, "loss": 1.6304, "step": 2000 }, { "epoch": 0.05897095680377414, "eval_loss": 1.382646918296814, "eval_runtime": 4.2425, "eval_samples_per_second": 93.342, "eval_steps_per_second": 3.064, "step": 2000 }, { "epoch": 0.05911838419578358, "grad_norm": 5.6440446557567086, "learning_rate": 1.477886977886978e-06, "loss": 1.5571, "step": 2005 }, { "epoch": 0.05926581158779301, "grad_norm": 12.990159136120742, "learning_rate": 1.4815724815724816e-06, "loss": 1.5521, "step": 2010 }, { "epoch": 0.05941323897980245, "grad_norm": 5.708485141077599, "learning_rate": 1.4852579852579854e-06, "loss": 1.6275, "step": 2015 }, { "epoch": 0.059560666371811885, "grad_norm": 6.1240405485766, "learning_rate": 1.4889434889434889e-06, "loss": 1.6288, "step": 2020 }, { "epoch": 0.059708093763821316, "grad_norm": 6.988845503315018, "learning_rate": 1.4926289926289927e-06, "loss": 1.5991, "step": 2025 }, { "epoch": 0.05985552115583075, "grad_norm": 6.22588644499163, "learning_rate": 1.4963144963144964e-06, "loss": 1.6067, "step": 2030 }, { "epoch": 0.06000294854784019, "grad_norm": 6.597449725678799, "learning_rate": 1.5e-06, "loss": 1.6127, "step": 2035 }, { "epoch": 0.06015037593984962, "grad_norm": 5.674943971205588, "learning_rate": 1.5036855036855037e-06, "loss": 1.6222, "step": 2040 }, { "epoch": 0.06029780333185906, "grad_norm": 5.612765736171038, "learning_rate": 1.5073710073710076e-06, "loss": 1.5516, "step": 2045 }, { "epoch": 0.060445230723868495, "grad_norm": 6.951181621309978, "learning_rate": 1.5110565110565112e-06, "loss": 1.6112, "step": 2050 }, { "epoch": 0.06059265811587793, "grad_norm": 6.704647038732817, "learning_rate": 1.5147420147420146e-06, "loss": 1.5715, "step": 2055 }, { "epoch": 0.06074008550788736, "grad_norm": 6.27579848392647, "learning_rate": 1.5184275184275185e-06, "loss": 1.626, "step": 2060 }, { "epoch": 0.0608875128998968, "grad_norm": 6.267122093871407, "learning_rate": 1.5221130221130222e-06, "loss": 1.6188, "step": 2065 }, { "epoch": 0.06103494029190624, "grad_norm": 6.12931563405375, "learning_rate": 1.5257985257985258e-06, "loss": 1.5945, "step": 2070 }, { "epoch": 0.06118236768391567, "grad_norm": 6.340752019193624, "learning_rate": 1.5294840294840295e-06, "loss": 1.618, "step": 2075 }, { "epoch": 0.061329795075925106, "grad_norm": 6.524464540398663, "learning_rate": 1.5331695331695333e-06, "loss": 1.6245, "step": 2080 }, { "epoch": 0.06147722246793454, "grad_norm": 5.525089828505235, "learning_rate": 1.5368550368550368e-06, "loss": 1.6299, "step": 2085 }, { "epoch": 0.06162464985994398, "grad_norm": 6.226134337546072, "learning_rate": 1.5405405405405406e-06, "loss": 1.634, "step": 2090 }, { "epoch": 0.06177207725195341, "grad_norm": 6.047859502944977, "learning_rate": 1.5442260442260443e-06, "loss": 1.6368, "step": 2095 }, { "epoch": 0.06191950464396285, "grad_norm": 6.343305876551591, "learning_rate": 1.547911547911548e-06, "loss": 1.575, "step": 2100 }, { "epoch": 0.062066932035972286, "grad_norm": 6.477459767031381, "learning_rate": 1.5515970515970516e-06, "loss": 1.6165, "step": 2105 }, { "epoch": 0.062214359427981716, "grad_norm": 6.194539092138237, "learning_rate": 1.5552825552825553e-06, "loss": 1.632, "step": 2110 }, { "epoch": 0.062361786819991154, "grad_norm": 6.066399118862582, "learning_rate": 1.5589680589680591e-06, "loss": 1.5802, "step": 2115 }, { "epoch": 0.06250921421200059, "grad_norm": 6.170513062054662, "learning_rate": 1.5626535626535626e-06, "loss": 1.6272, "step": 2120 }, { "epoch": 0.06265664160401002, "grad_norm": 5.93569331718644, "learning_rate": 1.5663390663390664e-06, "loss": 1.6382, "step": 2125 }, { "epoch": 0.06280406899601947, "grad_norm": 5.515069052203553, "learning_rate": 1.57002457002457e-06, "loss": 1.5983, "step": 2130 }, { "epoch": 0.0629514963880289, "grad_norm": 6.3867721650407345, "learning_rate": 1.5737100737100737e-06, "loss": 1.5911, "step": 2135 }, { "epoch": 0.06309892378003833, "grad_norm": 6.061282652554585, "learning_rate": 1.5773955773955774e-06, "loss": 1.5487, "step": 2140 }, { "epoch": 0.06324635117204777, "grad_norm": 5.774744507496526, "learning_rate": 1.5810810810810812e-06, "loss": 1.6431, "step": 2145 }, { "epoch": 0.0633937785640572, "grad_norm": 6.129425571977141, "learning_rate": 1.584766584766585e-06, "loss": 1.5807, "step": 2150 }, { "epoch": 0.06354120595606663, "grad_norm": 6.100686165980183, "learning_rate": 1.5884520884520883e-06, "loss": 1.6759, "step": 2155 }, { "epoch": 0.06368863334807608, "grad_norm": 5.804955599740174, "learning_rate": 1.5921375921375922e-06, "loss": 1.5693, "step": 2160 }, { "epoch": 0.0638360607400855, "grad_norm": 6.338072248357455, "learning_rate": 1.5958230958230959e-06, "loss": 1.5946, "step": 2165 }, { "epoch": 0.06398348813209494, "grad_norm": 6.241950920187482, "learning_rate": 1.5995085995085995e-06, "loss": 1.5983, "step": 2170 }, { "epoch": 0.06413091552410438, "grad_norm": 6.2944739104962055, "learning_rate": 1.6031941031941032e-06, "loss": 1.5939, "step": 2175 }, { "epoch": 0.06427834291611381, "grad_norm": 6.00558623959478, "learning_rate": 1.606879606879607e-06, "loss": 1.6055, "step": 2180 }, { "epoch": 0.06442577030812324, "grad_norm": 5.569825181869018, "learning_rate": 1.6105651105651105e-06, "loss": 1.5943, "step": 2185 }, { "epoch": 0.06457319770013269, "grad_norm": 5.851193658374157, "learning_rate": 1.6142506142506143e-06, "loss": 1.6055, "step": 2190 }, { "epoch": 0.06472062509214212, "grad_norm": 6.105170328894079, "learning_rate": 1.617936117936118e-06, "loss": 1.613, "step": 2195 }, { "epoch": 0.06486805248415156, "grad_norm": 6.458167521210023, "learning_rate": 1.6216216216216219e-06, "loss": 1.5645, "step": 2200 }, { "epoch": 0.06501547987616099, "grad_norm": 6.052289678506683, "learning_rate": 1.6253071253071253e-06, "loss": 1.5886, "step": 2205 }, { "epoch": 0.06516290726817042, "grad_norm": 6.7950166491394075, "learning_rate": 1.628992628992629e-06, "loss": 1.6402, "step": 2210 }, { "epoch": 0.06531033466017987, "grad_norm": 5.5978427381428775, "learning_rate": 1.6326781326781328e-06, "loss": 1.5938, "step": 2215 }, { "epoch": 0.0654577620521893, "grad_norm": 6.186198981873184, "learning_rate": 1.6363636363636363e-06, "loss": 1.568, "step": 2220 }, { "epoch": 0.06560518944419873, "grad_norm": 5.812734774848477, "learning_rate": 1.6400491400491401e-06, "loss": 1.6227, "step": 2225 }, { "epoch": 0.06575261683620817, "grad_norm": 5.111833880876855, "learning_rate": 1.6437346437346438e-06, "loss": 1.5353, "step": 2230 }, { "epoch": 0.0659000442282176, "grad_norm": 6.332677094797862, "learning_rate": 1.6474201474201474e-06, "loss": 1.5759, "step": 2235 }, { "epoch": 0.06604747162022703, "grad_norm": 6.276479022204391, "learning_rate": 1.651105651105651e-06, "loss": 1.5915, "step": 2240 }, { "epoch": 0.06619489901223648, "grad_norm": 6.253713450700337, "learning_rate": 1.654791154791155e-06, "loss": 1.6072, "step": 2245 }, { "epoch": 0.06634232640424591, "grad_norm": 5.750639882133948, "learning_rate": 1.6584766584766586e-06, "loss": 1.5691, "step": 2250 }, { "epoch": 0.06648975379625534, "grad_norm": 6.004439929448537, "learning_rate": 1.6621621621621622e-06, "loss": 1.5871, "step": 2255 }, { "epoch": 0.06663718118826478, "grad_norm": 5.79854762646375, "learning_rate": 1.665847665847666e-06, "loss": 1.5514, "step": 2260 }, { "epoch": 0.06678460858027421, "grad_norm": 6.0293148865265875, "learning_rate": 1.6695331695331695e-06, "loss": 1.5655, "step": 2265 }, { "epoch": 0.06693203597228366, "grad_norm": 5.92586827955084, "learning_rate": 1.6732186732186732e-06, "loss": 1.5741, "step": 2270 }, { "epoch": 0.06707946336429309, "grad_norm": 6.035277954994365, "learning_rate": 1.6769041769041769e-06, "loss": 1.5978, "step": 2275 }, { "epoch": 0.06722689075630252, "grad_norm": 6.154365995328953, "learning_rate": 1.6805896805896807e-06, "loss": 1.6023, "step": 2280 }, { "epoch": 0.06737431814831196, "grad_norm": 6.324120878826393, "learning_rate": 1.6842751842751842e-06, "loss": 1.6269, "step": 2285 }, { "epoch": 0.06752174554032139, "grad_norm": 6.100181395769613, "learning_rate": 1.687960687960688e-06, "loss": 1.6088, "step": 2290 }, { "epoch": 0.06766917293233082, "grad_norm": 6.535573570228745, "learning_rate": 1.6916461916461917e-06, "loss": 1.6229, "step": 2295 }, { "epoch": 0.06781660032434027, "grad_norm": 21.252092428904366, "learning_rate": 1.6953316953316955e-06, "loss": 1.5801, "step": 2300 }, { "epoch": 0.0679640277163497, "grad_norm": 5.422094297662072, "learning_rate": 1.699017199017199e-06, "loss": 1.5637, "step": 2305 }, { "epoch": 0.06811145510835913, "grad_norm": 6.113213077169714, "learning_rate": 1.7027027027027028e-06, "loss": 1.5895, "step": 2310 }, { "epoch": 0.06825888250036857, "grad_norm": 6.155056372037241, "learning_rate": 1.7063882063882065e-06, "loss": 1.5644, "step": 2315 }, { "epoch": 0.068406309892378, "grad_norm": 6.06687759234974, "learning_rate": 1.71007371007371e-06, "loss": 1.5874, "step": 2320 }, { "epoch": 0.06855373728438743, "grad_norm": 6.193980608082193, "learning_rate": 1.7137592137592138e-06, "loss": 1.5858, "step": 2325 }, { "epoch": 0.06870116467639688, "grad_norm": 6.513418519438128, "learning_rate": 1.7174447174447175e-06, "loss": 1.6267, "step": 2330 }, { "epoch": 0.06884859206840631, "grad_norm": 5.377085240253216, "learning_rate": 1.7211302211302211e-06, "loss": 1.6144, "step": 2335 }, { "epoch": 0.06899601946041574, "grad_norm": 5.946019409479213, "learning_rate": 1.7248157248157248e-06, "loss": 1.5575, "step": 2340 }, { "epoch": 0.06914344685242518, "grad_norm": 6.280931925911152, "learning_rate": 1.7285012285012286e-06, "loss": 1.65, "step": 2345 }, { "epoch": 0.06929087424443461, "grad_norm": 5.915682484805443, "learning_rate": 1.7321867321867323e-06, "loss": 1.6106, "step": 2350 }, { "epoch": 0.06943830163644406, "grad_norm": 5.991000625392707, "learning_rate": 1.735872235872236e-06, "loss": 1.5926, "step": 2355 }, { "epoch": 0.06958572902845349, "grad_norm": 5.032518493235425, "learning_rate": 1.7395577395577396e-06, "loss": 1.5895, "step": 2360 }, { "epoch": 0.06973315642046292, "grad_norm": 6.456780373354912, "learning_rate": 1.7432432432432435e-06, "loss": 1.6006, "step": 2365 }, { "epoch": 0.06988058381247236, "grad_norm": 5.458381728744118, "learning_rate": 1.746928746928747e-06, "loss": 1.573, "step": 2370 }, { "epoch": 0.0700280112044818, "grad_norm": 5.729463374465411, "learning_rate": 1.7506142506142505e-06, "loss": 1.6467, "step": 2375 }, { "epoch": 0.07017543859649122, "grad_norm": 6.742264416071399, "learning_rate": 1.7542997542997544e-06, "loss": 1.6136, "step": 2380 }, { "epoch": 0.07032286598850067, "grad_norm": 5.266224561236175, "learning_rate": 1.7579852579852579e-06, "loss": 1.5889, "step": 2385 }, { "epoch": 0.0704702933805101, "grad_norm": 5.670657838806408, "learning_rate": 1.7616707616707617e-06, "loss": 1.5544, "step": 2390 }, { "epoch": 0.07061772077251953, "grad_norm": 5.619718290319536, "learning_rate": 1.7653562653562654e-06, "loss": 1.5707, "step": 2395 }, { "epoch": 0.07076514816452897, "grad_norm": 6.562252719246465, "learning_rate": 1.7690417690417692e-06, "loss": 1.6269, "step": 2400 }, { "epoch": 0.0709125755565384, "grad_norm": 5.946654710843088, "learning_rate": 1.7727272727272727e-06, "loss": 1.5927, "step": 2405 }, { "epoch": 0.07106000294854783, "grad_norm": 5.761219098308535, "learning_rate": 1.7764127764127765e-06, "loss": 1.5798, "step": 2410 }, { "epoch": 0.07120743034055728, "grad_norm": 6.254656966734393, "learning_rate": 1.7800982800982802e-06, "loss": 1.6049, "step": 2415 }, { "epoch": 0.07135485773256671, "grad_norm": 5.397489400217602, "learning_rate": 1.7837837837837838e-06, "loss": 1.5973, "step": 2420 }, { "epoch": 0.07150228512457614, "grad_norm": 6.049631633762759, "learning_rate": 1.7874692874692875e-06, "loss": 1.6102, "step": 2425 }, { "epoch": 0.07164971251658558, "grad_norm": 5.609561594543775, "learning_rate": 1.7911547911547912e-06, "loss": 1.6082, "step": 2430 }, { "epoch": 0.07179713990859501, "grad_norm": 5.997978325394861, "learning_rate": 1.7948402948402948e-06, "loss": 1.6088, "step": 2435 }, { "epoch": 0.07194456730060446, "grad_norm": 6.280402227827555, "learning_rate": 1.7985257985257985e-06, "loss": 1.6026, "step": 2440 }, { "epoch": 0.07209199469261389, "grad_norm": 6.087042925259346, "learning_rate": 1.8022113022113023e-06, "loss": 1.6144, "step": 2445 }, { "epoch": 0.07223942208462332, "grad_norm": 6.039307600653221, "learning_rate": 1.805896805896806e-06, "loss": 1.5803, "step": 2450 }, { "epoch": 0.07238684947663276, "grad_norm": 5.743870042989688, "learning_rate": 1.8095823095823096e-06, "loss": 1.6284, "step": 2455 }, { "epoch": 0.0725342768686422, "grad_norm": 5.219763269080988, "learning_rate": 1.8132678132678133e-06, "loss": 1.5644, "step": 2460 }, { "epoch": 0.07268170426065163, "grad_norm": 6.044316394339984, "learning_rate": 1.8169533169533171e-06, "loss": 1.6066, "step": 2465 }, { "epoch": 0.07282913165266107, "grad_norm": 5.798446468070608, "learning_rate": 1.8206388206388206e-06, "loss": 1.5549, "step": 2470 }, { "epoch": 0.0729765590446705, "grad_norm": 5.5977438060654, "learning_rate": 1.8243243243243245e-06, "loss": 1.643, "step": 2475 }, { "epoch": 0.07312398643667993, "grad_norm": 6.248436601133372, "learning_rate": 1.828009828009828e-06, "loss": 1.6355, "step": 2480 }, { "epoch": 0.07327141382868937, "grad_norm": 5.319930996212688, "learning_rate": 1.8316953316953318e-06, "loss": 1.6118, "step": 2485 }, { "epoch": 0.0734188412206988, "grad_norm": 5.8294402607089575, "learning_rate": 1.8353808353808354e-06, "loss": 1.621, "step": 2490 }, { "epoch": 0.07356626861270824, "grad_norm": 6.139822590424839, "learning_rate": 1.839066339066339e-06, "loss": 1.5289, "step": 2495 }, { "epoch": 0.07371369600471768, "grad_norm": 5.815792976744819, "learning_rate": 1.842751842751843e-06, "loss": 1.5948, "step": 2500 }, { "epoch": 0.07371369600471768, "eval_loss": 1.3407624959945679, "eval_runtime": 4.193, "eval_samples_per_second": 94.444, "eval_steps_per_second": 3.1, "step": 2500 }, { "epoch": 0.07386112339672711, "grad_norm": 5.503381311397895, "learning_rate": 1.8464373464373464e-06, "loss": 1.5658, "step": 2505 }, { "epoch": 0.07400855078873654, "grad_norm": 5.452427795118422, "learning_rate": 1.8501228501228502e-06, "loss": 1.6371, "step": 2510 }, { "epoch": 0.07415597818074599, "grad_norm": 5.601226242024944, "learning_rate": 1.8538083538083539e-06, "loss": 1.5462, "step": 2515 }, { "epoch": 0.07430340557275542, "grad_norm": 5.707851657968658, "learning_rate": 1.8574938574938575e-06, "loss": 1.6161, "step": 2520 }, { "epoch": 0.07445083296476486, "grad_norm": 6.997608340595385, "learning_rate": 1.8611793611793612e-06, "loss": 1.6182, "step": 2525 }, { "epoch": 0.07459826035677429, "grad_norm": 5.931383035229415, "learning_rate": 1.864864864864865e-06, "loss": 1.5975, "step": 2530 }, { "epoch": 0.07474568774878372, "grad_norm": 5.876448442537718, "learning_rate": 1.8685503685503687e-06, "loss": 1.5569, "step": 2535 }, { "epoch": 0.07489311514079317, "grad_norm": 5.972277604362021, "learning_rate": 1.8722358722358721e-06, "loss": 1.5609, "step": 2540 }, { "epoch": 0.0750405425328026, "grad_norm": 5.942757321708201, "learning_rate": 1.875921375921376e-06, "loss": 1.5543, "step": 2545 }, { "epoch": 0.07518796992481203, "grad_norm": 5.688248491795728, "learning_rate": 1.8796068796068799e-06, "loss": 1.5939, "step": 2550 }, { "epoch": 0.07533539731682147, "grad_norm": 5.832589062780053, "learning_rate": 1.8832923832923831e-06, "loss": 1.613, "step": 2555 }, { "epoch": 0.0754828247088309, "grad_norm": 5.6562634240883725, "learning_rate": 1.886977886977887e-06, "loss": 1.5769, "step": 2560 }, { "epoch": 0.07563025210084033, "grad_norm": 5.970907462614157, "learning_rate": 1.8906633906633908e-06, "loss": 1.5629, "step": 2565 }, { "epoch": 0.07577767949284978, "grad_norm": 5.1708408704406, "learning_rate": 1.8943488943488945e-06, "loss": 1.5506, "step": 2570 }, { "epoch": 0.0759251068848592, "grad_norm": 6.138702489878542, "learning_rate": 1.898034398034398e-06, "loss": 1.6122, "step": 2575 }, { "epoch": 0.07607253427686864, "grad_norm": 5.705721600736725, "learning_rate": 1.9017199017199018e-06, "loss": 1.5735, "step": 2580 }, { "epoch": 0.07621996166887808, "grad_norm": 5.741777968016816, "learning_rate": 1.9054054054054057e-06, "loss": 1.5736, "step": 2585 }, { "epoch": 0.07636738906088751, "grad_norm": 6.10304979866348, "learning_rate": 1.909090909090909e-06, "loss": 1.5316, "step": 2590 }, { "epoch": 0.07651481645289694, "grad_norm": 5.281837694484547, "learning_rate": 1.9127764127764125e-06, "loss": 1.602, "step": 2595 }, { "epoch": 0.07666224384490639, "grad_norm": 5.8031102255300535, "learning_rate": 1.9164619164619164e-06, "loss": 1.582, "step": 2600 }, { "epoch": 0.07680967123691582, "grad_norm": 5.766633442884544, "learning_rate": 1.9201474201474203e-06, "loss": 1.5969, "step": 2605 }, { "epoch": 0.07695709862892526, "grad_norm": 5.8733622606436136, "learning_rate": 1.9238329238329237e-06, "loss": 1.5883, "step": 2610 }, { "epoch": 0.07710452602093469, "grad_norm": 6.975931138990729, "learning_rate": 1.9275184275184276e-06, "loss": 1.5607, "step": 2615 }, { "epoch": 0.07725195341294412, "grad_norm": 5.8497597269407535, "learning_rate": 1.9312039312039314e-06, "loss": 1.5749, "step": 2620 }, { "epoch": 0.07739938080495357, "grad_norm": 5.58148237484809, "learning_rate": 1.9348894348894353e-06, "loss": 1.569, "step": 2625 }, { "epoch": 0.077546808196963, "grad_norm": 5.313215247320638, "learning_rate": 1.9385749385749383e-06, "loss": 1.6081, "step": 2630 }, { "epoch": 0.07769423558897243, "grad_norm": 5.933853151638809, "learning_rate": 1.942260442260442e-06, "loss": 1.5472, "step": 2635 }, { "epoch": 0.07784166298098187, "grad_norm": 5.859407188180891, "learning_rate": 1.945945945945946e-06, "loss": 1.5861, "step": 2640 }, { "epoch": 0.0779890903729913, "grad_norm": 5.162300616256714, "learning_rate": 1.9496314496314495e-06, "loss": 1.5843, "step": 2645 }, { "epoch": 0.07813651776500073, "grad_norm": 5.7229458606607695, "learning_rate": 1.9533169533169534e-06, "loss": 1.5456, "step": 2650 }, { "epoch": 0.07828394515701018, "grad_norm": 5.7413300916733, "learning_rate": 1.9570024570024572e-06, "loss": 1.5251, "step": 2655 }, { "epoch": 0.0784313725490196, "grad_norm": 5.726110814560216, "learning_rate": 1.960687960687961e-06, "loss": 1.5819, "step": 2660 }, { "epoch": 0.07857879994102904, "grad_norm": 5.638999386478302, "learning_rate": 1.964373464373464e-06, "loss": 1.58, "step": 2665 }, { "epoch": 0.07872622733303848, "grad_norm": 6.032387620776518, "learning_rate": 1.968058968058968e-06, "loss": 1.5466, "step": 2670 }, { "epoch": 0.07887365472504791, "grad_norm": 4.992605072023976, "learning_rate": 1.971744471744472e-06, "loss": 1.5862, "step": 2675 }, { "epoch": 0.07902108211705734, "grad_norm": 5.206547610610573, "learning_rate": 1.9754299754299757e-06, "loss": 1.5736, "step": 2680 }, { "epoch": 0.07916850950906679, "grad_norm": 5.799364657673789, "learning_rate": 1.979115479115479e-06, "loss": 1.5908, "step": 2685 }, { "epoch": 0.07931593690107622, "grad_norm": 5.606726666544795, "learning_rate": 1.982800982800983e-06, "loss": 1.561, "step": 2690 }, { "epoch": 0.07946336429308566, "grad_norm": 5.245255557281611, "learning_rate": 1.9864864864864864e-06, "loss": 1.6126, "step": 2695 }, { "epoch": 0.07961079168509509, "grad_norm": 5.583426907232711, "learning_rate": 1.99017199017199e-06, "loss": 1.5329, "step": 2700 }, { "epoch": 0.07975821907710452, "grad_norm": 5.9584425999606605, "learning_rate": 1.9938574938574938e-06, "loss": 1.544, "step": 2705 }, { "epoch": 0.07990564646911397, "grad_norm": 5.355990493279444, "learning_rate": 1.9975429975429976e-06, "loss": 1.57, "step": 2710 }, { "epoch": 0.0800530738611234, "grad_norm": 5.753377430598455, "learning_rate": 2.0012285012285015e-06, "loss": 1.5509, "step": 2715 }, { "epoch": 0.08020050125313283, "grad_norm": 5.729794844153185, "learning_rate": 2.004914004914005e-06, "loss": 1.5327, "step": 2720 }, { "epoch": 0.08034792864514227, "grad_norm": 5.66470483248796, "learning_rate": 2.0085995085995088e-06, "loss": 1.5898, "step": 2725 }, { "epoch": 0.0804953560371517, "grad_norm": 5.876398812721234, "learning_rate": 2.0122850122850122e-06, "loss": 1.5935, "step": 2730 }, { "epoch": 0.08064278342916113, "grad_norm": 5.712957051747348, "learning_rate": 2.015970515970516e-06, "loss": 1.6284, "step": 2735 }, { "epoch": 0.08079021082117058, "grad_norm": 6.0691314904793225, "learning_rate": 2.0196560196560195e-06, "loss": 1.6097, "step": 2740 }, { "epoch": 0.08093763821318001, "grad_norm": 5.615532621590419, "learning_rate": 2.0233415233415234e-06, "loss": 1.5676, "step": 2745 }, { "epoch": 0.08108506560518944, "grad_norm": 5.892336683225733, "learning_rate": 2.0270270270270273e-06, "loss": 1.541, "step": 2750 }, { "epoch": 0.08123249299719888, "grad_norm": 6.121841333388253, "learning_rate": 2.0307125307125307e-06, "loss": 1.6127, "step": 2755 }, { "epoch": 0.08137992038920831, "grad_norm": 6.128042085848665, "learning_rate": 2.0343980343980346e-06, "loss": 1.6134, "step": 2760 }, { "epoch": 0.08152734778121776, "grad_norm": 6.016480821727076, "learning_rate": 2.038083538083538e-06, "loss": 1.5922, "step": 2765 }, { "epoch": 0.08167477517322719, "grad_norm": 4.962648937090439, "learning_rate": 2.041769041769042e-06, "loss": 1.5456, "step": 2770 }, { "epoch": 0.08182220256523662, "grad_norm": 5.434632047522005, "learning_rate": 2.0454545454545453e-06, "loss": 1.6083, "step": 2775 }, { "epoch": 0.08196962995724606, "grad_norm": 5.350930158050538, "learning_rate": 2.049140049140049e-06, "loss": 1.5893, "step": 2780 }, { "epoch": 0.08211705734925549, "grad_norm": 6.148742140459983, "learning_rate": 2.052825552825553e-06, "loss": 1.5274, "step": 2785 }, { "epoch": 0.08226448474126492, "grad_norm": 5.650500088284748, "learning_rate": 2.056511056511057e-06, "loss": 1.594, "step": 2790 }, { "epoch": 0.08241191213327437, "grad_norm": 5.6973801981048116, "learning_rate": 2.06019656019656e-06, "loss": 1.5512, "step": 2795 }, { "epoch": 0.0825593395252838, "grad_norm": 5.8965144267078955, "learning_rate": 2.063882063882064e-06, "loss": 1.6049, "step": 2800 }, { "epoch": 0.08270676691729323, "grad_norm": 5.309819930945153, "learning_rate": 2.0675675675675677e-06, "loss": 1.5366, "step": 2805 }, { "epoch": 0.08285419430930267, "grad_norm": 5.306567607459562, "learning_rate": 2.071253071253071e-06, "loss": 1.5885, "step": 2810 }, { "epoch": 0.0830016217013121, "grad_norm": 5.580736881180196, "learning_rate": 2.074938574938575e-06, "loss": 1.5311, "step": 2815 }, { "epoch": 0.08314904909332153, "grad_norm": 5.40216353588607, "learning_rate": 2.078624078624079e-06, "loss": 1.5587, "step": 2820 }, { "epoch": 0.08329647648533098, "grad_norm": 5.176539198872345, "learning_rate": 2.0823095823095827e-06, "loss": 1.5493, "step": 2825 }, { "epoch": 0.08344390387734041, "grad_norm": 5.322248403888444, "learning_rate": 2.0859950859950857e-06, "loss": 1.5704, "step": 2830 }, { "epoch": 0.08359133126934984, "grad_norm": 6.044406309796259, "learning_rate": 2.0896805896805896e-06, "loss": 1.59, "step": 2835 }, { "epoch": 0.08373875866135928, "grad_norm": 5.863304968402205, "learning_rate": 2.0933660933660934e-06, "loss": 1.5597, "step": 2840 }, { "epoch": 0.08388618605336871, "grad_norm": 5.020550665807446, "learning_rate": 2.0970515970515973e-06, "loss": 1.5498, "step": 2845 }, { "epoch": 0.08403361344537816, "grad_norm": 5.4955566835875915, "learning_rate": 2.1007371007371007e-06, "loss": 1.5812, "step": 2850 }, { "epoch": 0.08418104083738759, "grad_norm": 5.667971984297459, "learning_rate": 2.1044226044226046e-06, "loss": 1.5731, "step": 2855 }, { "epoch": 0.08432846822939702, "grad_norm": 5.665171886288633, "learning_rate": 2.1081081081081085e-06, "loss": 1.6505, "step": 2860 }, { "epoch": 0.08447589562140646, "grad_norm": 5.557577649899368, "learning_rate": 2.1117936117936115e-06, "loss": 1.5887, "step": 2865 }, { "epoch": 0.0846233230134159, "grad_norm": 4.691919108501698, "learning_rate": 2.1154791154791154e-06, "loss": 1.5648, "step": 2870 }, { "epoch": 0.08477075040542532, "grad_norm": 5.525925132015191, "learning_rate": 2.1191646191646192e-06, "loss": 1.5287, "step": 2875 }, { "epoch": 0.08491817779743477, "grad_norm": 5.428847766057488, "learning_rate": 2.122850122850123e-06, "loss": 1.5816, "step": 2880 }, { "epoch": 0.0850656051894442, "grad_norm": 5.736134226278227, "learning_rate": 2.1265356265356265e-06, "loss": 1.6022, "step": 2885 }, { "epoch": 0.08521303258145363, "grad_norm": 5.480492914228286, "learning_rate": 2.1302211302211304e-06, "loss": 1.6048, "step": 2890 }, { "epoch": 0.08536045997346307, "grad_norm": 5.625767849329854, "learning_rate": 2.1339066339066343e-06, "loss": 1.5861, "step": 2895 }, { "epoch": 0.0855078873654725, "grad_norm": 5.083759681820498, "learning_rate": 2.1375921375921377e-06, "loss": 1.5641, "step": 2900 }, { "epoch": 0.08565531475748193, "grad_norm": 6.5955372201462685, "learning_rate": 2.141277641277641e-06, "loss": 1.5853, "step": 2905 }, { "epoch": 0.08580274214949138, "grad_norm": 5.123395880317845, "learning_rate": 2.144963144963145e-06, "loss": 1.5851, "step": 2910 }, { "epoch": 0.08595016954150081, "grad_norm": 4.7346423420456665, "learning_rate": 2.148648648648649e-06, "loss": 1.5687, "step": 2915 }, { "epoch": 0.08609759693351024, "grad_norm": 6.029573311378683, "learning_rate": 2.1523341523341523e-06, "loss": 1.5483, "step": 2920 }, { "epoch": 0.08624502432551968, "grad_norm": 5.293400520604943, "learning_rate": 2.156019656019656e-06, "loss": 1.5033, "step": 2925 }, { "epoch": 0.08639245171752911, "grad_norm": 5.405125494297334, "learning_rate": 2.1597051597051596e-06, "loss": 1.5557, "step": 2930 }, { "epoch": 0.08653987910953856, "grad_norm": 5.88602515632899, "learning_rate": 2.1633906633906635e-06, "loss": 1.5608, "step": 2935 }, { "epoch": 0.08668730650154799, "grad_norm": 5.652850744911539, "learning_rate": 2.167076167076167e-06, "loss": 1.6041, "step": 2940 }, { "epoch": 0.08683473389355742, "grad_norm": 5.805283814669125, "learning_rate": 2.1707616707616708e-06, "loss": 1.5351, "step": 2945 }, { "epoch": 0.08698216128556686, "grad_norm": 5.717420757828651, "learning_rate": 2.1744471744471746e-06, "loss": 1.5902, "step": 2950 }, { "epoch": 0.0871295886775763, "grad_norm": 5.4419225821626505, "learning_rate": 2.1781326781326785e-06, "loss": 1.5745, "step": 2955 }, { "epoch": 0.08727701606958572, "grad_norm": 4.845650434565536, "learning_rate": 2.181818181818182e-06, "loss": 1.5203, "step": 2960 }, { "epoch": 0.08742444346159517, "grad_norm": 5.614428265760162, "learning_rate": 2.1855036855036854e-06, "loss": 1.5751, "step": 2965 }, { "epoch": 0.0875718708536046, "grad_norm": 5.7501124609187535, "learning_rate": 2.1891891891891893e-06, "loss": 1.5619, "step": 2970 }, { "epoch": 0.08771929824561403, "grad_norm": 5.193986493810577, "learning_rate": 2.1928746928746927e-06, "loss": 1.5398, "step": 2975 }, { "epoch": 0.08786672563762347, "grad_norm": 4.953723668432326, "learning_rate": 2.1965601965601966e-06, "loss": 1.5207, "step": 2980 }, { "epoch": 0.0880141530296329, "grad_norm": 5.07764837021217, "learning_rate": 2.2002457002457004e-06, "loss": 1.5892, "step": 2985 }, { "epoch": 0.08816158042164234, "grad_norm": 5.831584046110282, "learning_rate": 2.2039312039312043e-06, "loss": 1.576, "step": 2990 }, { "epoch": 0.08830900781365178, "grad_norm": 4.916013766358875, "learning_rate": 2.2076167076167077e-06, "loss": 1.5462, "step": 2995 }, { "epoch": 0.08845643520566121, "grad_norm": 5.095768353789963, "learning_rate": 2.211302211302211e-06, "loss": 1.6226, "step": 3000 }, { "epoch": 0.08845643520566121, "eval_loss": 1.315456509590149, "eval_runtime": 4.2545, "eval_samples_per_second": 93.077, "eval_steps_per_second": 3.056, "step": 3000 }, { "epoch": 0.08860386259767064, "grad_norm": 5.532399374511659, "learning_rate": 2.214987714987715e-06, "loss": 1.5859, "step": 3005 }, { "epoch": 0.08875128998968008, "grad_norm": 5.807528893233106, "learning_rate": 2.218673218673219e-06, "loss": 1.5729, "step": 3010 }, { "epoch": 0.08889871738168952, "grad_norm": 5.723411603703698, "learning_rate": 2.2223587223587223e-06, "loss": 1.5075, "step": 3015 }, { "epoch": 0.08904614477369896, "grad_norm": 5.512704055817731, "learning_rate": 2.226044226044226e-06, "loss": 1.5473, "step": 3020 }, { "epoch": 0.08919357216570839, "grad_norm": 5.117426401902597, "learning_rate": 2.22972972972973e-06, "loss": 1.5651, "step": 3025 }, { "epoch": 0.08934099955771782, "grad_norm": 5.708985022272007, "learning_rate": 2.233415233415233e-06, "loss": 1.5295, "step": 3030 }, { "epoch": 0.08948842694972726, "grad_norm": 5.482097712708165, "learning_rate": 2.237100737100737e-06, "loss": 1.5435, "step": 3035 }, { "epoch": 0.0896358543417367, "grad_norm": 5.707723806042086, "learning_rate": 2.240786240786241e-06, "loss": 1.577, "step": 3040 }, { "epoch": 0.08978328173374613, "grad_norm": 5.571048541828452, "learning_rate": 2.2444717444717447e-06, "loss": 1.5512, "step": 3045 }, { "epoch": 0.08993070912575557, "grad_norm": 4.992802713191392, "learning_rate": 2.248157248157248e-06, "loss": 1.5975, "step": 3050 }, { "epoch": 0.090078136517765, "grad_norm": 5.193778079742338, "learning_rate": 2.251842751842752e-06, "loss": 1.5588, "step": 3055 }, { "epoch": 0.09022556390977443, "grad_norm": 5.47802994162327, "learning_rate": 2.255528255528256e-06, "loss": 1.5214, "step": 3060 }, { "epoch": 0.09037299130178388, "grad_norm": 5.42861021948171, "learning_rate": 2.2592137592137593e-06, "loss": 1.5964, "step": 3065 }, { "epoch": 0.0905204186937933, "grad_norm": 5.387825297353141, "learning_rate": 2.2628992628992627e-06, "loss": 1.5062, "step": 3070 }, { "epoch": 0.09066784608580274, "grad_norm": 5.251798003962752, "learning_rate": 2.2665847665847666e-06, "loss": 1.5657, "step": 3075 }, { "epoch": 0.09081527347781218, "grad_norm": 5.450507898651486, "learning_rate": 2.2702702702702705e-06, "loss": 1.5858, "step": 3080 }, { "epoch": 0.09096270086982161, "grad_norm": 4.869485010772805, "learning_rate": 2.273955773955774e-06, "loss": 1.5895, "step": 3085 }, { "epoch": 0.09111012826183104, "grad_norm": 5.451508005536635, "learning_rate": 2.2776412776412778e-06, "loss": 1.5684, "step": 3090 }, { "epoch": 0.09125755565384049, "grad_norm": 5.001824389349824, "learning_rate": 2.2813267813267816e-06, "loss": 1.5482, "step": 3095 }, { "epoch": 0.09140498304584992, "grad_norm": 4.817233840674773, "learning_rate": 2.285012285012285e-06, "loss": 1.5186, "step": 3100 }, { "epoch": 0.09155241043785936, "grad_norm": 5.857783068463483, "learning_rate": 2.2886977886977885e-06, "loss": 1.5231, "step": 3105 }, { "epoch": 0.09169983782986879, "grad_norm": 5.459405614357861, "learning_rate": 2.2923832923832924e-06, "loss": 1.5748, "step": 3110 }, { "epoch": 0.09184726522187822, "grad_norm": 5.67601364690361, "learning_rate": 2.2960687960687962e-06, "loss": 1.5789, "step": 3115 }, { "epoch": 0.09199469261388767, "grad_norm": 5.514326600823962, "learning_rate": 2.2997542997543e-06, "loss": 1.5795, "step": 3120 }, { "epoch": 0.0921421200058971, "grad_norm": 5.213935706820417, "learning_rate": 2.3034398034398036e-06, "loss": 1.5362, "step": 3125 }, { "epoch": 0.09228954739790653, "grad_norm": 5.245918817381799, "learning_rate": 2.307125307125307e-06, "loss": 1.5194, "step": 3130 }, { "epoch": 0.09243697478991597, "grad_norm": 5.518358288703439, "learning_rate": 2.310810810810811e-06, "loss": 1.5505, "step": 3135 }, { "epoch": 0.0925844021819254, "grad_norm": 4.939392933117302, "learning_rate": 2.3144963144963143e-06, "loss": 1.5427, "step": 3140 }, { "epoch": 0.09273182957393483, "grad_norm": 5.131093534093756, "learning_rate": 2.318181818181818e-06, "loss": 1.5137, "step": 3145 }, { "epoch": 0.09287925696594428, "grad_norm": 5.150466339697616, "learning_rate": 2.321867321867322e-06, "loss": 1.5911, "step": 3150 }, { "epoch": 0.0930266843579537, "grad_norm": 4.90290310220582, "learning_rate": 2.325552825552826e-06, "loss": 1.5627, "step": 3155 }, { "epoch": 0.09317411174996314, "grad_norm": 5.061119230336674, "learning_rate": 2.3292383292383293e-06, "loss": 1.5622, "step": 3160 }, { "epoch": 0.09332153914197258, "grad_norm": 5.606703744657838, "learning_rate": 2.3329238329238328e-06, "loss": 1.4593, "step": 3165 }, { "epoch": 0.09346896653398201, "grad_norm": 8.121005627048005, "learning_rate": 2.3366093366093366e-06, "loss": 1.5502, "step": 3170 }, { "epoch": 0.09361639392599144, "grad_norm": 8.960094173459341, "learning_rate": 2.3402948402948405e-06, "loss": 1.5533, "step": 3175 }, { "epoch": 0.09376382131800089, "grad_norm": 13.492053165593376, "learning_rate": 2.343980343980344e-06, "loss": 1.52, "step": 3180 }, { "epoch": 0.09391124871001032, "grad_norm": 5.079224029000266, "learning_rate": 2.347665847665848e-06, "loss": 1.5803, "step": 3185 }, { "epoch": 0.09405867610201976, "grad_norm": 5.14960779076619, "learning_rate": 2.3513513513513517e-06, "loss": 1.5545, "step": 3190 }, { "epoch": 0.09420610349402919, "grad_norm": 5.669090256322424, "learning_rate": 2.355036855036855e-06, "loss": 1.5688, "step": 3195 }, { "epoch": 0.09435353088603862, "grad_norm": 5.3681032272155225, "learning_rate": 2.3587223587223586e-06, "loss": 1.5588, "step": 3200 }, { "epoch": 0.09450095827804807, "grad_norm": 5.006974983507809, "learning_rate": 2.3624078624078624e-06, "loss": 1.555, "step": 3205 }, { "epoch": 0.0946483856700575, "grad_norm": 5.211135487002072, "learning_rate": 2.3660933660933663e-06, "loss": 1.5703, "step": 3210 }, { "epoch": 0.09479581306206693, "grad_norm": 5.6432333344594126, "learning_rate": 2.3697788697788697e-06, "loss": 1.5437, "step": 3215 }, { "epoch": 0.09494324045407637, "grad_norm": 5.225594090751217, "learning_rate": 2.3734643734643736e-06, "loss": 1.5742, "step": 3220 }, { "epoch": 0.0950906678460858, "grad_norm": 4.787643776805635, "learning_rate": 2.3771498771498775e-06, "loss": 1.54, "step": 3225 }, { "epoch": 0.09523809523809523, "grad_norm": 5.13541308454328, "learning_rate": 2.380835380835381e-06, "loss": 1.5256, "step": 3230 }, { "epoch": 0.09538552263010468, "grad_norm": 5.189648929207905, "learning_rate": 2.3845208845208843e-06, "loss": 1.5593, "step": 3235 }, { "epoch": 0.09553295002211411, "grad_norm": 5.155401047847877, "learning_rate": 2.388206388206388e-06, "loss": 1.5624, "step": 3240 }, { "epoch": 0.09568037741412354, "grad_norm": 5.274728681545437, "learning_rate": 2.391891891891892e-06, "loss": 1.5893, "step": 3245 }, { "epoch": 0.09582780480613298, "grad_norm": 5.101545087717652, "learning_rate": 2.3955773955773955e-06, "loss": 1.5236, "step": 3250 }, { "epoch": 0.09597523219814241, "grad_norm": 5.409192180869291, "learning_rate": 2.3992628992628994e-06, "loss": 1.5374, "step": 3255 }, { "epoch": 0.09612265959015186, "grad_norm": 5.523989371590172, "learning_rate": 2.4029484029484032e-06, "loss": 1.5254, "step": 3260 }, { "epoch": 0.09627008698216129, "grad_norm": 5.095863624662689, "learning_rate": 2.4066339066339067e-06, "loss": 1.5517, "step": 3265 }, { "epoch": 0.09641751437417072, "grad_norm": 5.356495488626344, "learning_rate": 2.41031941031941e-06, "loss": 1.5929, "step": 3270 }, { "epoch": 0.09656494176618016, "grad_norm": 5.921294830878963, "learning_rate": 2.414004914004914e-06, "loss": 1.5695, "step": 3275 }, { "epoch": 0.09671236915818959, "grad_norm": 5.220779676738756, "learning_rate": 2.417690417690418e-06, "loss": 1.5298, "step": 3280 }, { "epoch": 0.09685979655019902, "grad_norm": 5.024820098702245, "learning_rate": 2.4213759213759213e-06, "loss": 1.5986, "step": 3285 }, { "epoch": 0.09700722394220847, "grad_norm": 5.289936796762004, "learning_rate": 2.425061425061425e-06, "loss": 1.6122, "step": 3290 }, { "epoch": 0.0971546513342179, "grad_norm": 4.7835839206787405, "learning_rate": 2.428746928746929e-06, "loss": 1.5481, "step": 3295 }, { "epoch": 0.09730207872622733, "grad_norm": 5.685833718497556, "learning_rate": 2.4324324324324325e-06, "loss": 1.567, "step": 3300 }, { "epoch": 0.09744950611823677, "grad_norm": 5.699032084737712, "learning_rate": 2.436117936117936e-06, "loss": 1.5398, "step": 3305 }, { "epoch": 0.0975969335102462, "grad_norm": 5.210661723195905, "learning_rate": 2.4398034398034398e-06, "loss": 1.5716, "step": 3310 }, { "epoch": 0.09774436090225563, "grad_norm": 4.636678516207649, "learning_rate": 2.4434889434889436e-06, "loss": 1.5612, "step": 3315 }, { "epoch": 0.09789178829426508, "grad_norm": 5.650915754496562, "learning_rate": 2.4471744471744475e-06, "loss": 1.5808, "step": 3320 }, { "epoch": 0.09803921568627451, "grad_norm": 5.520250160945193, "learning_rate": 2.450859950859951e-06, "loss": 1.5889, "step": 3325 }, { "epoch": 0.09818664307828394, "grad_norm": 5.582174169634553, "learning_rate": 2.454545454545455e-06, "loss": 1.5563, "step": 3330 }, { "epoch": 0.09833407047029338, "grad_norm": 5.021463368137592, "learning_rate": 2.4582309582309582e-06, "loss": 1.5868, "step": 3335 }, { "epoch": 0.09848149786230281, "grad_norm": 5.284464898465465, "learning_rate": 2.4619164619164617e-06, "loss": 1.5691, "step": 3340 }, { "epoch": 0.09862892525431226, "grad_norm": 6.181869842759779, "learning_rate": 2.4656019656019655e-06, "loss": 1.5261, "step": 3345 }, { "epoch": 0.09877635264632169, "grad_norm": 4.726352538077351, "learning_rate": 2.4692874692874694e-06, "loss": 1.6011, "step": 3350 }, { "epoch": 0.09892378003833112, "grad_norm": 4.860711879702271, "learning_rate": 2.4729729729729733e-06, "loss": 1.5932, "step": 3355 }, { "epoch": 0.09907120743034056, "grad_norm": 5.583661080840626, "learning_rate": 2.4766584766584767e-06, "loss": 1.533, "step": 3360 }, { "epoch": 0.09921863482235, "grad_norm": 5.437508330026464, "learning_rate": 2.48034398034398e-06, "loss": 1.6062, "step": 3365 }, { "epoch": 0.09936606221435942, "grad_norm": 5.030106585983966, "learning_rate": 2.484029484029484e-06, "loss": 1.5417, "step": 3370 }, { "epoch": 0.09951348960636887, "grad_norm": 4.811147125985856, "learning_rate": 2.487714987714988e-06, "loss": 1.5073, "step": 3375 }, { "epoch": 0.0996609169983783, "grad_norm": 5.263939977370588, "learning_rate": 2.4914004914004913e-06, "loss": 1.6237, "step": 3380 }, { "epoch": 0.09980834439038773, "grad_norm": 5.133340630222783, "learning_rate": 2.495085995085995e-06, "loss": 1.5645, "step": 3385 }, { "epoch": 0.09995577178239717, "grad_norm": 5.404595525678628, "learning_rate": 2.498771498771499e-06, "loss": 1.5276, "step": 3390 }, { "epoch": 0.1001031991744066, "grad_norm": 4.840501831936331, "learning_rate": 2.5024570024570025e-06, "loss": 1.5342, "step": 3395 }, { "epoch": 0.10025062656641603, "grad_norm": 4.788091745541234, "learning_rate": 2.506142506142506e-06, "loss": 1.5284, "step": 3400 }, { "epoch": 0.10039805395842548, "grad_norm": 5.0469033550672515, "learning_rate": 2.50982800982801e-06, "loss": 1.5795, "step": 3405 }, { "epoch": 0.10054548135043491, "grad_norm": 4.919028034531004, "learning_rate": 2.5135135135135137e-06, "loss": 1.5498, "step": 3410 }, { "epoch": 0.10069290874244434, "grad_norm": 5.058853666146796, "learning_rate": 2.517199017199017e-06, "loss": 1.5334, "step": 3415 }, { "epoch": 0.10084033613445378, "grad_norm": 5.008656370188883, "learning_rate": 2.520884520884521e-06, "loss": 1.5832, "step": 3420 }, { "epoch": 0.10098776352646321, "grad_norm": 5.065778526550041, "learning_rate": 2.524570024570025e-06, "loss": 1.5541, "step": 3425 }, { "epoch": 0.10113519091847266, "grad_norm": 5.184140559544803, "learning_rate": 2.5282555282555287e-06, "loss": 1.5471, "step": 3430 }, { "epoch": 0.10128261831048209, "grad_norm": 5.283574522943525, "learning_rate": 2.5319410319410317e-06, "loss": 1.6078, "step": 3435 }, { "epoch": 0.10143004570249152, "grad_norm": 4.97944812020313, "learning_rate": 2.5356265356265356e-06, "loss": 1.578, "step": 3440 }, { "epoch": 0.10157747309450096, "grad_norm": 4.9686275585514075, "learning_rate": 2.5393120393120395e-06, "loss": 1.5345, "step": 3445 }, { "epoch": 0.1017249004865104, "grad_norm": 5.327108043482793, "learning_rate": 2.542997542997543e-06, "loss": 1.5485, "step": 3450 }, { "epoch": 0.10187232787851982, "grad_norm": 5.0909384251317675, "learning_rate": 2.5466830466830468e-06, "loss": 1.5792, "step": 3455 }, { "epoch": 0.10201975527052927, "grad_norm": 4.95418759171407, "learning_rate": 2.5503685503685506e-06, "loss": 1.549, "step": 3460 }, { "epoch": 0.1021671826625387, "grad_norm": 4.774256695000534, "learning_rate": 2.554054054054054e-06, "loss": 1.5711, "step": 3465 }, { "epoch": 0.10231461005454813, "grad_norm": 4.917654265931028, "learning_rate": 2.5577395577395575e-06, "loss": 1.5776, "step": 3470 }, { "epoch": 0.10246203744655757, "grad_norm": 5.33462303025168, "learning_rate": 2.5614250614250614e-06, "loss": 1.5755, "step": 3475 }, { "epoch": 0.102609464838567, "grad_norm": 4.727044242677108, "learning_rate": 2.5651105651105652e-06, "loss": 1.5408, "step": 3480 }, { "epoch": 0.10275689223057644, "grad_norm": 5.35480792596283, "learning_rate": 2.568796068796069e-06, "loss": 1.5396, "step": 3485 }, { "epoch": 0.10290431962258588, "grad_norm": 5.337016256917022, "learning_rate": 2.5724815724815725e-06, "loss": 1.5355, "step": 3490 }, { "epoch": 0.10305174701459531, "grad_norm": 4.846485646227201, "learning_rate": 2.5761670761670764e-06, "loss": 1.5549, "step": 3495 }, { "epoch": 0.10319917440660474, "grad_norm": 4.682220583824321, "learning_rate": 2.57985257985258e-06, "loss": 1.5106, "step": 3500 }, { "epoch": 0.10319917440660474, "eval_loss": 1.297115445137024, "eval_runtime": 4.1927, "eval_samples_per_second": 94.449, "eval_steps_per_second": 3.101, "step": 3500 }, { "epoch": 0.10334660179861418, "grad_norm": 5.701002236963091, "learning_rate": 2.5835380835380833e-06, "loss": 1.5619, "step": 3505 }, { "epoch": 0.10349402919062362, "grad_norm": 5.081480707479078, "learning_rate": 2.587223587223587e-06, "loss": 1.5713, "step": 3510 }, { "epoch": 0.10364145658263306, "grad_norm": 4.95586271303443, "learning_rate": 2.590909090909091e-06, "loss": 1.5637, "step": 3515 }, { "epoch": 0.10378888397464249, "grad_norm": 5.7586762762719035, "learning_rate": 2.594594594594595e-06, "loss": 1.555, "step": 3520 }, { "epoch": 0.10393631136665192, "grad_norm": 4.9199110144276075, "learning_rate": 2.5982800982800983e-06, "loss": 1.5261, "step": 3525 }, { "epoch": 0.10408373875866136, "grad_norm": 5.103926098275406, "learning_rate": 2.601965601965602e-06, "loss": 1.5522, "step": 3530 }, { "epoch": 0.1042311661506708, "grad_norm": 5.308410545513771, "learning_rate": 2.6056511056511056e-06, "loss": 1.5206, "step": 3535 }, { "epoch": 0.10437859354268023, "grad_norm": 4.892672842570433, "learning_rate": 2.6093366093366095e-06, "loss": 1.572, "step": 3540 }, { "epoch": 0.10452602093468967, "grad_norm": 4.958914116027942, "learning_rate": 2.613022113022113e-06, "loss": 1.5653, "step": 3545 }, { "epoch": 0.1046734483266991, "grad_norm": 5.331631905296775, "learning_rate": 2.616707616707617e-06, "loss": 1.5398, "step": 3550 }, { "epoch": 0.10482087571870853, "grad_norm": 4.615001823581313, "learning_rate": 2.6203931203931207e-06, "loss": 1.5234, "step": 3555 }, { "epoch": 0.10496830311071798, "grad_norm": 5.806055712995913, "learning_rate": 2.624078624078624e-06, "loss": 1.5459, "step": 3560 }, { "epoch": 0.1051157305027274, "grad_norm": 4.760850213529344, "learning_rate": 2.6277641277641275e-06, "loss": 1.4927, "step": 3565 }, { "epoch": 0.10526315789473684, "grad_norm": 5.422479803518235, "learning_rate": 2.6314496314496314e-06, "loss": 1.5144, "step": 3570 }, { "epoch": 0.10541058528674628, "grad_norm": 4.875435873794537, "learning_rate": 2.6351351351351353e-06, "loss": 1.5307, "step": 3575 }, { "epoch": 0.10555801267875571, "grad_norm": 4.75998207046485, "learning_rate": 2.6388206388206387e-06, "loss": 1.5828, "step": 3580 }, { "epoch": 0.10570544007076514, "grad_norm": 4.481628916290409, "learning_rate": 2.6425061425061426e-06, "loss": 1.5904, "step": 3585 }, { "epoch": 0.10585286746277459, "grad_norm": 5.048056725565446, "learning_rate": 2.6461916461916464e-06, "loss": 1.5402, "step": 3590 }, { "epoch": 0.10600029485478402, "grad_norm": 5.2548834949134395, "learning_rate": 2.6498771498771503e-06, "loss": 1.5668, "step": 3595 }, { "epoch": 0.10614772224679346, "grad_norm": 4.753732712819008, "learning_rate": 2.6535626535626533e-06, "loss": 1.5393, "step": 3600 }, { "epoch": 0.10629514963880289, "grad_norm": 5.079581504607287, "learning_rate": 2.657248157248157e-06, "loss": 1.5196, "step": 3605 }, { "epoch": 0.10644257703081232, "grad_norm": 4.773308716710452, "learning_rate": 2.660933660933661e-06, "loss": 1.5658, "step": 3610 }, { "epoch": 0.10659000442282177, "grad_norm": 5.061283990316247, "learning_rate": 2.6646191646191645e-06, "loss": 1.5698, "step": 3615 }, { "epoch": 0.1067374318148312, "grad_norm": 4.911010481864436, "learning_rate": 2.6683046683046684e-06, "loss": 1.5506, "step": 3620 }, { "epoch": 0.10688485920684063, "grad_norm": 4.70291520360516, "learning_rate": 2.6719901719901722e-06, "loss": 1.5596, "step": 3625 }, { "epoch": 0.10703228659885007, "grad_norm": 5.873657967446069, "learning_rate": 2.675675675675676e-06, "loss": 1.4843, "step": 3630 }, { "epoch": 0.1071797139908595, "grad_norm": 9.857632605933828, "learning_rate": 2.679361179361179e-06, "loss": 1.5797, "step": 3635 }, { "epoch": 0.10732714138286893, "grad_norm": 5.3780939796998695, "learning_rate": 2.683046683046683e-06, "loss": 1.4947, "step": 3640 }, { "epoch": 0.10747456877487838, "grad_norm": 4.99069857836035, "learning_rate": 2.686732186732187e-06, "loss": 1.5582, "step": 3645 }, { "epoch": 0.1076219961668878, "grad_norm": 5.325447232314799, "learning_rate": 2.6904176904176907e-06, "loss": 1.561, "step": 3650 }, { "epoch": 0.10776942355889724, "grad_norm": 4.69162509279504, "learning_rate": 2.694103194103194e-06, "loss": 1.5796, "step": 3655 }, { "epoch": 0.10791685095090668, "grad_norm": 5.098459761275713, "learning_rate": 2.697788697788698e-06, "loss": 1.5582, "step": 3660 }, { "epoch": 0.10806427834291611, "grad_norm": 5.3902688403091865, "learning_rate": 2.7014742014742014e-06, "loss": 1.5149, "step": 3665 }, { "epoch": 0.10821170573492554, "grad_norm": 5.095612404721856, "learning_rate": 2.705159705159705e-06, "loss": 1.5339, "step": 3670 }, { "epoch": 0.10835913312693499, "grad_norm": 5.114771789553405, "learning_rate": 2.7088452088452088e-06, "loss": 1.5298, "step": 3675 }, { "epoch": 0.10850656051894442, "grad_norm": 4.797483671540377, "learning_rate": 2.7125307125307126e-06, "loss": 1.5254, "step": 3680 }, { "epoch": 0.10865398791095386, "grad_norm": 5.196460689753555, "learning_rate": 2.7162162162162165e-06, "loss": 1.567, "step": 3685 }, { "epoch": 0.10880141530296329, "grad_norm": 5.105901892429125, "learning_rate": 2.71990171990172e-06, "loss": 1.5757, "step": 3690 }, { "epoch": 0.10894884269497272, "grad_norm": 5.166938690439382, "learning_rate": 2.7235872235872238e-06, "loss": 1.567, "step": 3695 }, { "epoch": 0.10909627008698217, "grad_norm": 5.356787019442545, "learning_rate": 2.7272727272727272e-06, "loss": 1.5602, "step": 3700 }, { "epoch": 0.1092436974789916, "grad_norm": 5.187971809780159, "learning_rate": 2.730958230958231e-06, "loss": 1.5535, "step": 3705 }, { "epoch": 0.10939112487100103, "grad_norm": 4.778754237280304, "learning_rate": 2.7346437346437345e-06, "loss": 1.5322, "step": 3710 }, { "epoch": 0.10953855226301047, "grad_norm": 4.541399221136939, "learning_rate": 2.7383292383292384e-06, "loss": 1.5367, "step": 3715 }, { "epoch": 0.1096859796550199, "grad_norm": 4.609841441115413, "learning_rate": 2.7420147420147423e-06, "loss": 1.5097, "step": 3720 }, { "epoch": 0.10983340704702933, "grad_norm": 5.007780995576974, "learning_rate": 2.7457002457002457e-06, "loss": 1.53, "step": 3725 }, { "epoch": 0.10998083443903878, "grad_norm": 5.050333117875373, "learning_rate": 2.7493857493857496e-06, "loss": 1.5515, "step": 3730 }, { "epoch": 0.11012826183104821, "grad_norm": 6.03915354822342, "learning_rate": 2.753071253071253e-06, "loss": 1.517, "step": 3735 }, { "epoch": 0.11027568922305764, "grad_norm": 5.253169097831777, "learning_rate": 2.756756756756757e-06, "loss": 1.4972, "step": 3740 }, { "epoch": 0.11042311661506708, "grad_norm": 5.349604816251573, "learning_rate": 2.7604422604422603e-06, "loss": 1.5603, "step": 3745 }, { "epoch": 0.11057054400707651, "grad_norm": 5.163313590600435, "learning_rate": 2.764127764127764e-06, "loss": 1.5593, "step": 3750 }, { "epoch": 0.11071797139908596, "grad_norm": 4.968746785068509, "learning_rate": 2.767813267813268e-06, "loss": 1.492, "step": 3755 }, { "epoch": 0.11086539879109539, "grad_norm": 4.80956823259557, "learning_rate": 2.771498771498772e-06, "loss": 1.549, "step": 3760 }, { "epoch": 0.11101282618310482, "grad_norm": 5.0058003913589895, "learning_rate": 2.7751842751842753e-06, "loss": 1.5029, "step": 3765 }, { "epoch": 0.11116025357511426, "grad_norm": 5.345351114412537, "learning_rate": 2.778869778869779e-06, "loss": 1.5417, "step": 3770 }, { "epoch": 0.11130768096712369, "grad_norm": 4.510820014653224, "learning_rate": 2.7825552825552827e-06, "loss": 1.5284, "step": 3775 }, { "epoch": 0.11145510835913312, "grad_norm": 4.624433433863288, "learning_rate": 2.786240786240786e-06, "loss": 1.5472, "step": 3780 }, { "epoch": 0.11160253575114257, "grad_norm": 5.122459522949579, "learning_rate": 2.78992628992629e-06, "loss": 1.532, "step": 3785 }, { "epoch": 0.111749963143152, "grad_norm": 5.375291166914608, "learning_rate": 2.793611793611794e-06, "loss": 1.5285, "step": 3790 }, { "epoch": 0.11189739053516143, "grad_norm": 5.012838594143527, "learning_rate": 2.7972972972972977e-06, "loss": 1.5417, "step": 3795 }, { "epoch": 0.11204481792717087, "grad_norm": 4.8949072403304275, "learning_rate": 2.8009828009828007e-06, "loss": 1.5474, "step": 3800 }, { "epoch": 0.1121922453191803, "grad_norm": 4.799102359859115, "learning_rate": 2.8046683046683046e-06, "loss": 1.5202, "step": 3805 }, { "epoch": 0.11233967271118973, "grad_norm": 5.4251407374357346, "learning_rate": 2.8083538083538084e-06, "loss": 1.5385, "step": 3810 }, { "epoch": 0.11248710010319918, "grad_norm": 4.696443002942902, "learning_rate": 2.8120393120393123e-06, "loss": 1.5961, "step": 3815 }, { "epoch": 0.11263452749520861, "grad_norm": 5.264955335631435, "learning_rate": 2.8157248157248157e-06, "loss": 1.5558, "step": 3820 }, { "epoch": 0.11278195488721804, "grad_norm": 4.8847420200085825, "learning_rate": 2.8194103194103196e-06, "loss": 1.5673, "step": 3825 }, { "epoch": 0.11292938227922748, "grad_norm": 4.446546349159201, "learning_rate": 2.8230958230958235e-06, "loss": 1.5315, "step": 3830 }, { "epoch": 0.11307680967123691, "grad_norm": 4.841377253745106, "learning_rate": 2.8267813267813265e-06, "loss": 1.5733, "step": 3835 }, { "epoch": 0.11322423706324636, "grad_norm": 4.710850077901208, "learning_rate": 2.8304668304668304e-06, "loss": 1.5808, "step": 3840 }, { "epoch": 0.11337166445525579, "grad_norm": 4.957588828362327, "learning_rate": 2.8341523341523342e-06, "loss": 1.5373, "step": 3845 }, { "epoch": 0.11351909184726522, "grad_norm": 4.819341209916732, "learning_rate": 2.837837837837838e-06, "loss": 1.578, "step": 3850 }, { "epoch": 0.11366651923927466, "grad_norm": 4.978397126599328, "learning_rate": 2.8415233415233415e-06, "loss": 1.5509, "step": 3855 }, { "epoch": 0.1138139466312841, "grad_norm": 4.608949403039753, "learning_rate": 2.8452088452088454e-06, "loss": 1.5688, "step": 3860 }, { "epoch": 0.11396137402329352, "grad_norm": 5.07313810502031, "learning_rate": 2.8488943488943493e-06, "loss": 1.5528, "step": 3865 }, { "epoch": 0.11410880141530297, "grad_norm": 4.682159001691932, "learning_rate": 2.8525798525798527e-06, "loss": 1.576, "step": 3870 }, { "epoch": 0.1142562288073124, "grad_norm": 4.800541455500152, "learning_rate": 2.856265356265356e-06, "loss": 1.5486, "step": 3875 }, { "epoch": 0.11440365619932183, "grad_norm": 4.702422234952057, "learning_rate": 2.85995085995086e-06, "loss": 1.5466, "step": 3880 }, { "epoch": 0.11455108359133127, "grad_norm": 5.266729384541099, "learning_rate": 2.863636363636364e-06, "loss": 1.5906, "step": 3885 }, { "epoch": 0.1146985109833407, "grad_norm": 4.967935949572244, "learning_rate": 2.8673218673218673e-06, "loss": 1.5911, "step": 3890 }, { "epoch": 0.11484593837535013, "grad_norm": 5.009813388496023, "learning_rate": 2.871007371007371e-06, "loss": 1.5939, "step": 3895 }, { "epoch": 0.11499336576735958, "grad_norm": 5.188143433814658, "learning_rate": 2.8746928746928746e-06, "loss": 1.5611, "step": 3900 }, { "epoch": 0.11514079315936901, "grad_norm": 4.520245864687655, "learning_rate": 2.8783783783783785e-06, "loss": 1.517, "step": 3905 }, { "epoch": 0.11528822055137844, "grad_norm": 4.465689990824497, "learning_rate": 2.882063882063882e-06, "loss": 1.5502, "step": 3910 }, { "epoch": 0.11543564794338788, "grad_norm": 4.993991932626459, "learning_rate": 2.8857493857493858e-06, "loss": 1.5451, "step": 3915 }, { "epoch": 0.11558307533539731, "grad_norm": 5.382898816884267, "learning_rate": 2.8894348894348896e-06, "loss": 1.5808, "step": 3920 }, { "epoch": 0.11573050272740676, "grad_norm": 4.53973586513479, "learning_rate": 2.8931203931203935e-06, "loss": 1.5194, "step": 3925 }, { "epoch": 0.11587793011941619, "grad_norm": 4.775653546080999, "learning_rate": 2.896805896805897e-06, "loss": 1.5481, "step": 3930 }, { "epoch": 0.11602535751142562, "grad_norm": 4.962586263557358, "learning_rate": 2.9004914004914004e-06, "loss": 1.6068, "step": 3935 }, { "epoch": 0.11617278490343506, "grad_norm": 4.884359115008496, "learning_rate": 2.9041769041769043e-06, "loss": 1.5365, "step": 3940 }, { "epoch": 0.1163202122954445, "grad_norm": 5.048551025112042, "learning_rate": 2.9078624078624077e-06, "loss": 1.5187, "step": 3945 }, { "epoch": 0.11646763968745392, "grad_norm": 4.558967167485648, "learning_rate": 2.9115479115479116e-06, "loss": 1.5554, "step": 3950 }, { "epoch": 0.11661506707946337, "grad_norm": 4.471138109516607, "learning_rate": 2.9152334152334154e-06, "loss": 1.4963, "step": 3955 }, { "epoch": 0.1167624944714728, "grad_norm": 4.832778931529353, "learning_rate": 2.9189189189189193e-06, "loss": 1.5534, "step": 3960 }, { "epoch": 0.11690992186348223, "grad_norm": 4.658115830146915, "learning_rate": 2.9226044226044227e-06, "loss": 1.5198, "step": 3965 }, { "epoch": 0.11705734925549167, "grad_norm": 4.386986759226401, "learning_rate": 2.926289926289926e-06, "loss": 1.5449, "step": 3970 }, { "epoch": 0.1172047766475011, "grad_norm": 4.893714160318801, "learning_rate": 2.92997542997543e-06, "loss": 1.5773, "step": 3975 }, { "epoch": 0.11735220403951054, "grad_norm": 4.769855730357969, "learning_rate": 2.933660933660934e-06, "loss": 1.5552, "step": 3980 }, { "epoch": 0.11749963143151998, "grad_norm": 4.808361744907782, "learning_rate": 2.9373464373464373e-06, "loss": 1.5102, "step": 3985 }, { "epoch": 0.11764705882352941, "grad_norm": 4.978824258013667, "learning_rate": 2.941031941031941e-06, "loss": 1.5427, "step": 3990 }, { "epoch": 0.11779448621553884, "grad_norm": 4.611591933768196, "learning_rate": 2.944717444717445e-06, "loss": 1.5516, "step": 3995 }, { "epoch": 0.11794191360754828, "grad_norm": 4.576954944138754, "learning_rate": 2.948402948402948e-06, "loss": 1.5651, "step": 4000 }, { "epoch": 0.11794191360754828, "eval_loss": 1.2853575944900513, "eval_runtime": 4.2067, "eval_samples_per_second": 94.135, "eval_steps_per_second": 3.09, "step": 4000 }, { "epoch": 0.11808934099955772, "grad_norm": 4.732427929658077, "learning_rate": 2.952088452088452e-06, "loss": 1.5203, "step": 4005 }, { "epoch": 0.11823676839156716, "grad_norm": 4.689407374485043, "learning_rate": 2.955773955773956e-06, "loss": 1.5366, "step": 4010 }, { "epoch": 0.11838419578357659, "grad_norm": 5.084907213074423, "learning_rate": 2.9594594594594597e-06, "loss": 1.537, "step": 4015 }, { "epoch": 0.11853162317558602, "grad_norm": 5.2281329830111165, "learning_rate": 2.963144963144963e-06, "loss": 1.5371, "step": 4020 }, { "epoch": 0.11867905056759546, "grad_norm": 6.817466206062417, "learning_rate": 2.966830466830467e-06, "loss": 1.5707, "step": 4025 }, { "epoch": 0.1188264779596049, "grad_norm": 4.905161087563407, "learning_rate": 2.970515970515971e-06, "loss": 1.5479, "step": 4030 }, { "epoch": 0.11897390535161433, "grad_norm": 4.710337441815403, "learning_rate": 2.9742014742014743e-06, "loss": 1.5623, "step": 4035 }, { "epoch": 0.11912133274362377, "grad_norm": 4.687694634237855, "learning_rate": 2.9778869778869777e-06, "loss": 1.5128, "step": 4040 }, { "epoch": 0.1192687601356332, "grad_norm": 4.820302917092024, "learning_rate": 2.9815724815724816e-06, "loss": 1.5636, "step": 4045 }, { "epoch": 0.11941618752764263, "grad_norm": 8.556107184959185, "learning_rate": 2.9852579852579855e-06, "loss": 1.5771, "step": 4050 }, { "epoch": 0.11956361491965208, "grad_norm": 5.292532388201866, "learning_rate": 2.988943488943489e-06, "loss": 1.54, "step": 4055 }, { "epoch": 0.1197110423116615, "grad_norm": 4.839105028005585, "learning_rate": 2.9926289926289928e-06, "loss": 1.5454, "step": 4060 }, { "epoch": 0.11985846970367094, "grad_norm": 5.163797382986242, "learning_rate": 2.9963144963144966e-06, "loss": 1.5843, "step": 4065 }, { "epoch": 0.12000589709568038, "grad_norm": 5.3997636049238436, "learning_rate": 3e-06, "loss": 1.5633, "step": 4070 }, { "epoch": 0.12015332448768981, "grad_norm": 4.644273423115345, "learning_rate": 3.0036855036855035e-06, "loss": 1.5268, "step": 4075 }, { "epoch": 0.12030075187969924, "grad_norm": 4.554894017986495, "learning_rate": 3.0073710073710074e-06, "loss": 1.5184, "step": 4080 }, { "epoch": 0.12044817927170869, "grad_norm": 4.529772768805013, "learning_rate": 3.0110565110565112e-06, "loss": 1.5036, "step": 4085 }, { "epoch": 0.12059560666371812, "grad_norm": 4.9275135687071945, "learning_rate": 3.014742014742015e-06, "loss": 1.5566, "step": 4090 }, { "epoch": 0.12074303405572756, "grad_norm": 4.955892422571594, "learning_rate": 3.0184275184275186e-06, "loss": 1.5404, "step": 4095 }, { "epoch": 0.12089046144773699, "grad_norm": 4.6951565307983065, "learning_rate": 3.0221130221130224e-06, "loss": 1.5624, "step": 4100 }, { "epoch": 0.12103788883974642, "grad_norm": 5.088582957520457, "learning_rate": 3.025798525798526e-06, "loss": 1.5306, "step": 4105 }, { "epoch": 0.12118531623175587, "grad_norm": 4.710224319726949, "learning_rate": 3.0294840294840293e-06, "loss": 1.5302, "step": 4110 }, { "epoch": 0.1213327436237653, "grad_norm": 4.935938221891261, "learning_rate": 3.033169533169533e-06, "loss": 1.6089, "step": 4115 }, { "epoch": 0.12148017101577473, "grad_norm": 5.031166381274814, "learning_rate": 3.036855036855037e-06, "loss": 1.5706, "step": 4120 }, { "epoch": 0.12162759840778417, "grad_norm": 5.0639776342342415, "learning_rate": 3.040540540540541e-06, "loss": 1.5475, "step": 4125 }, { "epoch": 0.1217750257997936, "grad_norm": 4.7452919824851625, "learning_rate": 3.0442260442260443e-06, "loss": 1.5198, "step": 4130 }, { "epoch": 0.12192245319180303, "grad_norm": 5.373053997905438, "learning_rate": 3.0479115479115478e-06, "loss": 1.5523, "step": 4135 }, { "epoch": 0.12206988058381248, "grad_norm": 4.631640099306868, "learning_rate": 3.0515970515970516e-06, "loss": 1.5295, "step": 4140 }, { "epoch": 0.1222173079758219, "grad_norm": 5.737448071468093, "learning_rate": 3.0552825552825555e-06, "loss": 1.5641, "step": 4145 }, { "epoch": 0.12236473536783134, "grad_norm": 4.828525087764085, "learning_rate": 3.058968058968059e-06, "loss": 1.5443, "step": 4150 }, { "epoch": 0.12251216275984078, "grad_norm": 5.069136743109657, "learning_rate": 3.062653562653563e-06, "loss": 1.5578, "step": 4155 }, { "epoch": 0.12265959015185021, "grad_norm": 4.73994033840109, "learning_rate": 3.0663390663390667e-06, "loss": 1.5317, "step": 4160 }, { "epoch": 0.12280701754385964, "grad_norm": 4.666639015026299, "learning_rate": 3.07002457002457e-06, "loss": 1.4997, "step": 4165 }, { "epoch": 0.12295444493586909, "grad_norm": 4.713695731392397, "learning_rate": 3.0737100737100736e-06, "loss": 1.5835, "step": 4170 }, { "epoch": 0.12310187232787852, "grad_norm": 4.985225344774401, "learning_rate": 3.0773955773955774e-06, "loss": 1.5554, "step": 4175 }, { "epoch": 0.12324929971988796, "grad_norm": 4.658096506615796, "learning_rate": 3.0810810810810813e-06, "loss": 1.559, "step": 4180 }, { "epoch": 0.12339672711189739, "grad_norm": 4.571663526510476, "learning_rate": 3.0847665847665847e-06, "loss": 1.6037, "step": 4185 }, { "epoch": 0.12354415450390682, "grad_norm": 4.919622535449413, "learning_rate": 3.0884520884520886e-06, "loss": 1.5616, "step": 4190 }, { "epoch": 0.12369158189591627, "grad_norm": 5.16438183607393, "learning_rate": 3.0921375921375925e-06, "loss": 1.5808, "step": 4195 }, { "epoch": 0.1238390092879257, "grad_norm": 4.715498368143993, "learning_rate": 3.095823095823096e-06, "loss": 1.5748, "step": 4200 }, { "epoch": 0.12398643667993513, "grad_norm": 4.759893879753685, "learning_rate": 3.0995085995085993e-06, "loss": 1.5715, "step": 4205 }, { "epoch": 0.12413386407194457, "grad_norm": 4.631789504235767, "learning_rate": 3.103194103194103e-06, "loss": 1.5209, "step": 4210 }, { "epoch": 0.124281291463954, "grad_norm": 4.989392326840394, "learning_rate": 3.106879606879607e-06, "loss": 1.6106, "step": 4215 }, { "epoch": 0.12442871885596343, "grad_norm": 4.59673299112241, "learning_rate": 3.1105651105651105e-06, "loss": 1.5346, "step": 4220 }, { "epoch": 0.12457614624797288, "grad_norm": 4.782739313632573, "learning_rate": 3.1142506142506144e-06, "loss": 1.5517, "step": 4225 }, { "epoch": 0.12472357363998231, "grad_norm": 4.895963546986295, "learning_rate": 3.1179361179361182e-06, "loss": 1.5214, "step": 4230 }, { "epoch": 0.12487100103199174, "grad_norm": 5.27687023269987, "learning_rate": 3.1216216216216217e-06, "loss": 1.5084, "step": 4235 }, { "epoch": 0.12501842842400118, "grad_norm": 4.870830541435462, "learning_rate": 3.125307125307125e-06, "loss": 1.5743, "step": 4240 }, { "epoch": 0.1251658558160106, "grad_norm": 4.903213767799196, "learning_rate": 3.128992628992629e-06, "loss": 1.5038, "step": 4245 }, { "epoch": 0.12531328320802004, "grad_norm": 4.70230088699987, "learning_rate": 3.132678132678133e-06, "loss": 1.5618, "step": 4250 }, { "epoch": 0.12546071060002947, "grad_norm": 5.158104727388008, "learning_rate": 3.1363636363636363e-06, "loss": 1.5554, "step": 4255 }, { "epoch": 0.12560813799203893, "grad_norm": 4.7890214026697375, "learning_rate": 3.14004914004914e-06, "loss": 1.5344, "step": 4260 }, { "epoch": 0.12575556538404836, "grad_norm": 4.919785299481858, "learning_rate": 3.143734643734644e-06, "loss": 1.5405, "step": 4265 }, { "epoch": 0.1259029927760578, "grad_norm": 5.0813320335269365, "learning_rate": 3.1474201474201475e-06, "loss": 1.5145, "step": 4270 }, { "epoch": 0.12605042016806722, "grad_norm": 4.652024750886404, "learning_rate": 3.151105651105651e-06, "loss": 1.5558, "step": 4275 }, { "epoch": 0.12619784756007665, "grad_norm": 5.148035551838379, "learning_rate": 3.1547911547911548e-06, "loss": 1.5504, "step": 4280 }, { "epoch": 0.12634527495208608, "grad_norm": 5.10691114835925, "learning_rate": 3.1584766584766586e-06, "loss": 1.5342, "step": 4285 }, { "epoch": 0.12649270234409554, "grad_norm": 4.7378636689477505, "learning_rate": 3.1621621621621625e-06, "loss": 1.5129, "step": 4290 }, { "epoch": 0.12664012973610497, "grad_norm": 4.463790066328153, "learning_rate": 3.165847665847666e-06, "loss": 1.5517, "step": 4295 }, { "epoch": 0.1267875571281144, "grad_norm": 4.485041843980907, "learning_rate": 3.16953316953317e-06, "loss": 1.5376, "step": 4300 }, { "epoch": 0.12693498452012383, "grad_norm": 4.551496674068248, "learning_rate": 3.1732186732186732e-06, "loss": 1.5124, "step": 4305 }, { "epoch": 0.12708241191213326, "grad_norm": 4.573414693237346, "learning_rate": 3.1769041769041767e-06, "loss": 1.5173, "step": 4310 }, { "epoch": 0.12722983930414272, "grad_norm": 5.1878652990611025, "learning_rate": 3.1805896805896805e-06, "loss": 1.5494, "step": 4315 }, { "epoch": 0.12737726669615215, "grad_norm": 4.816233904226429, "learning_rate": 3.1842751842751844e-06, "loss": 1.5328, "step": 4320 }, { "epoch": 0.12752469408816158, "grad_norm": 4.653678403269613, "learning_rate": 3.1879606879606883e-06, "loss": 1.5455, "step": 4325 }, { "epoch": 0.127672121480171, "grad_norm": 4.903280176809807, "learning_rate": 3.1916461916461917e-06, "loss": 1.5235, "step": 4330 }, { "epoch": 0.12781954887218044, "grad_norm": 4.4054247415033565, "learning_rate": 3.195331695331695e-06, "loss": 1.5266, "step": 4335 }, { "epoch": 0.12796697626418987, "grad_norm": 5.049733485885453, "learning_rate": 3.199017199017199e-06, "loss": 1.5251, "step": 4340 }, { "epoch": 0.12811440365619933, "grad_norm": 5.046345570760735, "learning_rate": 3.202702702702703e-06, "loss": 1.5436, "step": 4345 }, { "epoch": 0.12826183104820876, "grad_norm": 4.818676607944189, "learning_rate": 3.2063882063882063e-06, "loss": 1.5541, "step": 4350 }, { "epoch": 0.1284092584402182, "grad_norm": 4.76318526615118, "learning_rate": 3.21007371007371e-06, "loss": 1.5271, "step": 4355 }, { "epoch": 0.12855668583222762, "grad_norm": 4.6745703872001325, "learning_rate": 3.213759213759214e-06, "loss": 1.4793, "step": 4360 }, { "epoch": 0.12870411322423705, "grad_norm": 5.336595366366269, "learning_rate": 3.2174447174447175e-06, "loss": 1.5386, "step": 4365 }, { "epoch": 0.12885154061624648, "grad_norm": 4.525579011666381, "learning_rate": 3.221130221130221e-06, "loss": 1.5424, "step": 4370 }, { "epoch": 0.12899896800825594, "grad_norm": 5.107690617703786, "learning_rate": 3.224815724815725e-06, "loss": 1.5549, "step": 4375 }, { "epoch": 0.12914639540026537, "grad_norm": 4.941398537828222, "learning_rate": 3.2285012285012287e-06, "loss": 1.5751, "step": 4380 }, { "epoch": 0.1292938227922748, "grad_norm": 4.739317965347838, "learning_rate": 3.232186732186732e-06, "loss": 1.5612, "step": 4385 }, { "epoch": 0.12944125018428423, "grad_norm": 4.701620414532305, "learning_rate": 3.235872235872236e-06, "loss": 1.5413, "step": 4390 }, { "epoch": 0.12958867757629366, "grad_norm": 4.753853921532109, "learning_rate": 3.23955773955774e-06, "loss": 1.5072, "step": 4395 }, { "epoch": 0.12973610496830312, "grad_norm": 4.89547949451295, "learning_rate": 3.2432432432432437e-06, "loss": 1.5353, "step": 4400 }, { "epoch": 0.12988353236031255, "grad_norm": 4.5746627816799155, "learning_rate": 3.2469287469287467e-06, "loss": 1.4866, "step": 4405 }, { "epoch": 0.13003095975232198, "grad_norm": 5.268040654155388, "learning_rate": 3.2506142506142506e-06, "loss": 1.5923, "step": 4410 }, { "epoch": 0.13017838714433141, "grad_norm": 4.985117492483401, "learning_rate": 3.2542997542997544e-06, "loss": 1.5374, "step": 4415 }, { "epoch": 0.13032581453634084, "grad_norm": 4.860459912703197, "learning_rate": 3.257985257985258e-06, "loss": 1.5151, "step": 4420 }, { "epoch": 0.13047324192835028, "grad_norm": 5.062866139683235, "learning_rate": 3.2616707616707618e-06, "loss": 1.528, "step": 4425 }, { "epoch": 0.13062066932035973, "grad_norm": 4.756279520744277, "learning_rate": 3.2653562653562656e-06, "loss": 1.5263, "step": 4430 }, { "epoch": 0.13076809671236916, "grad_norm": 4.8523133684629824, "learning_rate": 3.269041769041769e-06, "loss": 1.5678, "step": 4435 }, { "epoch": 0.1309155241043786, "grad_norm": 4.5486117081456285, "learning_rate": 3.2727272727272725e-06, "loss": 1.5097, "step": 4440 }, { "epoch": 0.13106295149638802, "grad_norm": 4.801024645490914, "learning_rate": 3.2764127764127764e-06, "loss": 1.524, "step": 4445 }, { "epoch": 0.13121037888839746, "grad_norm": 4.683204275336217, "learning_rate": 3.2800982800982802e-06, "loss": 1.4864, "step": 4450 }, { "epoch": 0.13135780628040689, "grad_norm": 4.6070201231257775, "learning_rate": 3.283783783783784e-06, "loss": 1.5018, "step": 4455 }, { "epoch": 0.13150523367241634, "grad_norm": 5.031804729419836, "learning_rate": 3.2874692874692875e-06, "loss": 1.5482, "step": 4460 }, { "epoch": 0.13165266106442577, "grad_norm": 5.229747091030568, "learning_rate": 3.2911547911547914e-06, "loss": 1.4864, "step": 4465 }, { "epoch": 0.1318000884564352, "grad_norm": 4.575243418432003, "learning_rate": 3.294840294840295e-06, "loss": 1.5289, "step": 4470 }, { "epoch": 0.13194751584844464, "grad_norm": 4.505049132585044, "learning_rate": 3.2985257985257983e-06, "loss": 1.5176, "step": 4475 }, { "epoch": 0.13209494324045407, "grad_norm": 4.593066896806852, "learning_rate": 3.302211302211302e-06, "loss": 1.5299, "step": 4480 }, { "epoch": 0.13224237063246352, "grad_norm": 4.53424507100528, "learning_rate": 3.305896805896806e-06, "loss": 1.5367, "step": 4485 }, { "epoch": 0.13238979802447295, "grad_norm": 4.590432278566023, "learning_rate": 3.30958230958231e-06, "loss": 1.4987, "step": 4490 }, { "epoch": 0.13253722541648238, "grad_norm": 4.568072316513191, "learning_rate": 3.3132678132678133e-06, "loss": 1.5392, "step": 4495 }, { "epoch": 0.13268465280849182, "grad_norm": 4.195489507102553, "learning_rate": 3.316953316953317e-06, "loss": 1.4974, "step": 4500 }, { "epoch": 0.13268465280849182, "eval_loss": 1.2763160467147827, "eval_runtime": 4.1719, "eval_samples_per_second": 94.922, "eval_steps_per_second": 3.116, "step": 4500 }, { "epoch": 0.13283208020050125, "grad_norm": 4.847315157906659, "learning_rate": 3.3206388206388206e-06, "loss": 1.5327, "step": 4505 }, { "epoch": 0.13297950759251068, "grad_norm": 4.585114527559985, "learning_rate": 3.3243243243243245e-06, "loss": 1.5166, "step": 4510 }, { "epoch": 0.13312693498452013, "grad_norm": 4.8841225943225925, "learning_rate": 3.328009828009828e-06, "loss": 1.5616, "step": 4515 }, { "epoch": 0.13327436237652956, "grad_norm": 4.860954861394083, "learning_rate": 3.331695331695332e-06, "loss": 1.5553, "step": 4520 }, { "epoch": 0.133421789768539, "grad_norm": 4.285454199662641, "learning_rate": 3.3353808353808357e-06, "loss": 1.5545, "step": 4525 }, { "epoch": 0.13356921716054843, "grad_norm": 4.695115178189639, "learning_rate": 3.339066339066339e-06, "loss": 1.5661, "step": 4530 }, { "epoch": 0.13371664455255786, "grad_norm": 4.721841875675509, "learning_rate": 3.342751842751843e-06, "loss": 1.5375, "step": 4535 }, { "epoch": 0.13386407194456731, "grad_norm": 5.039699374492787, "learning_rate": 3.3464373464373464e-06, "loss": 1.515, "step": 4540 }, { "epoch": 0.13401149933657674, "grad_norm": 4.728020657523132, "learning_rate": 3.3501228501228503e-06, "loss": 1.5768, "step": 4545 }, { "epoch": 0.13415892672858618, "grad_norm": 5.2064212942756445, "learning_rate": 3.3538083538083537e-06, "loss": 1.5318, "step": 4550 }, { "epoch": 0.1343063541205956, "grad_norm": 4.671392036074109, "learning_rate": 3.3574938574938576e-06, "loss": 1.4862, "step": 4555 }, { "epoch": 0.13445378151260504, "grad_norm": 4.7017991682102105, "learning_rate": 3.3611793611793614e-06, "loss": 1.5168, "step": 4560 }, { "epoch": 0.13460120890461447, "grad_norm": 4.326425193284831, "learning_rate": 3.3648648648648653e-06, "loss": 1.553, "step": 4565 }, { "epoch": 0.13474863629662392, "grad_norm": 4.795978305243529, "learning_rate": 3.3685503685503683e-06, "loss": 1.517, "step": 4570 }, { "epoch": 0.13489606368863336, "grad_norm": 4.911356948468643, "learning_rate": 3.372235872235872e-06, "loss": 1.5713, "step": 4575 }, { "epoch": 0.13504349108064279, "grad_norm": 4.500703162289795, "learning_rate": 3.375921375921376e-06, "loss": 1.5148, "step": 4580 }, { "epoch": 0.13519091847265222, "grad_norm": 4.792272852337474, "learning_rate": 3.3796068796068795e-06, "loss": 1.5233, "step": 4585 }, { "epoch": 0.13533834586466165, "grad_norm": 4.673721031902478, "learning_rate": 3.3832923832923834e-06, "loss": 1.5062, "step": 4590 }, { "epoch": 0.13548577325667108, "grad_norm": 5.011677207450699, "learning_rate": 3.3869778869778872e-06, "loss": 1.4964, "step": 4595 }, { "epoch": 0.13563320064868054, "grad_norm": 4.589816409366487, "learning_rate": 3.390663390663391e-06, "loss": 1.5691, "step": 4600 }, { "epoch": 0.13578062804068997, "grad_norm": 4.631846397269571, "learning_rate": 3.394348894348894e-06, "loss": 1.47, "step": 4605 }, { "epoch": 0.1359280554326994, "grad_norm": 4.685127736673351, "learning_rate": 3.398034398034398e-06, "loss": 1.5518, "step": 4610 }, { "epoch": 0.13607548282470883, "grad_norm": 4.82056983043057, "learning_rate": 3.401719901719902e-06, "loss": 1.5301, "step": 4615 }, { "epoch": 0.13622291021671826, "grad_norm": 4.9343299059616355, "learning_rate": 3.4054054054054057e-06, "loss": 1.527, "step": 4620 }, { "epoch": 0.13637033760872772, "grad_norm": 4.706643536060202, "learning_rate": 3.409090909090909e-06, "loss": 1.5358, "step": 4625 }, { "epoch": 0.13651776500073715, "grad_norm": 5.222983084298233, "learning_rate": 3.412776412776413e-06, "loss": 1.5356, "step": 4630 }, { "epoch": 0.13666519239274658, "grad_norm": 4.967317865559979, "learning_rate": 3.416461916461917e-06, "loss": 1.5162, "step": 4635 }, { "epoch": 0.136812619784756, "grad_norm": 4.534750918285993, "learning_rate": 3.42014742014742e-06, "loss": 1.5589, "step": 4640 }, { "epoch": 0.13696004717676544, "grad_norm": 4.844409550927746, "learning_rate": 3.4238329238329238e-06, "loss": 1.5675, "step": 4645 }, { "epoch": 0.13710747456877487, "grad_norm": 4.522917031551449, "learning_rate": 3.4275184275184276e-06, "loss": 1.4653, "step": 4650 }, { "epoch": 0.13725490196078433, "grad_norm": 5.0351651554507235, "learning_rate": 3.4312039312039315e-06, "loss": 1.5498, "step": 4655 }, { "epoch": 0.13740232935279376, "grad_norm": 4.77684230554101, "learning_rate": 3.434889434889435e-06, "loss": 1.513, "step": 4660 }, { "epoch": 0.1375497567448032, "grad_norm": 4.586555017513112, "learning_rate": 3.4385749385749388e-06, "loss": 1.5244, "step": 4665 }, { "epoch": 0.13769718413681262, "grad_norm": 4.196702116642287, "learning_rate": 3.4422604422604422e-06, "loss": 1.5069, "step": 4670 }, { "epoch": 0.13784461152882205, "grad_norm": 4.4976768036098225, "learning_rate": 3.445945945945946e-06, "loss": 1.5639, "step": 4675 }, { "epoch": 0.13799203892083148, "grad_norm": 4.91752968154964, "learning_rate": 3.4496314496314495e-06, "loss": 1.5391, "step": 4680 }, { "epoch": 0.13813946631284094, "grad_norm": 4.631318164890942, "learning_rate": 3.4533169533169534e-06, "loss": 1.5092, "step": 4685 }, { "epoch": 0.13828689370485037, "grad_norm": 4.789869479227321, "learning_rate": 3.4570024570024573e-06, "loss": 1.5521, "step": 4690 }, { "epoch": 0.1384343210968598, "grad_norm": 4.700776725620946, "learning_rate": 3.4606879606879607e-06, "loss": 1.5302, "step": 4695 }, { "epoch": 0.13858174848886923, "grad_norm": 4.575941788487999, "learning_rate": 3.4643734643734646e-06, "loss": 1.5272, "step": 4700 }, { "epoch": 0.13872917588087866, "grad_norm": 4.628764184563924, "learning_rate": 3.468058968058968e-06, "loss": 1.5349, "step": 4705 }, { "epoch": 0.13887660327288812, "grad_norm": 4.694637284413882, "learning_rate": 3.471744471744472e-06, "loss": 1.5526, "step": 4710 }, { "epoch": 0.13902403066489755, "grad_norm": 4.975441995693226, "learning_rate": 3.4754299754299753e-06, "loss": 1.5367, "step": 4715 }, { "epoch": 0.13917145805690698, "grad_norm": 4.8637715926499245, "learning_rate": 3.479115479115479e-06, "loss": 1.5488, "step": 4720 }, { "epoch": 0.1393188854489164, "grad_norm": 4.565111363212398, "learning_rate": 3.482800982800983e-06, "loss": 1.5477, "step": 4725 }, { "epoch": 0.13946631284092584, "grad_norm": 5.158395840428209, "learning_rate": 3.486486486486487e-06, "loss": 1.5383, "step": 4730 }, { "epoch": 0.13961374023293527, "grad_norm": 5.094117021555197, "learning_rate": 3.4901719901719903e-06, "loss": 1.4912, "step": 4735 }, { "epoch": 0.13976116762494473, "grad_norm": 4.704418026828554, "learning_rate": 3.493857493857494e-06, "loss": 1.5351, "step": 4740 }, { "epoch": 0.13990859501695416, "grad_norm": 5.068547392192988, "learning_rate": 3.4975429975429977e-06, "loss": 1.5389, "step": 4745 }, { "epoch": 0.1400560224089636, "grad_norm": 4.888727015828537, "learning_rate": 3.501228501228501e-06, "loss": 1.5297, "step": 4750 }, { "epoch": 0.14020344980097302, "grad_norm": 4.613732550685465, "learning_rate": 3.504914004914005e-06, "loss": 1.4748, "step": 4755 }, { "epoch": 0.14035087719298245, "grad_norm": 4.365570457629462, "learning_rate": 3.508599508599509e-06, "loss": 1.4939, "step": 4760 }, { "epoch": 0.14049830458499188, "grad_norm": 4.232012183407537, "learning_rate": 3.5122850122850127e-06, "loss": 1.5629, "step": 4765 }, { "epoch": 0.14064573197700134, "grad_norm": 4.795600656193571, "learning_rate": 3.5159705159705157e-06, "loss": 1.5202, "step": 4770 }, { "epoch": 0.14079315936901077, "grad_norm": 4.565470899416253, "learning_rate": 3.5196560196560196e-06, "loss": 1.5852, "step": 4775 }, { "epoch": 0.1409405867610202, "grad_norm": 4.57217817356715, "learning_rate": 3.5233415233415234e-06, "loss": 1.5332, "step": 4780 }, { "epoch": 0.14108801415302963, "grad_norm": 4.356304867714271, "learning_rate": 3.5270270270270273e-06, "loss": 1.5067, "step": 4785 }, { "epoch": 0.14123544154503906, "grad_norm": 4.629736362873982, "learning_rate": 3.5307125307125307e-06, "loss": 1.5442, "step": 4790 }, { "epoch": 0.14138286893704852, "grad_norm": 4.769343001149021, "learning_rate": 3.5343980343980346e-06, "loss": 1.4539, "step": 4795 }, { "epoch": 0.14153029632905795, "grad_norm": 4.590132952794273, "learning_rate": 3.5380835380835385e-06, "loss": 1.5375, "step": 4800 }, { "epoch": 0.14167772372106738, "grad_norm": 4.780089648938973, "learning_rate": 3.5417690417690415e-06, "loss": 1.531, "step": 4805 }, { "epoch": 0.1418251511130768, "grad_norm": 4.426432023869081, "learning_rate": 3.5454545454545454e-06, "loss": 1.5422, "step": 4810 }, { "epoch": 0.14197257850508624, "grad_norm": 4.579085664892875, "learning_rate": 3.5491400491400492e-06, "loss": 1.5351, "step": 4815 }, { "epoch": 0.14212000589709567, "grad_norm": 4.718968901590322, "learning_rate": 3.552825552825553e-06, "loss": 1.5168, "step": 4820 }, { "epoch": 0.14226743328910513, "grad_norm": 4.4839483754008675, "learning_rate": 3.5565110565110565e-06, "loss": 1.4943, "step": 4825 }, { "epoch": 0.14241486068111456, "grad_norm": 4.471872454691049, "learning_rate": 3.5601965601965604e-06, "loss": 1.5132, "step": 4830 }, { "epoch": 0.142562288073124, "grad_norm": 5.143463738460816, "learning_rate": 3.5638820638820643e-06, "loss": 1.5504, "step": 4835 }, { "epoch": 0.14270971546513342, "grad_norm": 4.639727095385295, "learning_rate": 3.5675675675675677e-06, "loss": 1.492, "step": 4840 }, { "epoch": 0.14285714285714285, "grad_norm": 4.647828944342637, "learning_rate": 3.571253071253071e-06, "loss": 1.4892, "step": 4845 }, { "epoch": 0.14300457024915228, "grad_norm": 4.7501883564088585, "learning_rate": 3.574938574938575e-06, "loss": 1.5372, "step": 4850 }, { "epoch": 0.14315199764116174, "grad_norm": 4.6000255860285755, "learning_rate": 3.578624078624079e-06, "loss": 1.513, "step": 4855 }, { "epoch": 0.14329942503317117, "grad_norm": 5.550344656723158, "learning_rate": 3.5823095823095823e-06, "loss": 1.5545, "step": 4860 }, { "epoch": 0.1434468524251806, "grad_norm": 4.447301490619689, "learning_rate": 3.585995085995086e-06, "loss": 1.553, "step": 4865 }, { "epoch": 0.14359427981719003, "grad_norm": 4.363623755426452, "learning_rate": 3.5896805896805896e-06, "loss": 1.5007, "step": 4870 }, { "epoch": 0.14374170720919946, "grad_norm": 4.782303986026331, "learning_rate": 3.5933660933660935e-06, "loss": 1.5041, "step": 4875 }, { "epoch": 0.14388913460120892, "grad_norm": 4.681618481938802, "learning_rate": 3.597051597051597e-06, "loss": 1.502, "step": 4880 }, { "epoch": 0.14403656199321835, "grad_norm": 4.812155974536888, "learning_rate": 3.6007371007371008e-06, "loss": 1.5145, "step": 4885 }, { "epoch": 0.14418398938522778, "grad_norm": 4.607640203066771, "learning_rate": 3.6044226044226046e-06, "loss": 1.5814, "step": 4890 }, { "epoch": 0.1443314167772372, "grad_norm": 4.462818134626861, "learning_rate": 3.6081081081081085e-06, "loss": 1.5633, "step": 4895 }, { "epoch": 0.14447884416924664, "grad_norm": 5.006692767262002, "learning_rate": 3.611793611793612e-06, "loss": 1.5326, "step": 4900 }, { "epoch": 0.14462627156125607, "grad_norm": 4.357266177678512, "learning_rate": 3.6154791154791154e-06, "loss": 1.5255, "step": 4905 }, { "epoch": 0.14477369895326553, "grad_norm": 5.337473515177719, "learning_rate": 3.6191646191646193e-06, "loss": 1.5129, "step": 4910 }, { "epoch": 0.14492112634527496, "grad_norm": 4.950770531013438, "learning_rate": 3.6228501228501227e-06, "loss": 1.5267, "step": 4915 }, { "epoch": 0.1450685537372844, "grad_norm": 4.404342525036498, "learning_rate": 3.6265356265356266e-06, "loss": 1.5215, "step": 4920 }, { "epoch": 0.14521598112929382, "grad_norm": 5.324643424794852, "learning_rate": 3.6302211302211304e-06, "loss": 1.553, "step": 4925 }, { "epoch": 0.14536340852130325, "grad_norm": 4.620021063123445, "learning_rate": 3.6339066339066343e-06, "loss": 1.5568, "step": 4930 }, { "epoch": 0.14551083591331268, "grad_norm": 4.50498852327706, "learning_rate": 3.6375921375921377e-06, "loss": 1.5324, "step": 4935 }, { "epoch": 0.14565826330532214, "grad_norm": 4.561475120398187, "learning_rate": 3.641277641277641e-06, "loss": 1.5473, "step": 4940 }, { "epoch": 0.14580569069733157, "grad_norm": 4.346947513572713, "learning_rate": 3.644963144963145e-06, "loss": 1.5248, "step": 4945 }, { "epoch": 0.145953118089341, "grad_norm": 5.182284394637787, "learning_rate": 3.648648648648649e-06, "loss": 1.5892, "step": 4950 }, { "epoch": 0.14610054548135043, "grad_norm": 4.929694565920372, "learning_rate": 3.6523341523341523e-06, "loss": 1.5413, "step": 4955 }, { "epoch": 0.14624797287335986, "grad_norm": 4.618852029768198, "learning_rate": 3.656019656019656e-06, "loss": 1.6156, "step": 4960 }, { "epoch": 0.14639540026536932, "grad_norm": 4.1871261909570405, "learning_rate": 3.65970515970516e-06, "loss": 1.5183, "step": 4965 }, { "epoch": 0.14654282765737875, "grad_norm": 4.612599007345202, "learning_rate": 3.6633906633906635e-06, "loss": 1.5451, "step": 4970 }, { "epoch": 0.14669025504938818, "grad_norm": 4.25268311774661, "learning_rate": 3.667076167076167e-06, "loss": 1.5595, "step": 4975 }, { "epoch": 0.1468376824413976, "grad_norm": 4.482910197716342, "learning_rate": 3.670761670761671e-06, "loss": 1.4917, "step": 4980 }, { "epoch": 0.14698510983340704, "grad_norm": 4.613900115188567, "learning_rate": 3.6744471744471747e-06, "loss": 1.5586, "step": 4985 }, { "epoch": 0.14713253722541647, "grad_norm": 4.856261090063445, "learning_rate": 3.678132678132678e-06, "loss": 1.5271, "step": 4990 }, { "epoch": 0.14727996461742593, "grad_norm": 4.532214578057965, "learning_rate": 3.681818181818182e-06, "loss": 1.5012, "step": 4995 }, { "epoch": 0.14742739200943536, "grad_norm": 4.546149851985248, "learning_rate": 3.685503685503686e-06, "loss": 1.5642, "step": 5000 }, { "epoch": 0.14742739200943536, "eval_loss": 1.2688268423080444, "eval_runtime": 4.2357, "eval_samples_per_second": 93.492, "eval_steps_per_second": 3.069, "step": 5000 }, { "epoch": 0.1475748194014448, "grad_norm": 4.645029054498701, "learning_rate": 3.6891891891891893e-06, "loss": 1.5934, "step": 5005 }, { "epoch": 0.14772224679345422, "grad_norm": 5.000566860272309, "learning_rate": 3.6928746928746927e-06, "loss": 1.5371, "step": 5010 }, { "epoch": 0.14786967418546365, "grad_norm": 4.696644088596374, "learning_rate": 3.6965601965601966e-06, "loss": 1.5388, "step": 5015 }, { "epoch": 0.14801710157747308, "grad_norm": 4.722836129968028, "learning_rate": 3.7002457002457005e-06, "loss": 1.5164, "step": 5020 }, { "epoch": 0.14816452896948254, "grad_norm": 4.74893850350209, "learning_rate": 3.703931203931204e-06, "loss": 1.543, "step": 5025 }, { "epoch": 0.14831195636149197, "grad_norm": 4.635935042951048, "learning_rate": 3.7076167076167078e-06, "loss": 1.5971, "step": 5030 }, { "epoch": 0.1484593837535014, "grad_norm": 4.123420573917591, "learning_rate": 3.7113022113022116e-06, "loss": 1.5707, "step": 5035 }, { "epoch": 0.14860681114551083, "grad_norm": 4.588866283250317, "learning_rate": 3.714987714987715e-06, "loss": 1.5355, "step": 5040 }, { "epoch": 0.14875423853752026, "grad_norm": 4.425789965675826, "learning_rate": 3.7186732186732185e-06, "loss": 1.4882, "step": 5045 }, { "epoch": 0.14890166592952972, "grad_norm": 4.549559590289285, "learning_rate": 3.7223587223587224e-06, "loss": 1.5105, "step": 5050 }, { "epoch": 0.14904909332153915, "grad_norm": 4.2944644728532735, "learning_rate": 3.7260442260442262e-06, "loss": 1.6113, "step": 5055 }, { "epoch": 0.14919652071354858, "grad_norm": 4.747594756005791, "learning_rate": 3.72972972972973e-06, "loss": 1.5463, "step": 5060 }, { "epoch": 0.149343948105558, "grad_norm": 4.440263617013229, "learning_rate": 3.7334152334152336e-06, "loss": 1.51, "step": 5065 }, { "epoch": 0.14949137549756744, "grad_norm": 4.735950413702658, "learning_rate": 3.7371007371007374e-06, "loss": 1.5045, "step": 5070 }, { "epoch": 0.14963880288957687, "grad_norm": 4.311826892831912, "learning_rate": 3.740786240786241e-06, "loss": 1.5666, "step": 5075 }, { "epoch": 0.14978623028158633, "grad_norm": 4.439681284015319, "learning_rate": 3.7444717444717443e-06, "loss": 1.515, "step": 5080 }, { "epoch": 0.14993365767359576, "grad_norm": 4.440150103201596, "learning_rate": 3.748157248157248e-06, "loss": 1.5098, "step": 5085 }, { "epoch": 0.1500810850656052, "grad_norm": 4.120424872459335, "learning_rate": 3.751842751842752e-06, "loss": 1.4757, "step": 5090 }, { "epoch": 0.15022851245761462, "grad_norm": 4.45881585095023, "learning_rate": 3.755528255528256e-06, "loss": 1.4725, "step": 5095 }, { "epoch": 0.15037593984962405, "grad_norm": 4.616123427110541, "learning_rate": 3.7592137592137598e-06, "loss": 1.5754, "step": 5100 }, { "epoch": 0.15052336724163348, "grad_norm": 4.3686245565318576, "learning_rate": 3.762899262899263e-06, "loss": 1.554, "step": 5105 }, { "epoch": 0.15067079463364294, "grad_norm": 4.576069349777376, "learning_rate": 3.7665847665847662e-06, "loss": 1.595, "step": 5110 }, { "epoch": 0.15081822202565237, "grad_norm": 4.695065707822306, "learning_rate": 3.77027027027027e-06, "loss": 1.5076, "step": 5115 }, { "epoch": 0.1509656494176618, "grad_norm": 4.594024327105022, "learning_rate": 3.773955773955774e-06, "loss": 1.5688, "step": 5120 }, { "epoch": 0.15111307680967123, "grad_norm": 4.711477293703158, "learning_rate": 3.777641277641278e-06, "loss": 1.5289, "step": 5125 }, { "epoch": 0.15126050420168066, "grad_norm": 4.638140411397622, "learning_rate": 3.7813267813267817e-06, "loss": 1.4898, "step": 5130 }, { "epoch": 0.15140793159369012, "grad_norm": 6.552701132836743, "learning_rate": 3.7850122850122855e-06, "loss": 1.5524, "step": 5135 }, { "epoch": 0.15155535898569955, "grad_norm": 4.293694032681414, "learning_rate": 3.788697788697789e-06, "loss": 1.5263, "step": 5140 }, { "epoch": 0.15170278637770898, "grad_norm": 4.648874656880593, "learning_rate": 3.792383292383292e-06, "loss": 1.5705, "step": 5145 }, { "epoch": 0.1518502137697184, "grad_norm": 4.633391167108116, "learning_rate": 3.796068796068796e-06, "loss": 1.5514, "step": 5150 }, { "epoch": 0.15199764116172784, "grad_norm": 4.4734680792287245, "learning_rate": 3.7997542997542997e-06, "loss": 1.5539, "step": 5155 }, { "epoch": 0.15214506855373727, "grad_norm": 4.379731986887787, "learning_rate": 3.8034398034398036e-06, "loss": 1.5372, "step": 5160 }, { "epoch": 0.15229249594574673, "grad_norm": 5.030475331591095, "learning_rate": 3.8071253071253075e-06, "loss": 1.4639, "step": 5165 }, { "epoch": 0.15243992333775616, "grad_norm": 4.666014819129949, "learning_rate": 3.8108108108108113e-06, "loss": 1.5518, "step": 5170 }, { "epoch": 0.1525873507297656, "grad_norm": 4.444622378971735, "learning_rate": 3.8144963144963148e-06, "loss": 1.5336, "step": 5175 }, { "epoch": 0.15273477812177502, "grad_norm": 4.7852107102095145, "learning_rate": 3.818181818181818e-06, "loss": 1.4938, "step": 5180 }, { "epoch": 0.15288220551378445, "grad_norm": 4.5533843756161785, "learning_rate": 3.821867321867322e-06, "loss": 1.5624, "step": 5185 }, { "epoch": 0.15302963290579388, "grad_norm": 4.638409740174792, "learning_rate": 3.825552825552825e-06, "loss": 1.5426, "step": 5190 }, { "epoch": 0.15317706029780334, "grad_norm": 4.373040144895963, "learning_rate": 3.829238329238329e-06, "loss": 1.5627, "step": 5195 }, { "epoch": 0.15332448768981277, "grad_norm": 4.58301609754513, "learning_rate": 3.832923832923833e-06, "loss": 1.54, "step": 5200 }, { "epoch": 0.1534719150818222, "grad_norm": 4.515332434394476, "learning_rate": 3.836609336609337e-06, "loss": 1.5151, "step": 5205 }, { "epoch": 0.15361934247383163, "grad_norm": 4.675221093900497, "learning_rate": 3.8402948402948405e-06, "loss": 1.5623, "step": 5210 }, { "epoch": 0.15376676986584106, "grad_norm": 4.798159113106519, "learning_rate": 3.843980343980344e-06, "loss": 1.5531, "step": 5215 }, { "epoch": 0.15391419725785052, "grad_norm": 4.4635953484913715, "learning_rate": 3.8476658476658474e-06, "loss": 1.4983, "step": 5220 }, { "epoch": 0.15406162464985995, "grad_norm": 4.5063355098091495, "learning_rate": 3.851351351351351e-06, "loss": 1.5651, "step": 5225 }, { "epoch": 0.15420905204186938, "grad_norm": 4.606837177860966, "learning_rate": 3.855036855036855e-06, "loss": 1.523, "step": 5230 }, { "epoch": 0.1543564794338788, "grad_norm": 4.410246077529467, "learning_rate": 3.858722358722359e-06, "loss": 1.5178, "step": 5235 }, { "epoch": 0.15450390682588824, "grad_norm": 4.454243538099957, "learning_rate": 3.862407862407863e-06, "loss": 1.5202, "step": 5240 }, { "epoch": 0.15465133421789767, "grad_norm": 5.224611333136219, "learning_rate": 3.866093366093367e-06, "loss": 1.4987, "step": 5245 }, { "epoch": 0.15479876160990713, "grad_norm": 5.091495327444323, "learning_rate": 3.869778869778871e-06, "loss": 1.5516, "step": 5250 }, { "epoch": 0.15494618900191656, "grad_norm": 4.557726034550247, "learning_rate": 3.873464373464374e-06, "loss": 1.515, "step": 5255 }, { "epoch": 0.155093616393926, "grad_norm": 4.31492025215664, "learning_rate": 3.877149877149877e-06, "loss": 1.5502, "step": 5260 }, { "epoch": 0.15524104378593542, "grad_norm": 4.409448364377614, "learning_rate": 3.8808353808353805e-06, "loss": 1.533, "step": 5265 }, { "epoch": 0.15538847117794485, "grad_norm": 4.855781577084502, "learning_rate": 3.884520884520884e-06, "loss": 1.5194, "step": 5270 }, { "epoch": 0.15553589856995428, "grad_norm": 4.523797253479802, "learning_rate": 3.888206388206388e-06, "loss": 1.5743, "step": 5275 }, { "epoch": 0.15568332596196374, "grad_norm": 4.550702189642647, "learning_rate": 3.891891891891892e-06, "loss": 1.4989, "step": 5280 }, { "epoch": 0.15583075335397317, "grad_norm": 4.275705966848993, "learning_rate": 3.895577395577396e-06, "loss": 1.4984, "step": 5285 }, { "epoch": 0.1559781807459826, "grad_norm": 4.567637960577632, "learning_rate": 3.899262899262899e-06, "loss": 1.5296, "step": 5290 }, { "epoch": 0.15612560813799203, "grad_norm": 4.521817554824935, "learning_rate": 3.902948402948403e-06, "loss": 1.5375, "step": 5295 }, { "epoch": 0.15627303553000146, "grad_norm": 4.3787311379367795, "learning_rate": 3.906633906633907e-06, "loss": 1.5906, "step": 5300 }, { "epoch": 0.15642046292201092, "grad_norm": 4.72300760414436, "learning_rate": 3.910319410319411e-06, "loss": 1.5446, "step": 5305 }, { "epoch": 0.15656789031402035, "grad_norm": 4.449279159392347, "learning_rate": 3.9140049140049144e-06, "loss": 1.5347, "step": 5310 }, { "epoch": 0.15671531770602978, "grad_norm": 4.8280486308936075, "learning_rate": 3.917690417690418e-06, "loss": 1.5746, "step": 5315 }, { "epoch": 0.1568627450980392, "grad_norm": 4.333644899375493, "learning_rate": 3.921375921375922e-06, "loss": 1.5798, "step": 5320 }, { "epoch": 0.15701017249004864, "grad_norm": 4.3170927559619985, "learning_rate": 3.925061425061425e-06, "loss": 1.5592, "step": 5325 }, { "epoch": 0.15715759988205807, "grad_norm": 4.230648999594289, "learning_rate": 3.928746928746928e-06, "loss": 1.5393, "step": 5330 }, { "epoch": 0.15730502727406753, "grad_norm": 4.576196263139866, "learning_rate": 3.932432432432432e-06, "loss": 1.5311, "step": 5335 }, { "epoch": 0.15745245466607696, "grad_norm": 4.360023908813899, "learning_rate": 3.936117936117936e-06, "loss": 1.5891, "step": 5340 }, { "epoch": 0.1575998820580864, "grad_norm": 4.540384740203446, "learning_rate": 3.93980343980344e-06, "loss": 1.6165, "step": 5345 }, { "epoch": 0.15774730945009582, "grad_norm": 4.501252259490442, "learning_rate": 3.943488943488944e-06, "loss": 1.5028, "step": 5350 }, { "epoch": 0.15789473684210525, "grad_norm": 4.468308782456239, "learning_rate": 3.9471744471744475e-06, "loss": 1.54, "step": 5355 }, { "epoch": 0.15804216423411468, "grad_norm": 4.4406569654774, "learning_rate": 3.950859950859951e-06, "loss": 1.4958, "step": 5360 }, { "epoch": 0.15818959162612414, "grad_norm": 4.206889663815658, "learning_rate": 3.954545454545454e-06, "loss": 1.5577, "step": 5365 }, { "epoch": 0.15833701901813357, "grad_norm": 4.490491643629893, "learning_rate": 3.958230958230958e-06, "loss": 1.5371, "step": 5370 }, { "epoch": 0.158484446410143, "grad_norm": 4.539040253837564, "learning_rate": 3.961916461916462e-06, "loss": 1.5843, "step": 5375 }, { "epoch": 0.15863187380215243, "grad_norm": 4.817153156445021, "learning_rate": 3.965601965601966e-06, "loss": 1.5875, "step": 5380 }, { "epoch": 0.15877930119416186, "grad_norm": 4.291008843754178, "learning_rate": 3.96928746928747e-06, "loss": 1.5189, "step": 5385 }, { "epoch": 0.15892672858617132, "grad_norm": 4.283635667786998, "learning_rate": 3.972972972972973e-06, "loss": 1.5413, "step": 5390 }, { "epoch": 0.15907415597818075, "grad_norm": 4.579750447389729, "learning_rate": 3.976658476658477e-06, "loss": 1.5411, "step": 5395 }, { "epoch": 0.15922158337019018, "grad_norm": 5.611716139600716, "learning_rate": 3.98034398034398e-06, "loss": 1.5298, "step": 5400 }, { "epoch": 0.15936901076219961, "grad_norm": 4.361213844709548, "learning_rate": 3.984029484029484e-06, "loss": 1.5452, "step": 5405 }, { "epoch": 0.15951643815420904, "grad_norm": 4.592425644019118, "learning_rate": 3.9877149877149875e-06, "loss": 1.5184, "step": 5410 }, { "epoch": 0.15966386554621848, "grad_norm": 4.233902799657307, "learning_rate": 3.991400491400491e-06, "loss": 1.5694, "step": 5415 }, { "epoch": 0.15981129293822793, "grad_norm": 4.643937018760166, "learning_rate": 3.995085995085995e-06, "loss": 1.5353, "step": 5420 }, { "epoch": 0.15995872033023736, "grad_norm": 4.6060324860824995, "learning_rate": 3.998771498771499e-06, "loss": 1.5162, "step": 5425 }, { "epoch": 0.1601061477222468, "grad_norm": 4.200492951967331, "learning_rate": 4.002457002457003e-06, "loss": 1.5216, "step": 5430 }, { "epoch": 0.16025357511425622, "grad_norm": 4.537295520371377, "learning_rate": 4.006142506142507e-06, "loss": 1.5444, "step": 5435 }, { "epoch": 0.16040100250626566, "grad_norm": 4.25478014648566, "learning_rate": 4.00982800982801e-06, "loss": 1.5417, "step": 5440 }, { "epoch": 0.16054842989827509, "grad_norm": 5.486628619838799, "learning_rate": 4.013513513513514e-06, "loss": 1.4933, "step": 5445 }, { "epoch": 0.16069585729028454, "grad_norm": 4.738329713130608, "learning_rate": 4.0171990171990176e-06, "loss": 1.5353, "step": 5450 }, { "epoch": 0.16084328468229397, "grad_norm": 4.525304859147241, "learning_rate": 4.0208845208845214e-06, "loss": 1.5355, "step": 5455 }, { "epoch": 0.1609907120743034, "grad_norm": 4.425592262910372, "learning_rate": 4.0245700245700245e-06, "loss": 1.5218, "step": 5460 }, { "epoch": 0.16113813946631284, "grad_norm": 4.449880106333343, "learning_rate": 4.028255528255528e-06, "loss": 1.4837, "step": 5465 }, { "epoch": 0.16128556685832227, "grad_norm": 4.92234483854458, "learning_rate": 4.031941031941032e-06, "loss": 1.5427, "step": 5470 }, { "epoch": 0.16143299425033172, "grad_norm": 4.617425120225604, "learning_rate": 4.035626535626535e-06, "loss": 1.5361, "step": 5475 }, { "epoch": 0.16158042164234115, "grad_norm": 4.099115213487491, "learning_rate": 4.039312039312039e-06, "loss": 1.5486, "step": 5480 }, { "epoch": 0.16172784903435058, "grad_norm": 4.546505389011976, "learning_rate": 4.042997542997543e-06, "loss": 1.5557, "step": 5485 }, { "epoch": 0.16187527642636002, "grad_norm": 4.090979993567217, "learning_rate": 4.046683046683047e-06, "loss": 1.5147, "step": 5490 }, { "epoch": 0.16202270381836945, "grad_norm": 4.593434820574161, "learning_rate": 4.050368550368551e-06, "loss": 1.5517, "step": 5495 }, { "epoch": 0.16217013121037888, "grad_norm": 4.159116042774019, "learning_rate": 4.0540540540540545e-06, "loss": 1.519, "step": 5500 }, { "epoch": 0.16217013121037888, "eval_loss": 1.2616699934005737, "eval_runtime": 4.153, "eval_samples_per_second": 95.354, "eval_steps_per_second": 3.13, "step": 5500 }, { "epoch": 0.16231755860238833, "grad_norm": 4.249080247555688, "learning_rate": 4.057739557739558e-06, "loss": 1.4785, "step": 5505 }, { "epoch": 0.16246498599439776, "grad_norm": 4.280826020370763, "learning_rate": 4.061425061425061e-06, "loss": 1.5709, "step": 5510 }, { "epoch": 0.1626124133864072, "grad_norm": 4.173087936969887, "learning_rate": 4.065110565110565e-06, "loss": 1.4904, "step": 5515 }, { "epoch": 0.16275984077841663, "grad_norm": 4.41661718881703, "learning_rate": 4.068796068796069e-06, "loss": 1.5168, "step": 5520 }, { "epoch": 0.16290726817042606, "grad_norm": 4.555248400268817, "learning_rate": 4.072481572481572e-06, "loss": 1.522, "step": 5525 }, { "epoch": 0.16305469556243551, "grad_norm": 4.515478086299296, "learning_rate": 4.076167076167076e-06, "loss": 1.5008, "step": 5530 }, { "epoch": 0.16320212295444494, "grad_norm": 4.640903064669996, "learning_rate": 4.07985257985258e-06, "loss": 1.5564, "step": 5535 }, { "epoch": 0.16334955034645438, "grad_norm": 5.075288415069169, "learning_rate": 4.083538083538084e-06, "loss": 1.5703, "step": 5540 }, { "epoch": 0.1634969777384638, "grad_norm": 4.718314135236502, "learning_rate": 4.087223587223588e-06, "loss": 1.5791, "step": 5545 }, { "epoch": 0.16364440513047324, "grad_norm": 4.328025581269543, "learning_rate": 4.090909090909091e-06, "loss": 1.5641, "step": 5550 }, { "epoch": 0.16379183252248267, "grad_norm": 4.2227188413127035, "learning_rate": 4.0945945945945945e-06, "loss": 1.5541, "step": 5555 }, { "epoch": 0.16393925991449212, "grad_norm": 4.644440242925791, "learning_rate": 4.098280098280098e-06, "loss": 1.5457, "step": 5560 }, { "epoch": 0.16408668730650156, "grad_norm": 4.537478586693682, "learning_rate": 4.101965601965602e-06, "loss": 1.5223, "step": 5565 }, { "epoch": 0.16423411469851099, "grad_norm": 4.405101396385208, "learning_rate": 4.105651105651106e-06, "loss": 1.5297, "step": 5570 }, { "epoch": 0.16438154209052042, "grad_norm": 7.337364997710357, "learning_rate": 4.10933660933661e-06, "loss": 1.4939, "step": 5575 }, { "epoch": 0.16452896948252985, "grad_norm": 4.3173979909767946, "learning_rate": 4.113022113022114e-06, "loss": 1.4826, "step": 5580 }, { "epoch": 0.16467639687453928, "grad_norm": 4.373007783972512, "learning_rate": 4.116707616707617e-06, "loss": 1.5562, "step": 5585 }, { "epoch": 0.16482382426654874, "grad_norm": 4.774093177902672, "learning_rate": 4.12039312039312e-06, "loss": 1.5435, "step": 5590 }, { "epoch": 0.16497125165855817, "grad_norm": 4.428012241544187, "learning_rate": 4.124078624078624e-06, "loss": 1.5102, "step": 5595 }, { "epoch": 0.1651186790505676, "grad_norm": 4.564053035519051, "learning_rate": 4.127764127764128e-06, "loss": 1.5538, "step": 5600 }, { "epoch": 0.16526610644257703, "grad_norm": 4.1469395621308385, "learning_rate": 4.1314496314496314e-06, "loss": 1.5267, "step": 5605 }, { "epoch": 0.16541353383458646, "grad_norm": 4.401765891614241, "learning_rate": 4.135135135135135e-06, "loss": 1.4775, "step": 5610 }, { "epoch": 0.16556096122659592, "grad_norm": 4.60176863072781, "learning_rate": 4.138820638820639e-06, "loss": 1.5449, "step": 5615 }, { "epoch": 0.16570838861860535, "grad_norm": 4.160110126850831, "learning_rate": 4.142506142506142e-06, "loss": 1.5114, "step": 5620 }, { "epoch": 0.16585581601061478, "grad_norm": 4.147034755263783, "learning_rate": 4.146191646191646e-06, "loss": 1.5501, "step": 5625 }, { "epoch": 0.1660032434026242, "grad_norm": 4.432297400860323, "learning_rate": 4.14987714987715e-06, "loss": 1.5225, "step": 5630 }, { "epoch": 0.16615067079463364, "grad_norm": 4.469877377377752, "learning_rate": 4.153562653562654e-06, "loss": 1.5354, "step": 5635 }, { "epoch": 0.16629809818664307, "grad_norm": 4.315499948898841, "learning_rate": 4.157248157248158e-06, "loss": 1.5355, "step": 5640 }, { "epoch": 0.16644552557865253, "grad_norm": 4.584778471438633, "learning_rate": 4.1609336609336615e-06, "loss": 1.5163, "step": 5645 }, { "epoch": 0.16659295297066196, "grad_norm": 4.61277363437239, "learning_rate": 4.164619164619165e-06, "loss": 1.5356, "step": 5650 }, { "epoch": 0.1667403803626714, "grad_norm": 4.332224797634135, "learning_rate": 4.168304668304668e-06, "loss": 1.5407, "step": 5655 }, { "epoch": 0.16688780775468082, "grad_norm": 4.181188731146761, "learning_rate": 4.171990171990171e-06, "loss": 1.5497, "step": 5660 }, { "epoch": 0.16703523514669025, "grad_norm": 4.364611014026289, "learning_rate": 4.175675675675675e-06, "loss": 1.4655, "step": 5665 }, { "epoch": 0.16718266253869968, "grad_norm": 4.31753545156476, "learning_rate": 4.179361179361179e-06, "loss": 1.559, "step": 5670 }, { "epoch": 0.16733008993070914, "grad_norm": 4.539404605082137, "learning_rate": 4.183046683046683e-06, "loss": 1.5447, "step": 5675 }, { "epoch": 0.16747751732271857, "grad_norm": 4.265717353459123, "learning_rate": 4.186732186732187e-06, "loss": 1.5343, "step": 5680 }, { "epoch": 0.167624944714728, "grad_norm": 4.375702554108104, "learning_rate": 4.190417690417691e-06, "loss": 1.515, "step": 5685 }, { "epoch": 0.16777237210673743, "grad_norm": 4.647067252711078, "learning_rate": 4.194103194103195e-06, "loss": 1.5274, "step": 5690 }, { "epoch": 0.16791979949874686, "grad_norm": 4.455686156778295, "learning_rate": 4.197788697788698e-06, "loss": 1.5272, "step": 5695 }, { "epoch": 0.16806722689075632, "grad_norm": 4.189368734684065, "learning_rate": 4.2014742014742015e-06, "loss": 1.4857, "step": 5700 }, { "epoch": 0.16821465428276575, "grad_norm": 4.6747496561082125, "learning_rate": 4.205159705159705e-06, "loss": 1.4982, "step": 5705 }, { "epoch": 0.16836208167477518, "grad_norm": 4.256083124635998, "learning_rate": 4.208845208845209e-06, "loss": 1.5256, "step": 5710 }, { "epoch": 0.1685095090667846, "grad_norm": 4.325011870788555, "learning_rate": 4.212530712530713e-06, "loss": 1.5289, "step": 5715 }, { "epoch": 0.16865693645879404, "grad_norm": 4.394164973686788, "learning_rate": 4.216216216216217e-06, "loss": 1.5545, "step": 5720 }, { "epoch": 0.16880436385080347, "grad_norm": 4.453858047598077, "learning_rate": 4.21990171990172e-06, "loss": 1.5498, "step": 5725 }, { "epoch": 0.16895179124281293, "grad_norm": 4.3374203921023415, "learning_rate": 4.223587223587223e-06, "loss": 1.5847, "step": 5730 }, { "epoch": 0.16909921863482236, "grad_norm": 4.463769516126149, "learning_rate": 4.227272727272727e-06, "loss": 1.5488, "step": 5735 }, { "epoch": 0.1692466460268318, "grad_norm": 4.498491332576486, "learning_rate": 4.230958230958231e-06, "loss": 1.5398, "step": 5740 }, { "epoch": 0.16939407341884122, "grad_norm": 4.60876114015473, "learning_rate": 4.2346437346437346e-06, "loss": 1.5182, "step": 5745 }, { "epoch": 0.16954150081085065, "grad_norm": 4.759164040555741, "learning_rate": 4.2383292383292384e-06, "loss": 1.5432, "step": 5750 }, { "epoch": 0.16968892820286008, "grad_norm": 4.613158986267842, "learning_rate": 4.242014742014742e-06, "loss": 1.5416, "step": 5755 }, { "epoch": 0.16983635559486954, "grad_norm": 4.348411061930745, "learning_rate": 4.245700245700246e-06, "loss": 1.5012, "step": 5760 }, { "epoch": 0.16998378298687897, "grad_norm": 4.254099929076426, "learning_rate": 4.249385749385749e-06, "loss": 1.5312, "step": 5765 }, { "epoch": 0.1701312103788884, "grad_norm": 4.2092402373762345, "learning_rate": 4.253071253071253e-06, "loss": 1.5406, "step": 5770 }, { "epoch": 0.17027863777089783, "grad_norm": 4.829375182371872, "learning_rate": 4.256756756756757e-06, "loss": 1.5685, "step": 5775 }, { "epoch": 0.17042606516290726, "grad_norm": 4.368095410056806, "learning_rate": 4.260442260442261e-06, "loss": 1.5482, "step": 5780 }, { "epoch": 0.17057349255491672, "grad_norm": 4.242233472644191, "learning_rate": 4.264127764127765e-06, "loss": 1.5085, "step": 5785 }, { "epoch": 0.17072091994692615, "grad_norm": 4.408684931411843, "learning_rate": 4.2678132678132685e-06, "loss": 1.5753, "step": 5790 }, { "epoch": 0.17086834733893558, "grad_norm": 4.551561316456305, "learning_rate": 4.2714987714987715e-06, "loss": 1.5368, "step": 5795 }, { "epoch": 0.171015774730945, "grad_norm": 4.191679100969944, "learning_rate": 4.275184275184275e-06, "loss": 1.4797, "step": 5800 }, { "epoch": 0.17116320212295444, "grad_norm": 4.422287494236393, "learning_rate": 4.278869778869778e-06, "loss": 1.5622, "step": 5805 }, { "epoch": 0.17131062951496387, "grad_norm": 4.522426776383429, "learning_rate": 4.282555282555282e-06, "loss": 1.5605, "step": 5810 }, { "epoch": 0.17145805690697333, "grad_norm": 4.257051601604477, "learning_rate": 4.286240786240786e-06, "loss": 1.517, "step": 5815 }, { "epoch": 0.17160548429898276, "grad_norm": 4.170552058886427, "learning_rate": 4.28992628992629e-06, "loss": 1.4729, "step": 5820 }, { "epoch": 0.1717529116909922, "grad_norm": 4.111481349550243, "learning_rate": 4.293611793611794e-06, "loss": 1.5462, "step": 5825 }, { "epoch": 0.17190033908300162, "grad_norm": 4.539511221181973, "learning_rate": 4.297297297297298e-06, "loss": 1.5086, "step": 5830 }, { "epoch": 0.17204776647501105, "grad_norm": 4.227800132903902, "learning_rate": 4.300982800982802e-06, "loss": 1.5088, "step": 5835 }, { "epoch": 0.17219519386702048, "grad_norm": 4.169381343871914, "learning_rate": 4.304668304668305e-06, "loss": 1.5292, "step": 5840 }, { "epoch": 0.17234262125902994, "grad_norm": 4.45829539265741, "learning_rate": 4.3083538083538085e-06, "loss": 1.5086, "step": 5845 }, { "epoch": 0.17249004865103937, "grad_norm": 4.584971479933393, "learning_rate": 4.312039312039312e-06, "loss": 1.4955, "step": 5850 }, { "epoch": 0.1726374760430488, "grad_norm": 4.454696166166125, "learning_rate": 4.315724815724816e-06, "loss": 1.5339, "step": 5855 }, { "epoch": 0.17278490343505823, "grad_norm": 4.527571706215926, "learning_rate": 4.319410319410319e-06, "loss": 1.5245, "step": 5860 }, { "epoch": 0.17293233082706766, "grad_norm": 5.00469539630663, "learning_rate": 4.323095823095823e-06, "loss": 1.5258, "step": 5865 }, { "epoch": 0.17307975821907712, "grad_norm": 4.248637726175427, "learning_rate": 4.326781326781327e-06, "loss": 1.5356, "step": 5870 }, { "epoch": 0.17322718561108655, "grad_norm": 4.4315460662812045, "learning_rate": 4.33046683046683e-06, "loss": 1.4967, "step": 5875 }, { "epoch": 0.17337461300309598, "grad_norm": 4.478618390611372, "learning_rate": 4.334152334152334e-06, "loss": 1.5552, "step": 5880 }, { "epoch": 0.1735220403951054, "grad_norm": 4.463830646291961, "learning_rate": 4.337837837837838e-06, "loss": 1.518, "step": 5885 }, { "epoch": 0.17366946778711484, "grad_norm": 4.0154579638711825, "learning_rate": 4.3415233415233416e-06, "loss": 1.5282, "step": 5890 }, { "epoch": 0.17381689517912427, "grad_norm": 4.607747967542708, "learning_rate": 4.3452088452088454e-06, "loss": 1.5607, "step": 5895 }, { "epoch": 0.17396432257113373, "grad_norm": 4.301204818227144, "learning_rate": 4.348894348894349e-06, "loss": 1.5105, "step": 5900 }, { "epoch": 0.17411174996314316, "grad_norm": 4.323228208939006, "learning_rate": 4.352579852579853e-06, "loss": 1.489, "step": 5905 }, { "epoch": 0.1742591773551526, "grad_norm": 4.393549924218712, "learning_rate": 4.356265356265357e-06, "loss": 1.5526, "step": 5910 }, { "epoch": 0.17440660474716202, "grad_norm": 4.328145442121455, "learning_rate": 4.35995085995086e-06, "loss": 1.5617, "step": 5915 }, { "epoch": 0.17455403213917145, "grad_norm": 4.618844872011938, "learning_rate": 4.363636363636364e-06, "loss": 1.5523, "step": 5920 }, { "epoch": 0.17470145953118088, "grad_norm": 4.937046334204323, "learning_rate": 4.367321867321867e-06, "loss": 1.5022, "step": 5925 }, { "epoch": 0.17484888692319034, "grad_norm": 4.329299201630363, "learning_rate": 4.371007371007371e-06, "loss": 1.5632, "step": 5930 }, { "epoch": 0.17499631431519977, "grad_norm": 4.369794423303023, "learning_rate": 4.374692874692875e-06, "loss": 1.4975, "step": 5935 }, { "epoch": 0.1751437417072092, "grad_norm": 4.373959285627452, "learning_rate": 4.3783783783783785e-06, "loss": 1.575, "step": 5940 }, { "epoch": 0.17529116909921863, "grad_norm": 4.287966887145537, "learning_rate": 4.382063882063882e-06, "loss": 1.5108, "step": 5945 }, { "epoch": 0.17543859649122806, "grad_norm": 4.615246307472135, "learning_rate": 4.385749385749385e-06, "loss": 1.478, "step": 5950 }, { "epoch": 0.17558602388323752, "grad_norm": 4.34394416046245, "learning_rate": 4.389434889434889e-06, "loss": 1.5262, "step": 5955 }, { "epoch": 0.17573345127524695, "grad_norm": 4.333182586673723, "learning_rate": 4.393120393120393e-06, "loss": 1.5648, "step": 5960 }, { "epoch": 0.17588087866725638, "grad_norm": 4.002470878222204, "learning_rate": 4.396805896805897e-06, "loss": 1.5172, "step": 5965 }, { "epoch": 0.1760283060592658, "grad_norm": 4.4180748196888535, "learning_rate": 4.400491400491401e-06, "loss": 1.5175, "step": 5970 }, { "epoch": 0.17617573345127524, "grad_norm": 4.296503442027887, "learning_rate": 4.404176904176905e-06, "loss": 1.5451, "step": 5975 }, { "epoch": 0.17632316084328467, "grad_norm": 4.050803096496357, "learning_rate": 4.407862407862409e-06, "loss": 1.526, "step": 5980 }, { "epoch": 0.17647058823529413, "grad_norm": 4.312640507056593, "learning_rate": 4.411547911547912e-06, "loss": 1.5854, "step": 5985 }, { "epoch": 0.17661801562730356, "grad_norm": 4.235045466111791, "learning_rate": 4.4152334152334155e-06, "loss": 1.541, "step": 5990 }, { "epoch": 0.176765443019313, "grad_norm": 4.428403762732608, "learning_rate": 4.4189189189189185e-06, "loss": 1.4805, "step": 5995 }, { "epoch": 0.17691287041132242, "grad_norm": 4.235490503644078, "learning_rate": 4.422604422604422e-06, "loss": 1.4893, "step": 6000 }, { "epoch": 0.17691287041132242, "eval_loss": 1.257535457611084, "eval_runtime": 4.3099, "eval_samples_per_second": 91.882, "eval_steps_per_second": 3.016, "step": 6000 }, { "epoch": 0.17706029780333185, "grad_norm": 4.04963156490117, "learning_rate": 4.426289926289926e-06, "loss": 1.5198, "step": 6005 }, { "epoch": 0.17720772519534128, "grad_norm": 4.585835081082225, "learning_rate": 4.42997542997543e-06, "loss": 1.5456, "step": 6010 }, { "epoch": 0.17735515258735074, "grad_norm": 4.396070666747274, "learning_rate": 4.433660933660934e-06, "loss": 1.5454, "step": 6015 }, { "epoch": 0.17750257997936017, "grad_norm": 4.380034269809513, "learning_rate": 4.437346437346438e-06, "loss": 1.5073, "step": 6020 }, { "epoch": 0.1776500073713696, "grad_norm": 4.190054797348023, "learning_rate": 4.441031941031941e-06, "loss": 1.5545, "step": 6025 }, { "epoch": 0.17779743476337903, "grad_norm": 4.100076681532694, "learning_rate": 4.444717444717445e-06, "loss": 1.5138, "step": 6030 }, { "epoch": 0.17794486215538846, "grad_norm": 4.203462981338546, "learning_rate": 4.4484029484029486e-06, "loss": 1.5243, "step": 6035 }, { "epoch": 0.17809228954739792, "grad_norm": 4.453613816218949, "learning_rate": 4.452088452088452e-06, "loss": 1.5919, "step": 6040 }, { "epoch": 0.17823971693940735, "grad_norm": 4.595356890558187, "learning_rate": 4.455773955773956e-06, "loss": 1.5007, "step": 6045 }, { "epoch": 0.17838714433141678, "grad_norm": 4.2201518486119705, "learning_rate": 4.45945945945946e-06, "loss": 1.5171, "step": 6050 }, { "epoch": 0.1785345717234262, "grad_norm": 4.3065745238378845, "learning_rate": 4.463144963144964e-06, "loss": 1.5037, "step": 6055 }, { "epoch": 0.17868199911543564, "grad_norm": 3.869771187602747, "learning_rate": 4.466830466830466e-06, "loss": 1.472, "step": 6060 }, { "epoch": 0.17882942650744507, "grad_norm": 4.197383106512679, "learning_rate": 4.47051597051597e-06, "loss": 1.522, "step": 6065 }, { "epoch": 0.17897685389945453, "grad_norm": 4.242282267653934, "learning_rate": 4.474201474201474e-06, "loss": 1.5459, "step": 6070 }, { "epoch": 0.17912428129146396, "grad_norm": 4.078166539685521, "learning_rate": 4.477886977886978e-06, "loss": 1.5737, "step": 6075 }, { "epoch": 0.1792717086834734, "grad_norm": 4.101760998356492, "learning_rate": 4.481572481572482e-06, "loss": 1.4779, "step": 6080 }, { "epoch": 0.17941913607548282, "grad_norm": 4.387441743419072, "learning_rate": 4.4852579852579855e-06, "loss": 1.5145, "step": 6085 }, { "epoch": 0.17956656346749225, "grad_norm": 4.457728554436675, "learning_rate": 4.488943488943489e-06, "loss": 1.5366, "step": 6090 }, { "epoch": 0.17971399085950168, "grad_norm": 4.32329607160772, "learning_rate": 4.492628992628992e-06, "loss": 1.5205, "step": 6095 }, { "epoch": 0.17986141825151114, "grad_norm": 4.54591873805589, "learning_rate": 4.496314496314496e-06, "loss": 1.5359, "step": 6100 }, { "epoch": 0.18000884564352057, "grad_norm": 4.082937540362469, "learning_rate": 4.5e-06, "loss": 1.4987, "step": 6105 }, { "epoch": 0.18015627303553, "grad_norm": 4.2333018579622035, "learning_rate": 4.503685503685504e-06, "loss": 1.5258, "step": 6110 }, { "epoch": 0.18030370042753943, "grad_norm": 4.5259875360265065, "learning_rate": 4.507371007371008e-06, "loss": 1.5278, "step": 6115 }, { "epoch": 0.18045112781954886, "grad_norm": 4.179637606093999, "learning_rate": 4.511056511056512e-06, "loss": 1.548, "step": 6120 }, { "epoch": 0.18059855521155832, "grad_norm": 4.1907907574650505, "learning_rate": 4.514742014742015e-06, "loss": 1.5472, "step": 6125 }, { "epoch": 0.18074598260356775, "grad_norm": 4.425771682500092, "learning_rate": 4.518427518427519e-06, "loss": 1.5159, "step": 6130 }, { "epoch": 0.18089340999557718, "grad_norm": 4.359663047651177, "learning_rate": 4.522113022113022e-06, "loss": 1.5714, "step": 6135 }, { "epoch": 0.1810408373875866, "grad_norm": 4.5790687516862985, "learning_rate": 4.5257985257985255e-06, "loss": 1.5395, "step": 6140 }, { "epoch": 0.18118826477959604, "grad_norm": 4.7412319569398, "learning_rate": 4.529484029484029e-06, "loss": 1.5271, "step": 6145 }, { "epoch": 0.18133569217160547, "grad_norm": 4.39461085171289, "learning_rate": 4.533169533169533e-06, "loss": 1.5051, "step": 6150 }, { "epoch": 0.18148311956361493, "grad_norm": 4.498033347704751, "learning_rate": 4.536855036855037e-06, "loss": 1.5301, "step": 6155 }, { "epoch": 0.18163054695562436, "grad_norm": 4.242610846775795, "learning_rate": 4.540540540540541e-06, "loss": 1.5405, "step": 6160 }, { "epoch": 0.1817779743476338, "grad_norm": 4.146750781510859, "learning_rate": 4.544226044226045e-06, "loss": 1.5703, "step": 6165 }, { "epoch": 0.18192540173964322, "grad_norm": 4.1868341906808215, "learning_rate": 4.547911547911548e-06, "loss": 1.5589, "step": 6170 }, { "epoch": 0.18207282913165265, "grad_norm": 4.415045158911148, "learning_rate": 4.551597051597052e-06, "loss": 1.4676, "step": 6175 }, { "epoch": 0.18222025652366208, "grad_norm": 4.295479233347107, "learning_rate": 4.5552825552825555e-06, "loss": 1.5415, "step": 6180 }, { "epoch": 0.18236768391567154, "grad_norm": 5.1906165163832005, "learning_rate": 4.558968058968059e-06, "loss": 1.5445, "step": 6185 }, { "epoch": 0.18251511130768097, "grad_norm": 4.310032262154603, "learning_rate": 4.562653562653563e-06, "loss": 1.4914, "step": 6190 }, { "epoch": 0.1826625386996904, "grad_norm": 4.049607999964906, "learning_rate": 4.566339066339066e-06, "loss": 1.5185, "step": 6195 }, { "epoch": 0.18280996609169983, "grad_norm": 4.5021412637416445, "learning_rate": 4.57002457002457e-06, "loss": 1.5634, "step": 6200 }, { "epoch": 0.18295739348370926, "grad_norm": 4.506937100851079, "learning_rate": 4.573710073710073e-06, "loss": 1.4726, "step": 6205 }, { "epoch": 0.18310482087571872, "grad_norm": 4.272066566990574, "learning_rate": 4.577395577395577e-06, "loss": 1.4656, "step": 6210 }, { "epoch": 0.18325224826772815, "grad_norm": 4.72457799494417, "learning_rate": 4.581081081081081e-06, "loss": 1.5212, "step": 6215 }, { "epoch": 0.18339967565973758, "grad_norm": 4.278257462769268, "learning_rate": 4.584766584766585e-06, "loss": 1.543, "step": 6220 }, { "epoch": 0.183547103051747, "grad_norm": 4.553209825845942, "learning_rate": 4.588452088452089e-06, "loss": 1.572, "step": 6225 }, { "epoch": 0.18369453044375644, "grad_norm": 4.514469597307542, "learning_rate": 4.5921375921375925e-06, "loss": 1.5089, "step": 6230 }, { "epoch": 0.18384195783576587, "grad_norm": 4.342624317164546, "learning_rate": 4.595823095823096e-06, "loss": 1.4947, "step": 6235 }, { "epoch": 0.18398938522777533, "grad_norm": 4.767210006973199, "learning_rate": 4.5995085995086e-06, "loss": 1.556, "step": 6240 }, { "epoch": 0.18413681261978476, "grad_norm": 4.648160834838052, "learning_rate": 4.603194103194103e-06, "loss": 1.5596, "step": 6245 }, { "epoch": 0.1842842400117942, "grad_norm": 4.535861655470595, "learning_rate": 4.606879606879607e-06, "loss": 1.5522, "step": 6250 }, { "epoch": 0.18443166740380362, "grad_norm": 4.387647919189034, "learning_rate": 4.610565110565111e-06, "loss": 1.5261, "step": 6255 }, { "epoch": 0.18457909479581305, "grad_norm": 4.1691218693242735, "learning_rate": 4.614250614250614e-06, "loss": 1.5313, "step": 6260 }, { "epoch": 0.18472652218782248, "grad_norm": 4.293484300855273, "learning_rate": 4.617936117936118e-06, "loss": 1.5093, "step": 6265 }, { "epoch": 0.18487394957983194, "grad_norm": 4.416143524459441, "learning_rate": 4.621621621621622e-06, "loss": 1.5236, "step": 6270 }, { "epoch": 0.18502137697184137, "grad_norm": 4.4400135743981055, "learning_rate": 4.625307125307126e-06, "loss": 1.5348, "step": 6275 }, { "epoch": 0.1851688043638508, "grad_norm": 4.060704734914859, "learning_rate": 4.628992628992629e-06, "loss": 1.5357, "step": 6280 }, { "epoch": 0.18531623175586023, "grad_norm": 4.41305209483714, "learning_rate": 4.6326781326781325e-06, "loss": 1.531, "step": 6285 }, { "epoch": 0.18546365914786966, "grad_norm": 4.492856297505531, "learning_rate": 4.636363636363636e-06, "loss": 1.5528, "step": 6290 }, { "epoch": 0.18561108653987912, "grad_norm": 4.849174971125233, "learning_rate": 4.64004914004914e-06, "loss": 1.5099, "step": 6295 }, { "epoch": 0.18575851393188855, "grad_norm": 3.9514252513410413, "learning_rate": 4.643734643734644e-06, "loss": 1.4922, "step": 6300 }, { "epoch": 0.18590594132389798, "grad_norm": 4.389701489198237, "learning_rate": 4.647420147420148e-06, "loss": 1.4912, "step": 6305 }, { "epoch": 0.1860533687159074, "grad_norm": 4.3052593135785315, "learning_rate": 4.651105651105652e-06, "loss": 1.5817, "step": 6310 }, { "epoch": 0.18620079610791684, "grad_norm": 4.345764454399641, "learning_rate": 4.654791154791155e-06, "loss": 1.5445, "step": 6315 }, { "epoch": 0.18634822349992627, "grad_norm": 4.296464411085172, "learning_rate": 4.658476658476659e-06, "loss": 1.5437, "step": 6320 }, { "epoch": 0.18649565089193573, "grad_norm": 3.894293652297619, "learning_rate": 4.6621621621621625e-06, "loss": 1.5416, "step": 6325 }, { "epoch": 0.18664307828394516, "grad_norm": 4.516611022859687, "learning_rate": 4.6658476658476655e-06, "loss": 1.537, "step": 6330 }, { "epoch": 0.1867905056759546, "grad_norm": 4.1497387625566935, "learning_rate": 4.669533169533169e-06, "loss": 1.523, "step": 6335 }, { "epoch": 0.18693793306796402, "grad_norm": 4.5325748149545255, "learning_rate": 4.673218673218673e-06, "loss": 1.508, "step": 6340 }, { "epoch": 0.18708536045997345, "grad_norm": 5.6381171057480834, "learning_rate": 4.676904176904177e-06, "loss": 1.5296, "step": 6345 }, { "epoch": 0.18723278785198288, "grad_norm": 4.291473066546091, "learning_rate": 4.680589680589681e-06, "loss": 1.5382, "step": 6350 }, { "epoch": 0.18738021524399234, "grad_norm": 3.987689922058338, "learning_rate": 4.684275184275184e-06, "loss": 1.5169, "step": 6355 }, { "epoch": 0.18752764263600177, "grad_norm": 3.9832669315200357, "learning_rate": 4.687960687960688e-06, "loss": 1.5352, "step": 6360 }, { "epoch": 0.1876750700280112, "grad_norm": 3.9029974956382456, "learning_rate": 4.691646191646192e-06, "loss": 1.5155, "step": 6365 }, { "epoch": 0.18782249742002063, "grad_norm": 4.095377255846397, "learning_rate": 4.695331695331696e-06, "loss": 1.5317, "step": 6370 }, { "epoch": 0.18796992481203006, "grad_norm": 4.1243583420308205, "learning_rate": 4.6990171990171995e-06, "loss": 1.5799, "step": 6375 }, { "epoch": 0.18811735220403952, "grad_norm": 4.235564765103723, "learning_rate": 4.702702702702703e-06, "loss": 1.5308, "step": 6380 }, { "epoch": 0.18826477959604895, "grad_norm": 4.370656493812406, "learning_rate": 4.706388206388207e-06, "loss": 1.5574, "step": 6385 }, { "epoch": 0.18841220698805838, "grad_norm": 4.348085508055727, "learning_rate": 4.71007371007371e-06, "loss": 1.4324, "step": 6390 }, { "epoch": 0.18855963438006781, "grad_norm": 4.216060877518067, "learning_rate": 4.713759213759213e-06, "loss": 1.5397, "step": 6395 }, { "epoch": 0.18870706177207724, "grad_norm": 4.300239893947884, "learning_rate": 4.717444717444717e-06, "loss": 1.5453, "step": 6400 }, { "epoch": 0.18885448916408668, "grad_norm": 4.14895167621462, "learning_rate": 4.721130221130221e-06, "loss": 1.4665, "step": 6405 }, { "epoch": 0.18900191655609613, "grad_norm": 4.107495727885251, "learning_rate": 4.724815724815725e-06, "loss": 1.5321, "step": 6410 }, { "epoch": 0.18914934394810556, "grad_norm": 4.205153775103616, "learning_rate": 4.728501228501229e-06, "loss": 1.5313, "step": 6415 }, { "epoch": 0.189296771340115, "grad_norm": 4.019984860401772, "learning_rate": 4.7321867321867326e-06, "loss": 1.5663, "step": 6420 }, { "epoch": 0.18944419873212442, "grad_norm": 4.252354155054782, "learning_rate": 4.735872235872236e-06, "loss": 1.5274, "step": 6425 }, { "epoch": 0.18959162612413386, "grad_norm": 4.304602411263594, "learning_rate": 4.7395577395577395e-06, "loss": 1.5516, "step": 6430 }, { "epoch": 0.18973905351614329, "grad_norm": 3.8811657791958685, "learning_rate": 4.743243243243243e-06, "loss": 1.5343, "step": 6435 }, { "epoch": 0.18988648090815274, "grad_norm": 4.3740918554725265, "learning_rate": 4.746928746928747e-06, "loss": 1.5662, "step": 6440 }, { "epoch": 0.19003390830016217, "grad_norm": 4.308147342873083, "learning_rate": 4.750614250614251e-06, "loss": 1.524, "step": 6445 }, { "epoch": 0.1901813356921716, "grad_norm": 4.340013001521231, "learning_rate": 4.754299754299755e-06, "loss": 1.5319, "step": 6450 }, { "epoch": 0.19032876308418104, "grad_norm": 4.094081425054821, "learning_rate": 4.757985257985259e-06, "loss": 1.5444, "step": 6455 }, { "epoch": 0.19047619047619047, "grad_norm": 4.542498011120124, "learning_rate": 4.761670761670762e-06, "loss": 1.5042, "step": 6460 }, { "epoch": 0.19062361786819992, "grad_norm": 3.99114612334549, "learning_rate": 4.765356265356265e-06, "loss": 1.4802, "step": 6465 }, { "epoch": 0.19077104526020935, "grad_norm": 3.978022470406204, "learning_rate": 4.769041769041769e-06, "loss": 1.5137, "step": 6470 }, { "epoch": 0.19091847265221878, "grad_norm": 4.3953409829002705, "learning_rate": 4.7727272727272725e-06, "loss": 1.5333, "step": 6475 }, { "epoch": 0.19106590004422822, "grad_norm": 4.215275864394401, "learning_rate": 4.776412776412776e-06, "loss": 1.4759, "step": 6480 }, { "epoch": 0.19121332743623765, "grad_norm": 4.467100876721119, "learning_rate": 4.78009828009828e-06, "loss": 1.5428, "step": 6485 }, { "epoch": 0.19136075482824708, "grad_norm": 4.21689529536466, "learning_rate": 4.783783783783784e-06, "loss": 1.5547, "step": 6490 }, { "epoch": 0.19150818222025653, "grad_norm": 4.373742656503817, "learning_rate": 4.787469287469288e-06, "loss": 1.48, "step": 6495 }, { "epoch": 0.19165560961226596, "grad_norm": 4.117139264062731, "learning_rate": 4.791154791154791e-06, "loss": 1.5136, "step": 6500 }, { "epoch": 0.19165560961226596, "eval_loss": 1.255479097366333, "eval_runtime": 4.1659, "eval_samples_per_second": 95.058, "eval_steps_per_second": 3.121, "step": 6500 }, { "epoch": 0.1918030370042754, "grad_norm": 4.151075392366929, "learning_rate": 4.794840294840295e-06, "loss": 1.4922, "step": 6505 }, { "epoch": 0.19195046439628483, "grad_norm": 4.351879501212037, "learning_rate": 4.798525798525799e-06, "loss": 1.5738, "step": 6510 }, { "epoch": 0.19209789178829426, "grad_norm": 4.159194806843944, "learning_rate": 4.802211302211303e-06, "loss": 1.538, "step": 6515 }, { "epoch": 0.19224531918030371, "grad_norm": 4.408300433232171, "learning_rate": 4.8058968058968065e-06, "loss": 1.5398, "step": 6520 }, { "epoch": 0.19239274657231314, "grad_norm": 4.222394382247749, "learning_rate": 4.80958230958231e-06, "loss": 1.5811, "step": 6525 }, { "epoch": 0.19254017396432258, "grad_norm": 4.353018682000152, "learning_rate": 4.813267813267813e-06, "loss": 1.5272, "step": 6530 }, { "epoch": 0.192687601356332, "grad_norm": 4.368320984546859, "learning_rate": 4.816953316953316e-06, "loss": 1.5555, "step": 6535 }, { "epoch": 0.19283502874834144, "grad_norm": 4.098141672506175, "learning_rate": 4.82063882063882e-06, "loss": 1.5603, "step": 6540 }, { "epoch": 0.19298245614035087, "grad_norm": 4.13917741790913, "learning_rate": 4.824324324324324e-06, "loss": 1.5033, "step": 6545 }, { "epoch": 0.19312988353236032, "grad_norm": 4.223846623128955, "learning_rate": 4.828009828009828e-06, "loss": 1.5368, "step": 6550 }, { "epoch": 0.19327731092436976, "grad_norm": 4.275716613549743, "learning_rate": 4.831695331695332e-06, "loss": 1.5283, "step": 6555 }, { "epoch": 0.19342473831637919, "grad_norm": 4.224274587143926, "learning_rate": 4.835380835380836e-06, "loss": 1.5635, "step": 6560 }, { "epoch": 0.19357216570838862, "grad_norm": 4.128975274314031, "learning_rate": 4.8390663390663396e-06, "loss": 1.5192, "step": 6565 }, { "epoch": 0.19371959310039805, "grad_norm": 4.577259413400133, "learning_rate": 4.842751842751843e-06, "loss": 1.5641, "step": 6570 }, { "epoch": 0.19386702049240748, "grad_norm": 4.163095024346347, "learning_rate": 4.8464373464373464e-06, "loss": 1.5664, "step": 6575 }, { "epoch": 0.19401444788441694, "grad_norm": 4.283112133147661, "learning_rate": 4.85012285012285e-06, "loss": 1.5517, "step": 6580 }, { "epoch": 0.19416187527642637, "grad_norm": 3.9767177620188887, "learning_rate": 4.853808353808354e-06, "loss": 1.5175, "step": 6585 }, { "epoch": 0.1943093026684358, "grad_norm": 4.402713667852659, "learning_rate": 4.857493857493858e-06, "loss": 1.5475, "step": 6590 }, { "epoch": 0.19445673006044523, "grad_norm": 4.309634273638352, "learning_rate": 4.861179361179361e-06, "loss": 1.5102, "step": 6595 }, { "epoch": 0.19460415745245466, "grad_norm": 4.306945794969365, "learning_rate": 4.864864864864865e-06, "loss": 1.5255, "step": 6600 }, { "epoch": 0.19475158484446412, "grad_norm": 4.172445828256005, "learning_rate": 4.868550368550369e-06, "loss": 1.5702, "step": 6605 }, { "epoch": 0.19489901223647355, "grad_norm": 3.825270427979099, "learning_rate": 4.872235872235872e-06, "loss": 1.5749, "step": 6610 }, { "epoch": 0.19504643962848298, "grad_norm": 4.200219080351988, "learning_rate": 4.875921375921376e-06, "loss": 1.5063, "step": 6615 }, { "epoch": 0.1951938670204924, "grad_norm": 3.9063107049536416, "learning_rate": 4.8796068796068795e-06, "loss": 1.5531, "step": 6620 }, { "epoch": 0.19534129441250184, "grad_norm": 4.286993907816878, "learning_rate": 4.883292383292383e-06, "loss": 1.5633, "step": 6625 }, { "epoch": 0.19548872180451127, "grad_norm": 4.013502629322435, "learning_rate": 4.886977886977887e-06, "loss": 1.5424, "step": 6630 }, { "epoch": 0.19563614919652073, "grad_norm": 4.277810178392941, "learning_rate": 4.890663390663391e-06, "loss": 1.5741, "step": 6635 }, { "epoch": 0.19578357658853016, "grad_norm": 4.453228753980048, "learning_rate": 4.894348894348895e-06, "loss": 1.5103, "step": 6640 }, { "epoch": 0.1959310039805396, "grad_norm": 4.394467531978316, "learning_rate": 4.898034398034398e-06, "loss": 1.5202, "step": 6645 }, { "epoch": 0.19607843137254902, "grad_norm": 4.669545637115777, "learning_rate": 4.901719901719902e-06, "loss": 1.5477, "step": 6650 }, { "epoch": 0.19622585876455845, "grad_norm": 3.9681447388199875, "learning_rate": 4.905405405405406e-06, "loss": 1.5184, "step": 6655 }, { "epoch": 0.19637328615656788, "grad_norm": 4.172542490982599, "learning_rate": 4.90909090909091e-06, "loss": 1.5279, "step": 6660 }, { "epoch": 0.19652071354857734, "grad_norm": 4.184417616728185, "learning_rate": 4.912776412776413e-06, "loss": 1.5133, "step": 6665 }, { "epoch": 0.19666814094058677, "grad_norm": 4.2834548065838325, "learning_rate": 4.9164619164619165e-06, "loss": 1.5354, "step": 6670 }, { "epoch": 0.1968155683325962, "grad_norm": 4.063381653738373, "learning_rate": 4.92014742014742e-06, "loss": 1.53, "step": 6675 }, { "epoch": 0.19696299572460563, "grad_norm": 4.049708890451092, "learning_rate": 4.923832923832923e-06, "loss": 1.5044, "step": 6680 }, { "epoch": 0.19711042311661506, "grad_norm": 4.342887610324372, "learning_rate": 4.927518427518427e-06, "loss": 1.5113, "step": 6685 }, { "epoch": 0.19725785050862452, "grad_norm": 4.769858861548181, "learning_rate": 4.931203931203931e-06, "loss": 1.5457, "step": 6690 }, { "epoch": 0.19740527790063395, "grad_norm": 4.3303362935738985, "learning_rate": 4.934889434889435e-06, "loss": 1.524, "step": 6695 }, { "epoch": 0.19755270529264338, "grad_norm": 4.179968914252294, "learning_rate": 4.938574938574939e-06, "loss": 1.5348, "step": 6700 }, { "epoch": 0.1977001326846528, "grad_norm": 4.247736862504495, "learning_rate": 4.942260442260443e-06, "loss": 1.5127, "step": 6705 }, { "epoch": 0.19784756007666224, "grad_norm": 4.083485133755814, "learning_rate": 4.9459459459459466e-06, "loss": 1.5455, "step": 6710 }, { "epoch": 0.19799498746867167, "grad_norm": 4.402882046782719, "learning_rate": 4.94963144963145e-06, "loss": 1.5509, "step": 6715 }, { "epoch": 0.19814241486068113, "grad_norm": 4.289237190190962, "learning_rate": 4.9533169533169534e-06, "loss": 1.5371, "step": 6720 }, { "epoch": 0.19828984225269056, "grad_norm": 3.91163070747874, "learning_rate": 4.957002457002457e-06, "loss": 1.5217, "step": 6725 }, { "epoch": 0.1984372696447, "grad_norm": 4.180089842098656, "learning_rate": 4.96068796068796e-06, "loss": 1.526, "step": 6730 }, { "epoch": 0.19858469703670942, "grad_norm": 4.745838163613975, "learning_rate": 4.964373464373464e-06, "loss": 1.5558, "step": 6735 }, { "epoch": 0.19873212442871885, "grad_norm": 4.282545956963796, "learning_rate": 4.968058968058968e-06, "loss": 1.5683, "step": 6740 }, { "epoch": 0.19887955182072828, "grad_norm": 4.2439633069791105, "learning_rate": 4.971744471744472e-06, "loss": 1.5349, "step": 6745 }, { "epoch": 0.19902697921273774, "grad_norm": 4.280544446223665, "learning_rate": 4.975429975429976e-06, "loss": 1.5734, "step": 6750 }, { "epoch": 0.19917440660474717, "grad_norm": 4.313506927792215, "learning_rate": 4.979115479115479e-06, "loss": 1.5124, "step": 6755 }, { "epoch": 0.1993218339967566, "grad_norm": 4.178708897716497, "learning_rate": 4.982800982800983e-06, "loss": 1.4759, "step": 6760 }, { "epoch": 0.19946926138876603, "grad_norm": 4.159133860308691, "learning_rate": 4.9864864864864865e-06, "loss": 1.5248, "step": 6765 }, { "epoch": 0.19961668878077546, "grad_norm": 4.293860068580252, "learning_rate": 4.99017199017199e-06, "loss": 1.4878, "step": 6770 }, { "epoch": 0.19976411617278492, "grad_norm": 4.065981865955815, "learning_rate": 4.993857493857494e-06, "loss": 1.5037, "step": 6775 }, { "epoch": 0.19991154356479435, "grad_norm": 3.960745581960171, "learning_rate": 4.997542997542998e-06, "loss": 1.5201, "step": 6780 }, { "epoch": 0.20005897095680378, "grad_norm": 4.575977873588778, "learning_rate": 5.001228501228502e-06, "loss": 1.5381, "step": 6785 }, { "epoch": 0.2002063983488132, "grad_norm": 4.17184957815167, "learning_rate": 5.004914004914005e-06, "loss": 1.5247, "step": 6790 }, { "epoch": 0.20035382574082264, "grad_norm": 4.21944402034713, "learning_rate": 5.008599508599508e-06, "loss": 1.5071, "step": 6795 }, { "epoch": 0.20050125313283207, "grad_norm": 4.662185588748063, "learning_rate": 5.012285012285012e-06, "loss": 1.5677, "step": 6800 }, { "epoch": 0.20064868052484153, "grad_norm": 4.161908198486526, "learning_rate": 5.015970515970516e-06, "loss": 1.4973, "step": 6805 }, { "epoch": 0.20079610791685096, "grad_norm": 4.510123613791742, "learning_rate": 5.01965601965602e-06, "loss": 1.5871, "step": 6810 }, { "epoch": 0.2009435353088604, "grad_norm": 4.267766881317534, "learning_rate": 5.0233415233415235e-06, "loss": 1.5686, "step": 6815 }, { "epoch": 0.20109096270086982, "grad_norm": 4.426149738318495, "learning_rate": 5.027027027027027e-06, "loss": 1.5795, "step": 6820 }, { "epoch": 0.20123839009287925, "grad_norm": 4.078043070665065, "learning_rate": 5.030712530712531e-06, "loss": 1.5545, "step": 6825 }, { "epoch": 0.20138581748488868, "grad_norm": 4.139214318794003, "learning_rate": 5.034398034398034e-06, "loss": 1.4925, "step": 6830 }, { "epoch": 0.20153324487689814, "grad_norm": 4.652506203198286, "learning_rate": 5.038083538083538e-06, "loss": 1.5313, "step": 6835 }, { "epoch": 0.20168067226890757, "grad_norm": 4.449187257355482, "learning_rate": 5.041769041769042e-06, "loss": 1.5213, "step": 6840 }, { "epoch": 0.201828099660917, "grad_norm": 4.432151416730318, "learning_rate": 5.045454545454546e-06, "loss": 1.5312, "step": 6845 }, { "epoch": 0.20197552705292643, "grad_norm": 3.9069480932805485, "learning_rate": 5.04914004914005e-06, "loss": 1.5353, "step": 6850 }, { "epoch": 0.20212295444493586, "grad_norm": 4.220689503470978, "learning_rate": 5.0528255528255535e-06, "loss": 1.5281, "step": 6855 }, { "epoch": 0.20227038183694532, "grad_norm": 3.91064760131349, "learning_rate": 5.056511056511057e-06, "loss": 1.5231, "step": 6860 }, { "epoch": 0.20241780922895475, "grad_norm": 4.205159480786589, "learning_rate": 5.06019656019656e-06, "loss": 1.5068, "step": 6865 }, { "epoch": 0.20256523662096418, "grad_norm": 4.181613761822436, "learning_rate": 5.0638820638820634e-06, "loss": 1.5648, "step": 6870 }, { "epoch": 0.2027126640129736, "grad_norm": 4.306168220097488, "learning_rate": 5.067567567567567e-06, "loss": 1.5237, "step": 6875 }, { "epoch": 0.20286009140498304, "grad_norm": 3.9683714479592784, "learning_rate": 5.071253071253071e-06, "loss": 1.5253, "step": 6880 }, { "epoch": 0.20300751879699247, "grad_norm": 3.982575789755713, "learning_rate": 5.074938574938575e-06, "loss": 1.5123, "step": 6885 }, { "epoch": 0.20315494618900193, "grad_norm": 4.353313029256612, "learning_rate": 5.078624078624079e-06, "loss": 1.5064, "step": 6890 }, { "epoch": 0.20330237358101136, "grad_norm": 3.9651956914952753, "learning_rate": 5.082309582309583e-06, "loss": 1.4733, "step": 6895 }, { "epoch": 0.2034498009730208, "grad_norm": 4.068577030662976, "learning_rate": 5.085995085995086e-06, "loss": 1.5108, "step": 6900 }, { "epoch": 0.20359722836503022, "grad_norm": 4.0898453251211055, "learning_rate": 5.08968058968059e-06, "loss": 1.5139, "step": 6905 }, { "epoch": 0.20374465575703965, "grad_norm": 4.180981016883256, "learning_rate": 5.0933660933660935e-06, "loss": 1.556, "step": 6910 }, { "epoch": 0.20389208314904908, "grad_norm": 4.061436662531236, "learning_rate": 5.097051597051597e-06, "loss": 1.5369, "step": 6915 }, { "epoch": 0.20403951054105854, "grad_norm": 4.075194021417355, "learning_rate": 5.100737100737101e-06, "loss": 1.5441, "step": 6920 }, { "epoch": 0.20418693793306797, "grad_norm": 4.165867810716145, "learning_rate": 5.104422604422605e-06, "loss": 1.4965, "step": 6925 }, { "epoch": 0.2043343653250774, "grad_norm": 4.367970287445921, "learning_rate": 5.108108108108108e-06, "loss": 1.5146, "step": 6930 }, { "epoch": 0.20448179271708683, "grad_norm": 4.065806606222042, "learning_rate": 5.111793611793612e-06, "loss": 1.4962, "step": 6935 }, { "epoch": 0.20462922010909626, "grad_norm": 4.2788597093405665, "learning_rate": 5.115479115479115e-06, "loss": 1.4603, "step": 6940 }, { "epoch": 0.20477664750110572, "grad_norm": 4.4193759142674445, "learning_rate": 5.119164619164619e-06, "loss": 1.555, "step": 6945 }, { "epoch": 0.20492407489311515, "grad_norm": 4.183858624782273, "learning_rate": 5.122850122850123e-06, "loss": 1.562, "step": 6950 }, { "epoch": 0.20507150228512458, "grad_norm": 4.018352361047902, "learning_rate": 5.126535626535627e-06, "loss": 1.5542, "step": 6955 }, { "epoch": 0.205218929677134, "grad_norm": 4.288095856432363, "learning_rate": 5.1302211302211305e-06, "loss": 1.5178, "step": 6960 }, { "epoch": 0.20536635706914344, "grad_norm": 4.069668578828257, "learning_rate": 5.133906633906634e-06, "loss": 1.5322, "step": 6965 }, { "epoch": 0.20551378446115287, "grad_norm": 4.276640368457421, "learning_rate": 5.137592137592138e-06, "loss": 1.5355, "step": 6970 }, { "epoch": 0.20566121185316233, "grad_norm": 4.432962894275495, "learning_rate": 5.141277641277641e-06, "loss": 1.5019, "step": 6975 }, { "epoch": 0.20580863924517176, "grad_norm": 4.162919255193176, "learning_rate": 5.144963144963145e-06, "loss": 1.4907, "step": 6980 }, { "epoch": 0.2059560666371812, "grad_norm": 3.900680794183332, "learning_rate": 5.148648648648649e-06, "loss": 1.4893, "step": 6985 }, { "epoch": 0.20610349402919062, "grad_norm": 4.099443387702935, "learning_rate": 5.152334152334153e-06, "loss": 1.4831, "step": 6990 }, { "epoch": 0.20625092142120005, "grad_norm": 4.137875469982281, "learning_rate": 5.156019656019657e-06, "loss": 1.528, "step": 6995 }, { "epoch": 0.20639834881320948, "grad_norm": 4.069423427333595, "learning_rate": 5.15970515970516e-06, "loss": 1.5036, "step": 7000 }, { "epoch": 0.20639834881320948, "eval_loss": 1.2512558698654175, "eval_runtime": 4.2675, "eval_samples_per_second": 92.795, "eval_steps_per_second": 3.046, "step": 7000 }, { "epoch": 0.20654577620521894, "grad_norm": 4.851673368605097, "learning_rate": 5.1633906633906636e-06, "loss": 1.5534, "step": 7005 }, { "epoch": 0.20669320359722837, "grad_norm": 4.445843982613598, "learning_rate": 5.1670761670761666e-06, "loss": 1.5401, "step": 7010 }, { "epoch": 0.2068406309892378, "grad_norm": 4.066898089340695, "learning_rate": 5.1707616707616704e-06, "loss": 1.4689, "step": 7015 }, { "epoch": 0.20698805838124723, "grad_norm": 4.255554730958822, "learning_rate": 5.174447174447174e-06, "loss": 1.5514, "step": 7020 }, { "epoch": 0.20713548577325666, "grad_norm": 4.030988910597569, "learning_rate": 5.178132678132678e-06, "loss": 1.5057, "step": 7025 }, { "epoch": 0.20728291316526612, "grad_norm": 4.102150170437154, "learning_rate": 5.181818181818182e-06, "loss": 1.5147, "step": 7030 }, { "epoch": 0.20743034055727555, "grad_norm": 4.328756574047326, "learning_rate": 5.185503685503686e-06, "loss": 1.521, "step": 7035 }, { "epoch": 0.20757776794928498, "grad_norm": 4.124287072726458, "learning_rate": 5.18918918918919e-06, "loss": 1.5215, "step": 7040 }, { "epoch": 0.2077251953412944, "grad_norm": 4.434916128232345, "learning_rate": 5.192874692874694e-06, "loss": 1.5463, "step": 7045 }, { "epoch": 0.20787262273330384, "grad_norm": 4.109244737392048, "learning_rate": 5.196560196560197e-06, "loss": 1.5203, "step": 7050 }, { "epoch": 0.20802005012531327, "grad_norm": 4.861088412715481, "learning_rate": 5.2002457002457005e-06, "loss": 1.5338, "step": 7055 }, { "epoch": 0.20816747751732273, "grad_norm": 4.126205299876975, "learning_rate": 5.203931203931204e-06, "loss": 1.5148, "step": 7060 }, { "epoch": 0.20831490490933216, "grad_norm": 4.249470114284185, "learning_rate": 5.207616707616707e-06, "loss": 1.5269, "step": 7065 }, { "epoch": 0.2084623323013416, "grad_norm": 4.338485564257113, "learning_rate": 5.211302211302211e-06, "loss": 1.5919, "step": 7070 }, { "epoch": 0.20860975969335102, "grad_norm": 4.122527766986989, "learning_rate": 5.214987714987715e-06, "loss": 1.5169, "step": 7075 }, { "epoch": 0.20875718708536045, "grad_norm": 4.07068096834637, "learning_rate": 5.218673218673219e-06, "loss": 1.5225, "step": 7080 }, { "epoch": 0.20890461447736988, "grad_norm": 4.128978823765757, "learning_rate": 5.222358722358722e-06, "loss": 1.5657, "step": 7085 }, { "epoch": 0.20905204186937934, "grad_norm": 4.506523901726518, "learning_rate": 5.226044226044226e-06, "loss": 1.5146, "step": 7090 }, { "epoch": 0.20919946926138877, "grad_norm": 4.078410338674873, "learning_rate": 5.22972972972973e-06, "loss": 1.5479, "step": 7095 }, { "epoch": 0.2093468966533982, "grad_norm": 4.188924517477319, "learning_rate": 5.233415233415234e-06, "loss": 1.5513, "step": 7100 }, { "epoch": 0.20949432404540763, "grad_norm": 4.054042086588167, "learning_rate": 5.2371007371007375e-06, "loss": 1.494, "step": 7105 }, { "epoch": 0.20964175143741706, "grad_norm": 4.092198556611842, "learning_rate": 5.240786240786241e-06, "loss": 1.5542, "step": 7110 }, { "epoch": 0.20978917882942652, "grad_norm": 4.0846408331990345, "learning_rate": 5.244471744471745e-06, "loss": 1.4751, "step": 7115 }, { "epoch": 0.20993660622143595, "grad_norm": 4.091591874912921, "learning_rate": 5.248157248157248e-06, "loss": 1.5082, "step": 7120 }, { "epoch": 0.21008403361344538, "grad_norm": 3.969895677455106, "learning_rate": 5.251842751842752e-06, "loss": 1.5516, "step": 7125 }, { "epoch": 0.2102314610054548, "grad_norm": 3.854184625162002, "learning_rate": 5.255528255528255e-06, "loss": 1.5158, "step": 7130 }, { "epoch": 0.21037888839746424, "grad_norm": 4.249606879854289, "learning_rate": 5.259213759213759e-06, "loss": 1.5194, "step": 7135 }, { "epoch": 0.21052631578947367, "grad_norm": 4.256794357640564, "learning_rate": 5.262899262899263e-06, "loss": 1.5843, "step": 7140 }, { "epoch": 0.21067374318148313, "grad_norm": 3.8694606645490706, "learning_rate": 5.266584766584767e-06, "loss": 1.5298, "step": 7145 }, { "epoch": 0.21082117057349256, "grad_norm": 4.240128846734841, "learning_rate": 5.2702702702702705e-06, "loss": 1.5255, "step": 7150 }, { "epoch": 0.210968597965502, "grad_norm": 3.9555977840586776, "learning_rate": 5.273955773955774e-06, "loss": 1.5208, "step": 7155 }, { "epoch": 0.21111602535751142, "grad_norm": 4.166133036569056, "learning_rate": 5.277641277641277e-06, "loss": 1.5107, "step": 7160 }, { "epoch": 0.21126345274952085, "grad_norm": 3.9888629056790177, "learning_rate": 5.281326781326781e-06, "loss": 1.5239, "step": 7165 }, { "epoch": 0.21141088014153028, "grad_norm": 4.136077274144807, "learning_rate": 5.285012285012285e-06, "loss": 1.5316, "step": 7170 }, { "epoch": 0.21155830753353974, "grad_norm": 4.47722242519177, "learning_rate": 5.288697788697789e-06, "loss": 1.5521, "step": 7175 }, { "epoch": 0.21170573492554917, "grad_norm": 4.039440655852943, "learning_rate": 5.292383292383293e-06, "loss": 1.581, "step": 7180 }, { "epoch": 0.2118531623175586, "grad_norm": 4.634065988401195, "learning_rate": 5.296068796068797e-06, "loss": 1.5375, "step": 7185 }, { "epoch": 0.21200058970956803, "grad_norm": 4.164751166574238, "learning_rate": 5.299754299754301e-06, "loss": 1.5067, "step": 7190 }, { "epoch": 0.21214801710157746, "grad_norm": 4.192460716887706, "learning_rate": 5.303439803439804e-06, "loss": 1.5661, "step": 7195 }, { "epoch": 0.21229544449358692, "grad_norm": 4.065743017583822, "learning_rate": 5.307125307125307e-06, "loss": 1.5599, "step": 7200 }, { "epoch": 0.21244287188559635, "grad_norm": 4.158719561464079, "learning_rate": 5.3108108108108105e-06, "loss": 1.5644, "step": 7205 }, { "epoch": 0.21259029927760578, "grad_norm": 4.2889150740968045, "learning_rate": 5.314496314496314e-06, "loss": 1.5325, "step": 7210 }, { "epoch": 0.2127377266696152, "grad_norm": 4.188871228266476, "learning_rate": 5.318181818181818e-06, "loss": 1.472, "step": 7215 }, { "epoch": 0.21288515406162464, "grad_norm": 4.182151014955877, "learning_rate": 5.321867321867322e-06, "loss": 1.5389, "step": 7220 }, { "epoch": 0.21303258145363407, "grad_norm": 3.9811056094568342, "learning_rate": 5.325552825552826e-06, "loss": 1.4781, "step": 7225 }, { "epoch": 0.21318000884564353, "grad_norm": 4.298917660818243, "learning_rate": 5.329238329238329e-06, "loss": 1.5279, "step": 7230 }, { "epoch": 0.21332743623765296, "grad_norm": 4.030369355194052, "learning_rate": 5.332923832923833e-06, "loss": 1.5712, "step": 7235 }, { "epoch": 0.2134748636296624, "grad_norm": 4.161131886606753, "learning_rate": 5.336609336609337e-06, "loss": 1.5395, "step": 7240 }, { "epoch": 0.21362229102167182, "grad_norm": 3.9502347068011177, "learning_rate": 5.340294840294841e-06, "loss": 1.4951, "step": 7245 }, { "epoch": 0.21376971841368125, "grad_norm": 4.325839830440908, "learning_rate": 5.3439803439803444e-06, "loss": 1.5595, "step": 7250 }, { "epoch": 0.21391714580569068, "grad_norm": 4.0177390984607975, "learning_rate": 5.347665847665848e-06, "loss": 1.5576, "step": 7255 }, { "epoch": 0.21406457319770014, "grad_norm": 4.09812677552055, "learning_rate": 5.351351351351352e-06, "loss": 1.5046, "step": 7260 }, { "epoch": 0.21421200058970957, "grad_norm": 3.9385884552867005, "learning_rate": 5.355036855036855e-06, "loss": 1.5547, "step": 7265 }, { "epoch": 0.214359427981719, "grad_norm": 4.08945668551657, "learning_rate": 5.358722358722358e-06, "loss": 1.5476, "step": 7270 }, { "epoch": 0.21450685537372843, "grad_norm": 4.19622412599667, "learning_rate": 5.362407862407862e-06, "loss": 1.5953, "step": 7275 }, { "epoch": 0.21465428276573786, "grad_norm": 4.000336529416252, "learning_rate": 5.366093366093366e-06, "loss": 1.5254, "step": 7280 }, { "epoch": 0.21480171015774732, "grad_norm": 4.057178642752418, "learning_rate": 5.36977886977887e-06, "loss": 1.5619, "step": 7285 }, { "epoch": 0.21494913754975675, "grad_norm": 4.081039501603637, "learning_rate": 5.373464373464374e-06, "loss": 1.5189, "step": 7290 }, { "epoch": 0.21509656494176618, "grad_norm": 3.8967481587421418, "learning_rate": 5.3771498771498775e-06, "loss": 1.5001, "step": 7295 }, { "epoch": 0.2152439923337756, "grad_norm": 4.274190554381559, "learning_rate": 5.380835380835381e-06, "loss": 1.5154, "step": 7300 }, { "epoch": 0.21539141972578504, "grad_norm": 4.482921466384568, "learning_rate": 5.384520884520884e-06, "loss": 1.4903, "step": 7305 }, { "epoch": 0.21553884711779447, "grad_norm": 4.486655702619752, "learning_rate": 5.388206388206388e-06, "loss": 1.53, "step": 7310 }, { "epoch": 0.21568627450980393, "grad_norm": 3.9373719594430763, "learning_rate": 5.391891891891892e-06, "loss": 1.5284, "step": 7315 }, { "epoch": 0.21583370190181336, "grad_norm": 4.036322440002778, "learning_rate": 5.395577395577396e-06, "loss": 1.5572, "step": 7320 }, { "epoch": 0.2159811292938228, "grad_norm": 3.9460671243155057, "learning_rate": 5.3992628992629e-06, "loss": 1.5303, "step": 7325 }, { "epoch": 0.21612855668583222, "grad_norm": 4.628454500622671, "learning_rate": 5.402948402948403e-06, "loss": 1.5299, "step": 7330 }, { "epoch": 0.21627598407784165, "grad_norm": 4.048280070372348, "learning_rate": 5.406633906633907e-06, "loss": 1.5048, "step": 7335 }, { "epoch": 0.21642341146985108, "grad_norm": 4.23427570489701, "learning_rate": 5.41031941031941e-06, "loss": 1.5326, "step": 7340 }, { "epoch": 0.21657083886186054, "grad_norm": 4.328938261170158, "learning_rate": 5.414004914004914e-06, "loss": 1.5027, "step": 7345 }, { "epoch": 0.21671826625386997, "grad_norm": 4.055658036978166, "learning_rate": 5.4176904176904175e-06, "loss": 1.495, "step": 7350 }, { "epoch": 0.2168656936458794, "grad_norm": 4.0419051029420885, "learning_rate": 5.421375921375921e-06, "loss": 1.5188, "step": 7355 }, { "epoch": 0.21701312103788883, "grad_norm": 3.978815245678338, "learning_rate": 5.425061425061425e-06, "loss": 1.5061, "step": 7360 }, { "epoch": 0.21716054842989826, "grad_norm": 4.262074246751534, "learning_rate": 5.428746928746929e-06, "loss": 1.5322, "step": 7365 }, { "epoch": 0.21730797582190772, "grad_norm": 4.209469429858196, "learning_rate": 5.432432432432433e-06, "loss": 1.5414, "step": 7370 }, { "epoch": 0.21745540321391715, "grad_norm": 3.785267603582714, "learning_rate": 5.436117936117937e-06, "loss": 1.5443, "step": 7375 }, { "epoch": 0.21760283060592658, "grad_norm": 4.107951631561071, "learning_rate": 5.43980343980344e-06, "loss": 1.5681, "step": 7380 }, { "epoch": 0.21775025799793601, "grad_norm": 4.03307910242405, "learning_rate": 5.443488943488944e-06, "loss": 1.5532, "step": 7385 }, { "epoch": 0.21789768538994544, "grad_norm": 4.046064992425742, "learning_rate": 5.4471744471744476e-06, "loss": 1.5445, "step": 7390 }, { "epoch": 0.21804511278195488, "grad_norm": 4.171371101099465, "learning_rate": 5.4508599508599514e-06, "loss": 1.5454, "step": 7395 }, { "epoch": 0.21819254017396433, "grad_norm": 4.2387474791713835, "learning_rate": 5.4545454545454545e-06, "loss": 1.5374, "step": 7400 }, { "epoch": 0.21833996756597376, "grad_norm": 4.210699587590974, "learning_rate": 5.458230958230958e-06, "loss": 1.5513, "step": 7405 }, { "epoch": 0.2184873949579832, "grad_norm": 3.9297357724895643, "learning_rate": 5.461916461916462e-06, "loss": 1.5319, "step": 7410 }, { "epoch": 0.21863482234999262, "grad_norm": 4.270797450762634, "learning_rate": 5.465601965601965e-06, "loss": 1.6008, "step": 7415 }, { "epoch": 0.21878224974200206, "grad_norm": 4.46855602237406, "learning_rate": 5.469287469287469e-06, "loss": 1.5466, "step": 7420 }, { "epoch": 0.21892967713401149, "grad_norm": 3.9931111270074355, "learning_rate": 5.472972972972973e-06, "loss": 1.5567, "step": 7425 }, { "epoch": 0.21907710452602094, "grad_norm": 4.0375574208905105, "learning_rate": 5.476658476658477e-06, "loss": 1.5251, "step": 7430 }, { "epoch": 0.21922453191803037, "grad_norm": 4.310079795401331, "learning_rate": 5.480343980343981e-06, "loss": 1.5474, "step": 7435 }, { "epoch": 0.2193719593100398, "grad_norm": 4.127716198635117, "learning_rate": 5.4840294840294845e-06, "loss": 1.539, "step": 7440 }, { "epoch": 0.21951938670204924, "grad_norm": 4.099101126996386, "learning_rate": 5.487714987714988e-06, "loss": 1.4871, "step": 7445 }, { "epoch": 0.21966681409405867, "grad_norm": 3.8720783397124467, "learning_rate": 5.491400491400491e-06, "loss": 1.538, "step": 7450 }, { "epoch": 0.21981424148606812, "grad_norm": 3.9899339411309107, "learning_rate": 5.495085995085995e-06, "loss": 1.528, "step": 7455 }, { "epoch": 0.21996166887807755, "grad_norm": 4.1596447147094935, "learning_rate": 5.498771498771499e-06, "loss": 1.568, "step": 7460 }, { "epoch": 0.22010909627008698, "grad_norm": 4.056023686387235, "learning_rate": 5.502457002457002e-06, "loss": 1.5388, "step": 7465 }, { "epoch": 0.22025652366209642, "grad_norm": 3.9209387547696553, "learning_rate": 5.506142506142506e-06, "loss": 1.5404, "step": 7470 }, { "epoch": 0.22040395105410585, "grad_norm": 4.008089032428857, "learning_rate": 5.50982800982801e-06, "loss": 1.545, "step": 7475 }, { "epoch": 0.22055137844611528, "grad_norm": 3.989959690711018, "learning_rate": 5.513513513513514e-06, "loss": 1.535, "step": 7480 }, { "epoch": 0.22069880583812473, "grad_norm": 3.9641782683165765, "learning_rate": 5.517199017199017e-06, "loss": 1.5019, "step": 7485 }, { "epoch": 0.22084623323013416, "grad_norm": 4.327323908674799, "learning_rate": 5.520884520884521e-06, "loss": 1.5056, "step": 7490 }, { "epoch": 0.2209936606221436, "grad_norm": 3.87973796781085, "learning_rate": 5.5245700245700245e-06, "loss": 1.5381, "step": 7495 }, { "epoch": 0.22114108801415303, "grad_norm": 4.548403604991598, "learning_rate": 5.528255528255528e-06, "loss": 1.5536, "step": 7500 }, { "epoch": 0.22114108801415303, "eval_loss": 1.2496649026870728, "eval_runtime": 4.181, "eval_samples_per_second": 94.715, "eval_steps_per_second": 3.109, "step": 7500 }, { "epoch": 0.22128851540616246, "grad_norm": 4.223760275171769, "learning_rate": 5.531941031941032e-06, "loss": 1.5112, "step": 7505 }, { "epoch": 0.22143594279817191, "grad_norm": 4.043277272870582, "learning_rate": 5.535626535626536e-06, "loss": 1.5366, "step": 7510 }, { "epoch": 0.22158337019018134, "grad_norm": 3.991043930898502, "learning_rate": 5.53931203931204e-06, "loss": 1.4977, "step": 7515 }, { "epoch": 0.22173079758219078, "grad_norm": 4.068290921130821, "learning_rate": 5.542997542997544e-06, "loss": 1.5, "step": 7520 }, { "epoch": 0.2218782249742002, "grad_norm": 3.9564455245012153, "learning_rate": 5.546683046683047e-06, "loss": 1.5598, "step": 7525 }, { "epoch": 0.22202565236620964, "grad_norm": 3.8128178556428605, "learning_rate": 5.550368550368551e-06, "loss": 1.5768, "step": 7530 }, { "epoch": 0.22217307975821907, "grad_norm": 4.065850683767889, "learning_rate": 5.554054054054054e-06, "loss": 1.5116, "step": 7535 }, { "epoch": 0.22232050715022852, "grad_norm": 3.848905162038672, "learning_rate": 5.557739557739558e-06, "loss": 1.5491, "step": 7540 }, { "epoch": 0.22246793454223796, "grad_norm": 4.3793534723643734, "learning_rate": 5.5614250614250614e-06, "loss": 1.505, "step": 7545 }, { "epoch": 0.22261536193424739, "grad_norm": 4.144141919073907, "learning_rate": 5.565110565110565e-06, "loss": 1.5534, "step": 7550 }, { "epoch": 0.22276278932625682, "grad_norm": 4.303549372373969, "learning_rate": 5.568796068796069e-06, "loss": 1.5323, "step": 7555 }, { "epoch": 0.22291021671826625, "grad_norm": 4.221256190494861, "learning_rate": 5.572481572481572e-06, "loss": 1.5618, "step": 7560 }, { "epoch": 0.22305764411027568, "grad_norm": 3.9775498765685495, "learning_rate": 5.576167076167076e-06, "loss": 1.539, "step": 7565 }, { "epoch": 0.22320507150228514, "grad_norm": 3.956215397361823, "learning_rate": 5.57985257985258e-06, "loss": 1.541, "step": 7570 }, { "epoch": 0.22335249889429457, "grad_norm": 4.019499633867239, "learning_rate": 5.583538083538084e-06, "loss": 1.5405, "step": 7575 }, { "epoch": 0.223499926286304, "grad_norm": 3.952601044496301, "learning_rate": 5.587223587223588e-06, "loss": 1.546, "step": 7580 }, { "epoch": 0.22364735367831343, "grad_norm": 4.044872989076681, "learning_rate": 5.5909090909090915e-06, "loss": 1.517, "step": 7585 }, { "epoch": 0.22379478107032286, "grad_norm": 4.297462687575453, "learning_rate": 5.594594594594595e-06, "loss": 1.5632, "step": 7590 }, { "epoch": 0.22394220846233232, "grad_norm": 5.729669836458591, "learning_rate": 5.598280098280098e-06, "loss": 1.5471, "step": 7595 }, { "epoch": 0.22408963585434175, "grad_norm": 3.9575606200444944, "learning_rate": 5.601965601965601e-06, "loss": 1.5187, "step": 7600 }, { "epoch": 0.22423706324635118, "grad_norm": 3.989684497700279, "learning_rate": 5.605651105651105e-06, "loss": 1.5552, "step": 7605 }, { "epoch": 0.2243844906383606, "grad_norm": 3.98009940000669, "learning_rate": 5.609336609336609e-06, "loss": 1.5435, "step": 7610 }, { "epoch": 0.22453191803037004, "grad_norm": 4.209870301980477, "learning_rate": 5.613022113022113e-06, "loss": 1.5232, "step": 7615 }, { "epoch": 0.22467934542237947, "grad_norm": 3.9637246746758534, "learning_rate": 5.616707616707617e-06, "loss": 1.5156, "step": 7620 }, { "epoch": 0.22482677281438893, "grad_norm": 4.198443309170578, "learning_rate": 5.620393120393121e-06, "loss": 1.5014, "step": 7625 }, { "epoch": 0.22497420020639836, "grad_norm": 3.899146059740555, "learning_rate": 5.624078624078625e-06, "loss": 1.4975, "step": 7630 }, { "epoch": 0.2251216275984078, "grad_norm": 3.8547851973335523, "learning_rate": 5.627764127764128e-06, "loss": 1.5043, "step": 7635 }, { "epoch": 0.22526905499041722, "grad_norm": 3.9915170238833206, "learning_rate": 5.6314496314496315e-06, "loss": 1.5066, "step": 7640 }, { "epoch": 0.22541648238242665, "grad_norm": 3.717174550105078, "learning_rate": 5.635135135135135e-06, "loss": 1.5689, "step": 7645 }, { "epoch": 0.22556390977443608, "grad_norm": 4.638898784349238, "learning_rate": 5.638820638820639e-06, "loss": 1.5223, "step": 7650 }, { "epoch": 0.22571133716644554, "grad_norm": 4.1244266288183145, "learning_rate": 5.642506142506143e-06, "loss": 1.5236, "step": 7655 }, { "epoch": 0.22585876455845497, "grad_norm": 4.202565476230537, "learning_rate": 5.646191646191647e-06, "loss": 1.5465, "step": 7660 }, { "epoch": 0.2260061919504644, "grad_norm": 3.9612402680177152, "learning_rate": 5.64987714987715e-06, "loss": 1.511, "step": 7665 }, { "epoch": 0.22615361934247383, "grad_norm": 4.038277168990977, "learning_rate": 5.653562653562653e-06, "loss": 1.5591, "step": 7670 }, { "epoch": 0.22630104673448326, "grad_norm": 4.149066467698659, "learning_rate": 5.657248157248157e-06, "loss": 1.5553, "step": 7675 }, { "epoch": 0.22644847412649272, "grad_norm": 3.789473764491264, "learning_rate": 5.660933660933661e-06, "loss": 1.5991, "step": 7680 }, { "epoch": 0.22659590151850215, "grad_norm": 3.980044893987358, "learning_rate": 5.6646191646191646e-06, "loss": 1.6025, "step": 7685 }, { "epoch": 0.22674332891051158, "grad_norm": 4.062692635510045, "learning_rate": 5.6683046683046684e-06, "loss": 1.4939, "step": 7690 }, { "epoch": 0.226890756302521, "grad_norm": 3.9255698650825206, "learning_rate": 5.671990171990172e-06, "loss": 1.6058, "step": 7695 }, { "epoch": 0.22703818369453044, "grad_norm": 4.223865883399966, "learning_rate": 5.675675675675676e-06, "loss": 1.5095, "step": 7700 }, { "epoch": 0.22718561108653987, "grad_norm": 4.030813685550369, "learning_rate": 5.679361179361179e-06, "loss": 1.5323, "step": 7705 }, { "epoch": 0.22733303847854933, "grad_norm": 3.963157521163111, "learning_rate": 5.683046683046683e-06, "loss": 1.4574, "step": 7710 }, { "epoch": 0.22748046587055876, "grad_norm": 4.126185313812171, "learning_rate": 5.686732186732187e-06, "loss": 1.5554, "step": 7715 }, { "epoch": 0.2276278932625682, "grad_norm": 3.989138562464432, "learning_rate": 5.690417690417691e-06, "loss": 1.4679, "step": 7720 }, { "epoch": 0.22777532065457762, "grad_norm": 4.154777968753672, "learning_rate": 5.694103194103195e-06, "loss": 1.4812, "step": 7725 }, { "epoch": 0.22792274804658705, "grad_norm": 4.258281507076478, "learning_rate": 5.6977886977886985e-06, "loss": 1.4929, "step": 7730 }, { "epoch": 0.22807017543859648, "grad_norm": 4.1910794443060055, "learning_rate": 5.7014742014742015e-06, "loss": 1.5658, "step": 7735 }, { "epoch": 0.22821760283060594, "grad_norm": 4.088691441129559, "learning_rate": 5.705159705159705e-06, "loss": 1.5486, "step": 7740 }, { "epoch": 0.22836503022261537, "grad_norm": 3.9795003906870776, "learning_rate": 5.708845208845208e-06, "loss": 1.5289, "step": 7745 }, { "epoch": 0.2285124576146248, "grad_norm": 4.244918680975201, "learning_rate": 5.712530712530712e-06, "loss": 1.6295, "step": 7750 }, { "epoch": 0.22865988500663423, "grad_norm": 3.9684404916837286, "learning_rate": 5.716216216216216e-06, "loss": 1.5432, "step": 7755 }, { "epoch": 0.22880731239864366, "grad_norm": 3.883301498938316, "learning_rate": 5.71990171990172e-06, "loss": 1.5369, "step": 7760 }, { "epoch": 0.22895473979065312, "grad_norm": 3.9751382188993056, "learning_rate": 5.723587223587224e-06, "loss": 1.5016, "step": 7765 }, { "epoch": 0.22910216718266255, "grad_norm": 3.9654078795914387, "learning_rate": 5.727272727272728e-06, "loss": 1.5223, "step": 7770 }, { "epoch": 0.22924959457467198, "grad_norm": 4.084169162626245, "learning_rate": 5.730958230958232e-06, "loss": 1.5772, "step": 7775 }, { "epoch": 0.2293970219666814, "grad_norm": 3.7995584336684147, "learning_rate": 5.734643734643735e-06, "loss": 1.539, "step": 7780 }, { "epoch": 0.22954444935869084, "grad_norm": 4.114944964146326, "learning_rate": 5.7383292383292385e-06, "loss": 1.492, "step": 7785 }, { "epoch": 0.22969187675070027, "grad_norm": 3.8961470079964107, "learning_rate": 5.742014742014742e-06, "loss": 1.5407, "step": 7790 }, { "epoch": 0.22983930414270973, "grad_norm": 3.7061068929114547, "learning_rate": 5.745700245700246e-06, "loss": 1.5293, "step": 7795 }, { "epoch": 0.22998673153471916, "grad_norm": 4.116180728739046, "learning_rate": 5.749385749385749e-06, "loss": 1.5355, "step": 7800 }, { "epoch": 0.2301341589267286, "grad_norm": 3.975061042670156, "learning_rate": 5.753071253071253e-06, "loss": 1.5296, "step": 7805 }, { "epoch": 0.23028158631873802, "grad_norm": 3.780136911023601, "learning_rate": 5.756756756756757e-06, "loss": 1.5096, "step": 7810 }, { "epoch": 0.23042901371074745, "grad_norm": 10.774386799530527, "learning_rate": 5.76044226044226e-06, "loss": 1.4999, "step": 7815 }, { "epoch": 0.23057644110275688, "grad_norm": 4.09278385412238, "learning_rate": 5.764127764127764e-06, "loss": 1.5373, "step": 7820 }, { "epoch": 0.23072386849476634, "grad_norm": 3.949279375486183, "learning_rate": 5.767813267813268e-06, "loss": 1.4998, "step": 7825 }, { "epoch": 0.23087129588677577, "grad_norm": 3.9417611306049265, "learning_rate": 5.7714987714987716e-06, "loss": 1.4867, "step": 7830 }, { "epoch": 0.2310187232787852, "grad_norm": 4.125756394088745, "learning_rate": 5.7751842751842754e-06, "loss": 1.5308, "step": 7835 }, { "epoch": 0.23116615067079463, "grad_norm": 4.050920168164231, "learning_rate": 5.778869778869779e-06, "loss": 1.5114, "step": 7840 }, { "epoch": 0.23131357806280406, "grad_norm": 3.9281825279328677, "learning_rate": 5.782555282555283e-06, "loss": 1.5449, "step": 7845 }, { "epoch": 0.23146100545481352, "grad_norm": 4.194881148873216, "learning_rate": 5.786240786240787e-06, "loss": 1.5861, "step": 7850 }, { "epoch": 0.23160843284682295, "grad_norm": 4.027356343731839, "learning_rate": 5.78992628992629e-06, "loss": 1.5296, "step": 7855 }, { "epoch": 0.23175586023883238, "grad_norm": 4.353202726617244, "learning_rate": 5.793611793611794e-06, "loss": 1.5659, "step": 7860 }, { "epoch": 0.2319032876308418, "grad_norm": 3.853876506935644, "learning_rate": 5.797297297297298e-06, "loss": 1.5398, "step": 7865 }, { "epoch": 0.23205071502285124, "grad_norm": 3.832929860651242, "learning_rate": 5.800982800982801e-06, "loss": 1.5102, "step": 7870 }, { "epoch": 0.23219814241486067, "grad_norm": 4.095744513355343, "learning_rate": 5.804668304668305e-06, "loss": 1.5643, "step": 7875 }, { "epoch": 0.23234556980687013, "grad_norm": 4.337498720303091, "learning_rate": 5.8083538083538085e-06, "loss": 1.5434, "step": 7880 }, { "epoch": 0.23249299719887956, "grad_norm": 5.873395262128692, "learning_rate": 5.812039312039312e-06, "loss": 1.5638, "step": 7885 }, { "epoch": 0.232640424590889, "grad_norm": 3.896585025467251, "learning_rate": 5.815724815724815e-06, "loss": 1.5374, "step": 7890 }, { "epoch": 0.23278785198289842, "grad_norm": 4.102642888550035, "learning_rate": 5.819410319410319e-06, "loss": 1.5523, "step": 7895 }, { "epoch": 0.23293527937490785, "grad_norm": 4.2199708028363965, "learning_rate": 5.823095823095823e-06, "loss": 1.5604, "step": 7900 }, { "epoch": 0.23308270676691728, "grad_norm": 4.035347447296033, "learning_rate": 5.826781326781327e-06, "loss": 1.5752, "step": 7905 }, { "epoch": 0.23323013415892674, "grad_norm": 4.036887045309839, "learning_rate": 5.830466830466831e-06, "loss": 1.5177, "step": 7910 }, { "epoch": 0.23337756155093617, "grad_norm": 3.876647944610196, "learning_rate": 5.834152334152335e-06, "loss": 1.5044, "step": 7915 }, { "epoch": 0.2335249889429456, "grad_norm": 3.9141595653617327, "learning_rate": 5.837837837837839e-06, "loss": 1.5376, "step": 7920 }, { "epoch": 0.23367241633495503, "grad_norm": 3.764551665764003, "learning_rate": 5.841523341523342e-06, "loss": 1.5342, "step": 7925 }, { "epoch": 0.23381984372696446, "grad_norm": 4.043973252505501, "learning_rate": 5.8452088452088455e-06, "loss": 1.553, "step": 7930 }, { "epoch": 0.23396727111897392, "grad_norm": 3.910902389054561, "learning_rate": 5.8488943488943485e-06, "loss": 1.5636, "step": 7935 }, { "epoch": 0.23411469851098335, "grad_norm": 4.072360090120724, "learning_rate": 5.852579852579852e-06, "loss": 1.5548, "step": 7940 }, { "epoch": 0.23426212590299278, "grad_norm": 4.332277517007033, "learning_rate": 5.856265356265356e-06, "loss": 1.5116, "step": 7945 }, { "epoch": 0.2344095532950022, "grad_norm": 3.9114868353131067, "learning_rate": 5.85995085995086e-06, "loss": 1.5513, "step": 7950 }, { "epoch": 0.23455698068701164, "grad_norm": 4.06321473441798, "learning_rate": 5.863636363636364e-06, "loss": 1.5408, "step": 7955 }, { "epoch": 0.23470440807902107, "grad_norm": 3.9755258870329384, "learning_rate": 5.867321867321868e-06, "loss": 1.5004, "step": 7960 }, { "epoch": 0.23485183547103053, "grad_norm": 3.808383737528817, "learning_rate": 5.871007371007371e-06, "loss": 1.5544, "step": 7965 }, { "epoch": 0.23499926286303996, "grad_norm": 3.835751341896111, "learning_rate": 5.874692874692875e-06, "loss": 1.505, "step": 7970 }, { "epoch": 0.2351466902550494, "grad_norm": 3.8698115597029044, "learning_rate": 5.8783783783783786e-06, "loss": 1.5417, "step": 7975 }, { "epoch": 0.23529411764705882, "grad_norm": 3.9226040858617144, "learning_rate": 5.882063882063882e-06, "loss": 1.4939, "step": 7980 }, { "epoch": 0.23544154503906825, "grad_norm": 4.057792198327479, "learning_rate": 5.885749385749386e-06, "loss": 1.5563, "step": 7985 }, { "epoch": 0.23558897243107768, "grad_norm": 4.4286683557535955, "learning_rate": 5.88943488943489e-06, "loss": 1.4597, "step": 7990 }, { "epoch": 0.23573639982308714, "grad_norm": 3.972241184247657, "learning_rate": 5.893120393120394e-06, "loss": 1.5712, "step": 7995 }, { "epoch": 0.23588382721509657, "grad_norm": 4.052378779338096, "learning_rate": 5.896805896805896e-06, "loss": 1.5096, "step": 8000 }, { "epoch": 0.23588382721509657, "eval_loss": 1.2487941980361938, "eval_runtime": 4.3222, "eval_samples_per_second": 91.621, "eval_steps_per_second": 3.008, "step": 8000 }, { "epoch": 0.236031254607106, "grad_norm": 4.121299143252703, "learning_rate": 5.9004914004914e-06, "loss": 1.5816, "step": 8005 }, { "epoch": 0.23617868199911543, "grad_norm": 4.106693812211854, "learning_rate": 5.904176904176904e-06, "loss": 1.5116, "step": 8010 }, { "epoch": 0.23632610939112486, "grad_norm": 3.895098641175809, "learning_rate": 5.907862407862408e-06, "loss": 1.5393, "step": 8015 }, { "epoch": 0.23647353678313432, "grad_norm": 3.9287578495558524, "learning_rate": 5.911547911547912e-06, "loss": 1.5537, "step": 8020 }, { "epoch": 0.23662096417514375, "grad_norm": 4.050581261622933, "learning_rate": 5.9152334152334155e-06, "loss": 1.5878, "step": 8025 }, { "epoch": 0.23676839156715318, "grad_norm": 3.959644398869074, "learning_rate": 5.918918918918919e-06, "loss": 1.5151, "step": 8030 }, { "epoch": 0.2369158189591626, "grad_norm": 3.974933505354195, "learning_rate": 5.922604422604422e-06, "loss": 1.5749, "step": 8035 }, { "epoch": 0.23706324635117204, "grad_norm": 3.8367979777687484, "learning_rate": 5.926289926289926e-06, "loss": 1.5303, "step": 8040 }, { "epoch": 0.23721067374318147, "grad_norm": 4.011607280387499, "learning_rate": 5.92997542997543e-06, "loss": 1.5655, "step": 8045 }, { "epoch": 0.23735810113519093, "grad_norm": 3.9099777070745296, "learning_rate": 5.933660933660934e-06, "loss": 1.4903, "step": 8050 }, { "epoch": 0.23750552852720036, "grad_norm": 3.8675706204199947, "learning_rate": 5.937346437346438e-06, "loss": 1.4718, "step": 8055 }, { "epoch": 0.2376529559192098, "grad_norm": 3.92062072166269, "learning_rate": 5.941031941031942e-06, "loss": 1.5297, "step": 8060 }, { "epoch": 0.23780038331121922, "grad_norm": 3.946171462110707, "learning_rate": 5.9447174447174456e-06, "loss": 1.5301, "step": 8065 }, { "epoch": 0.23794781070322865, "grad_norm": 4.2279490637288095, "learning_rate": 5.948402948402949e-06, "loss": 1.5608, "step": 8070 }, { "epoch": 0.23809523809523808, "grad_norm": 4.111685639674045, "learning_rate": 5.952088452088452e-06, "loss": 1.5372, "step": 8075 }, { "epoch": 0.23824266548724754, "grad_norm": 4.093227078815436, "learning_rate": 5.9557739557739555e-06, "loss": 1.4661, "step": 8080 }, { "epoch": 0.23839009287925697, "grad_norm": 3.9919106584560122, "learning_rate": 5.959459459459459e-06, "loss": 1.5594, "step": 8085 }, { "epoch": 0.2385375202712664, "grad_norm": 4.110677614422339, "learning_rate": 5.963144963144963e-06, "loss": 1.4981, "step": 8090 }, { "epoch": 0.23868494766327583, "grad_norm": 3.8261486037788073, "learning_rate": 5.966830466830467e-06, "loss": 1.5621, "step": 8095 }, { "epoch": 0.23883237505528526, "grad_norm": 3.9189859259110227, "learning_rate": 5.970515970515971e-06, "loss": 1.4877, "step": 8100 }, { "epoch": 0.23897980244729472, "grad_norm": 4.284598200432961, "learning_rate": 5.974201474201475e-06, "loss": 1.54, "step": 8105 }, { "epoch": 0.23912722983930415, "grad_norm": 3.7818142169465876, "learning_rate": 5.977886977886978e-06, "loss": 1.4946, "step": 8110 }, { "epoch": 0.23927465723131358, "grad_norm": 3.9775470861652535, "learning_rate": 5.981572481572482e-06, "loss": 1.5356, "step": 8115 }, { "epoch": 0.239422084623323, "grad_norm": 3.57729205052764, "learning_rate": 5.9852579852579855e-06, "loss": 1.4841, "step": 8120 }, { "epoch": 0.23956951201533244, "grad_norm": 4.175685146388374, "learning_rate": 5.988943488943489e-06, "loss": 1.58, "step": 8125 }, { "epoch": 0.23971693940734187, "grad_norm": 3.962795799388355, "learning_rate": 5.992628992628993e-06, "loss": 1.5054, "step": 8130 }, { "epoch": 0.23986436679935133, "grad_norm": 4.0848323391084245, "learning_rate": 5.996314496314496e-06, "loss": 1.5046, "step": 8135 }, { "epoch": 0.24001179419136076, "grad_norm": 4.385817961793517, "learning_rate": 6e-06, "loss": 1.5673, "step": 8140 }, { "epoch": 0.2401592215833702, "grad_norm": 3.983613641653474, "learning_rate": 6.003685503685503e-06, "loss": 1.5293, "step": 8145 }, { "epoch": 0.24030664897537962, "grad_norm": 4.838898491939367, "learning_rate": 6.007371007371007e-06, "loss": 1.511, "step": 8150 }, { "epoch": 0.24045407636738905, "grad_norm": 4.111309622457098, "learning_rate": 6.011056511056511e-06, "loss": 1.5613, "step": 8155 }, { "epoch": 0.24060150375939848, "grad_norm": 4.0777265328827035, "learning_rate": 6.014742014742015e-06, "loss": 1.5214, "step": 8160 }, { "epoch": 0.24074893115140794, "grad_norm": 4.162307078470707, "learning_rate": 6.018427518427519e-06, "loss": 1.5145, "step": 8165 }, { "epoch": 0.24089635854341737, "grad_norm": 4.080099096532248, "learning_rate": 6.0221130221130225e-06, "loss": 1.5143, "step": 8170 }, { "epoch": 0.2410437859354268, "grad_norm": 4.082022257153008, "learning_rate": 6.025798525798526e-06, "loss": 1.5507, "step": 8175 }, { "epoch": 0.24119121332743623, "grad_norm": 4.503225530553738, "learning_rate": 6.02948402948403e-06, "loss": 1.5809, "step": 8180 }, { "epoch": 0.24133864071944566, "grad_norm": 4.122118047221487, "learning_rate": 6.033169533169533e-06, "loss": 1.5064, "step": 8185 }, { "epoch": 0.24148606811145512, "grad_norm": 4.22664364318167, "learning_rate": 6.036855036855037e-06, "loss": 1.5574, "step": 8190 }, { "epoch": 0.24163349550346455, "grad_norm": 4.00365452932215, "learning_rate": 6.040540540540541e-06, "loss": 1.5435, "step": 8195 }, { "epoch": 0.24178092289547398, "grad_norm": 3.9991182243557915, "learning_rate": 6.044226044226045e-06, "loss": 1.5266, "step": 8200 }, { "epoch": 0.2419283502874834, "grad_norm": 3.8961711794571823, "learning_rate": 6.047911547911548e-06, "loss": 1.512, "step": 8205 }, { "epoch": 0.24207577767949284, "grad_norm": 3.912077926030402, "learning_rate": 6.051597051597052e-06, "loss": 1.5346, "step": 8210 }, { "epoch": 0.24222320507150227, "grad_norm": 3.9856476532364984, "learning_rate": 6.055282555282556e-06, "loss": 1.526, "step": 8215 }, { "epoch": 0.24237063246351173, "grad_norm": 3.8711953903805005, "learning_rate": 6.058968058968059e-06, "loss": 1.5339, "step": 8220 }, { "epoch": 0.24251805985552116, "grad_norm": 4.341751655591071, "learning_rate": 6.0626535626535625e-06, "loss": 1.5416, "step": 8225 }, { "epoch": 0.2426654872475306, "grad_norm": 3.8336160395935988, "learning_rate": 6.066339066339066e-06, "loss": 1.5264, "step": 8230 }, { "epoch": 0.24281291463954002, "grad_norm": 4.019542809288778, "learning_rate": 6.07002457002457e-06, "loss": 1.5821, "step": 8235 }, { "epoch": 0.24296034203154945, "grad_norm": 4.14614508504884, "learning_rate": 6.073710073710074e-06, "loss": 1.5742, "step": 8240 }, { "epoch": 0.24310776942355888, "grad_norm": 3.9442011187626123, "learning_rate": 6.077395577395578e-06, "loss": 1.5555, "step": 8245 }, { "epoch": 0.24325519681556834, "grad_norm": 3.933657683304174, "learning_rate": 6.081081081081082e-06, "loss": 1.5318, "step": 8250 }, { "epoch": 0.24340262420757777, "grad_norm": 4.17035721638918, "learning_rate": 6.084766584766585e-06, "loss": 1.5349, "step": 8255 }, { "epoch": 0.2435500515995872, "grad_norm": 4.000328605328862, "learning_rate": 6.088452088452089e-06, "loss": 1.556, "step": 8260 }, { "epoch": 0.24369747899159663, "grad_norm": 4.1591631661325765, "learning_rate": 6.0921375921375925e-06, "loss": 1.5278, "step": 8265 }, { "epoch": 0.24384490638360606, "grad_norm": 4.132943977092382, "learning_rate": 6.0958230958230955e-06, "loss": 1.5635, "step": 8270 }, { "epoch": 0.24399233377561552, "grad_norm": 4.0247239175740255, "learning_rate": 6.099508599508599e-06, "loss": 1.5702, "step": 8275 }, { "epoch": 0.24413976116762495, "grad_norm": 4.029979875992246, "learning_rate": 6.103194103194103e-06, "loss": 1.5506, "step": 8280 }, { "epoch": 0.24428718855963438, "grad_norm": 3.786763637987946, "learning_rate": 6.106879606879607e-06, "loss": 1.54, "step": 8285 }, { "epoch": 0.2444346159516438, "grad_norm": 3.703375806965794, "learning_rate": 6.110565110565111e-06, "loss": 1.5102, "step": 8290 }, { "epoch": 0.24458204334365324, "grad_norm": 4.172207924673764, "learning_rate": 6.114250614250614e-06, "loss": 1.5406, "step": 8295 }, { "epoch": 0.24472947073566267, "grad_norm": 4.202311583569171, "learning_rate": 6.117936117936118e-06, "loss": 1.5253, "step": 8300 }, { "epoch": 0.24487689812767213, "grad_norm": 4.089930839801212, "learning_rate": 6.121621621621622e-06, "loss": 1.5494, "step": 8305 }, { "epoch": 0.24502432551968156, "grad_norm": 3.7665259823648047, "learning_rate": 6.125307125307126e-06, "loss": 1.5043, "step": 8310 }, { "epoch": 0.245171752911691, "grad_norm": 3.802049612649219, "learning_rate": 6.1289926289926295e-06, "loss": 1.524, "step": 8315 }, { "epoch": 0.24531918030370042, "grad_norm": 3.984797505836719, "learning_rate": 6.132678132678133e-06, "loss": 1.5466, "step": 8320 }, { "epoch": 0.24546660769570985, "grad_norm": 3.8791298494229505, "learning_rate": 6.136363636363637e-06, "loss": 1.5155, "step": 8325 }, { "epoch": 0.24561403508771928, "grad_norm": 4.058491534453695, "learning_rate": 6.14004914004914e-06, "loss": 1.5494, "step": 8330 }, { "epoch": 0.24576146247972874, "grad_norm": 3.955747996718432, "learning_rate": 6.143734643734643e-06, "loss": 1.4823, "step": 8335 }, { "epoch": 0.24590888987173817, "grad_norm": 4.370082555452415, "learning_rate": 6.147420147420147e-06, "loss": 1.5118, "step": 8340 }, { "epoch": 0.2460563172637476, "grad_norm": 3.9920702946910414, "learning_rate": 6.151105651105651e-06, "loss": 1.5575, "step": 8345 }, { "epoch": 0.24620374465575703, "grad_norm": 4.014225748042825, "learning_rate": 6.154791154791155e-06, "loss": 1.5045, "step": 8350 }, { "epoch": 0.24635117204776646, "grad_norm": 3.8625104910104104, "learning_rate": 6.158476658476659e-06, "loss": 1.5727, "step": 8355 }, { "epoch": 0.24649859943977592, "grad_norm": 3.9631507605933622, "learning_rate": 6.1621621621621626e-06, "loss": 1.5422, "step": 8360 }, { "epoch": 0.24664602683178535, "grad_norm": 3.9603323980673197, "learning_rate": 6.165847665847666e-06, "loss": 1.5104, "step": 8365 }, { "epoch": 0.24679345422379478, "grad_norm": 4.214656081666823, "learning_rate": 6.1695331695331695e-06, "loss": 1.5226, "step": 8370 }, { "epoch": 0.24694088161580421, "grad_norm": 3.691308641729351, "learning_rate": 6.173218673218673e-06, "loss": 1.5356, "step": 8375 }, { "epoch": 0.24708830900781364, "grad_norm": 3.830732363257464, "learning_rate": 6.176904176904177e-06, "loss": 1.5245, "step": 8380 }, { "epoch": 0.24723573639982308, "grad_norm": 3.7432432951299734, "learning_rate": 6.180589680589681e-06, "loss": 1.556, "step": 8385 }, { "epoch": 0.24738316379183253, "grad_norm": 4.145699752846287, "learning_rate": 6.184275184275185e-06, "loss": 1.5261, "step": 8390 }, { "epoch": 0.24753059118384196, "grad_norm": 3.936425488673325, "learning_rate": 6.187960687960689e-06, "loss": 1.5524, "step": 8395 }, { "epoch": 0.2476780185758514, "grad_norm": 3.922864288163358, "learning_rate": 6.191646191646192e-06, "loss": 1.5337, "step": 8400 }, { "epoch": 0.24782544596786082, "grad_norm": 3.9199016230447232, "learning_rate": 6.195331695331695e-06, "loss": 1.5047, "step": 8405 }, { "epoch": 0.24797287335987026, "grad_norm": 3.9344090194321995, "learning_rate": 6.199017199017199e-06, "loss": 1.6346, "step": 8410 }, { "epoch": 0.24812030075187969, "grad_norm": 4.082154859594756, "learning_rate": 6.2027027027027025e-06, "loss": 1.5627, "step": 8415 }, { "epoch": 0.24826772814388914, "grad_norm": 4.146769452834552, "learning_rate": 6.206388206388206e-06, "loss": 1.5483, "step": 8420 }, { "epoch": 0.24841515553589857, "grad_norm": 4.099555776998035, "learning_rate": 6.21007371007371e-06, "loss": 1.5428, "step": 8425 }, { "epoch": 0.248562582927908, "grad_norm": 3.953640045094208, "learning_rate": 6.213759213759214e-06, "loss": 1.5681, "step": 8430 }, { "epoch": 0.24871001031991744, "grad_norm": 3.9212898818956865, "learning_rate": 6.217444717444718e-06, "loss": 1.5677, "step": 8435 }, { "epoch": 0.24885743771192687, "grad_norm": 3.9146227016615307, "learning_rate": 6.221130221130221e-06, "loss": 1.5192, "step": 8440 }, { "epoch": 0.24900486510393632, "grad_norm": 4.097526465503367, "learning_rate": 6.224815724815725e-06, "loss": 1.5329, "step": 8445 }, { "epoch": 0.24915229249594575, "grad_norm": 3.742858769762974, "learning_rate": 6.228501228501229e-06, "loss": 1.544, "step": 8450 }, { "epoch": 0.24929971988795518, "grad_norm": 3.795774876268617, "learning_rate": 6.232186732186733e-06, "loss": 1.519, "step": 8455 }, { "epoch": 0.24944714727996462, "grad_norm": 3.9525455060886032, "learning_rate": 6.2358722358722365e-06, "loss": 1.5838, "step": 8460 }, { "epoch": 0.24959457467197405, "grad_norm": 3.7953444938197367, "learning_rate": 6.23955773955774e-06, "loss": 1.5142, "step": 8465 }, { "epoch": 0.24974200206398348, "grad_norm": 4.028787030457706, "learning_rate": 6.243243243243243e-06, "loss": 1.5519, "step": 8470 }, { "epoch": 0.24988942945599293, "grad_norm": 3.894402799132542, "learning_rate": 6.246928746928746e-06, "loss": 1.5276, "step": 8475 }, { "epoch": 0.25003685684800236, "grad_norm": 3.991037438380138, "learning_rate": 6.25061425061425e-06, "loss": 1.5152, "step": 8480 }, { "epoch": 0.25018428424001177, "grad_norm": 3.7352221548011553, "learning_rate": 6.254299754299754e-06, "loss": 1.5319, "step": 8485 }, { "epoch": 0.2503317116320212, "grad_norm": 4.012032057026194, "learning_rate": 6.257985257985258e-06, "loss": 1.5269, "step": 8490 }, { "epoch": 0.2504791390240307, "grad_norm": 3.922251109932927, "learning_rate": 6.261670761670762e-06, "loss": 1.5537, "step": 8495 }, { "epoch": 0.2506265664160401, "grad_norm": 3.9006266951946555, "learning_rate": 6.265356265356266e-06, "loss": 1.5529, "step": 8500 }, { "epoch": 0.2506265664160401, "eval_loss": 1.248063325881958, "eval_runtime": 4.2128, "eval_samples_per_second": 93.999, "eval_steps_per_second": 3.086, "step": 8500 }, { "epoch": 0.25077399380804954, "grad_norm": 3.8234141451387917, "learning_rate": 6.2690417690417696e-06, "loss": 1.5309, "step": 8505 }, { "epoch": 0.25092142120005895, "grad_norm": 3.931559127751527, "learning_rate": 6.272727272727273e-06, "loss": 1.4727, "step": 8510 }, { "epoch": 0.2510688485920684, "grad_norm": 3.866742093897212, "learning_rate": 6.2764127764127764e-06, "loss": 1.5023, "step": 8515 }, { "epoch": 0.25121627598407786, "grad_norm": 3.7625568176657347, "learning_rate": 6.28009828009828e-06, "loss": 1.5583, "step": 8520 }, { "epoch": 0.25136370337608727, "grad_norm": 3.9837733057338416, "learning_rate": 6.283783783783784e-06, "loss": 1.5443, "step": 8525 }, { "epoch": 0.2515111307680967, "grad_norm": 3.8905106325681516, "learning_rate": 6.287469287469288e-06, "loss": 1.5365, "step": 8530 }, { "epoch": 0.2516585581601061, "grad_norm": 3.9610322707433174, "learning_rate": 6.291154791154791e-06, "loss": 1.5779, "step": 8535 }, { "epoch": 0.2518059855521156, "grad_norm": 3.8704999494632655, "learning_rate": 6.294840294840295e-06, "loss": 1.56, "step": 8540 }, { "epoch": 0.25195341294412504, "grad_norm": 3.941033968147878, "learning_rate": 6.298525798525799e-06, "loss": 1.5513, "step": 8545 }, { "epoch": 0.25210084033613445, "grad_norm": 4.105071811058492, "learning_rate": 6.302211302211302e-06, "loss": 1.5702, "step": 8550 }, { "epoch": 0.2522482677281439, "grad_norm": 3.9652860529989717, "learning_rate": 6.305896805896806e-06, "loss": 1.5397, "step": 8555 }, { "epoch": 0.2523956951201533, "grad_norm": 3.962501262784763, "learning_rate": 6.3095823095823095e-06, "loss": 1.56, "step": 8560 }, { "epoch": 0.25254312251216277, "grad_norm": 3.8617422452682004, "learning_rate": 6.313267813267813e-06, "loss": 1.4847, "step": 8565 }, { "epoch": 0.25269054990417217, "grad_norm": 3.850556328709991, "learning_rate": 6.316953316953317e-06, "loss": 1.5787, "step": 8570 }, { "epoch": 0.2528379772961816, "grad_norm": 4.829037483259847, "learning_rate": 6.320638820638821e-06, "loss": 1.5464, "step": 8575 }, { "epoch": 0.2529854046881911, "grad_norm": 3.9795442226772892, "learning_rate": 6.324324324324325e-06, "loss": 1.5051, "step": 8580 }, { "epoch": 0.2531328320802005, "grad_norm": 3.967195411919433, "learning_rate": 6.328009828009828e-06, "loss": 1.5704, "step": 8585 }, { "epoch": 0.25328025947220995, "grad_norm": 3.953220226474803, "learning_rate": 6.331695331695332e-06, "loss": 1.572, "step": 8590 }, { "epoch": 0.25342768686421935, "grad_norm": 3.781877978184127, "learning_rate": 6.335380835380836e-06, "loss": 1.5632, "step": 8595 }, { "epoch": 0.2535751142562288, "grad_norm": 3.810836672722412, "learning_rate": 6.33906633906634e-06, "loss": 1.5628, "step": 8600 }, { "epoch": 0.25372254164823826, "grad_norm": 3.731165398196301, "learning_rate": 6.342751842751843e-06, "loss": 1.5153, "step": 8605 }, { "epoch": 0.25386996904024767, "grad_norm": 4.695937221537779, "learning_rate": 6.3464373464373465e-06, "loss": 1.5393, "step": 8610 }, { "epoch": 0.2540173964322571, "grad_norm": 3.881251511255391, "learning_rate": 6.35012285012285e-06, "loss": 1.589, "step": 8615 }, { "epoch": 0.25416482382426653, "grad_norm": 3.8794299901336475, "learning_rate": 6.353808353808353e-06, "loss": 1.5374, "step": 8620 }, { "epoch": 0.254312251216276, "grad_norm": 3.7994424498813397, "learning_rate": 6.357493857493857e-06, "loss": 1.5189, "step": 8625 }, { "epoch": 0.25445967860828544, "grad_norm": 3.808755634701464, "learning_rate": 6.361179361179361e-06, "loss": 1.5245, "step": 8630 }, { "epoch": 0.25460710600029485, "grad_norm": 3.796905506426836, "learning_rate": 6.364864864864865e-06, "loss": 1.5204, "step": 8635 }, { "epoch": 0.2547545333923043, "grad_norm": 3.9314805720287467, "learning_rate": 6.368550368550369e-06, "loss": 1.4752, "step": 8640 }, { "epoch": 0.2549019607843137, "grad_norm": 3.9322009080112776, "learning_rate": 6.372235872235873e-06, "loss": 1.5488, "step": 8645 }, { "epoch": 0.25504938817632317, "grad_norm": 3.844026826225262, "learning_rate": 6.3759213759213766e-06, "loss": 1.5105, "step": 8650 }, { "epoch": 0.25519681556833257, "grad_norm": 3.81846962475953, "learning_rate": 6.37960687960688e-06, "loss": 1.4997, "step": 8655 }, { "epoch": 0.255344242960342, "grad_norm": 3.8991116257119396, "learning_rate": 6.3832923832923834e-06, "loss": 1.5559, "step": 8660 }, { "epoch": 0.2554916703523515, "grad_norm": 3.9786473296457494, "learning_rate": 6.386977886977887e-06, "loss": 1.5397, "step": 8665 }, { "epoch": 0.2556390977443609, "grad_norm": 3.8356614412859815, "learning_rate": 6.39066339066339e-06, "loss": 1.5886, "step": 8670 }, { "epoch": 0.25578652513637035, "grad_norm": 3.904016932066124, "learning_rate": 6.394348894348894e-06, "loss": 1.4805, "step": 8675 }, { "epoch": 0.25593395252837975, "grad_norm": 3.8724421979437276, "learning_rate": 6.398034398034398e-06, "loss": 1.5219, "step": 8680 }, { "epoch": 0.2560813799203892, "grad_norm": 3.8152705928310833, "learning_rate": 6.401719901719902e-06, "loss": 1.5377, "step": 8685 }, { "epoch": 0.25622880731239867, "grad_norm": 4.243032204606896, "learning_rate": 6.405405405405406e-06, "loss": 1.5779, "step": 8690 }, { "epoch": 0.25637623470440807, "grad_norm": 3.6503771509043124, "learning_rate": 6.409090909090909e-06, "loss": 1.5357, "step": 8695 }, { "epoch": 0.2565236620964175, "grad_norm": 4.163062680778196, "learning_rate": 6.412776412776413e-06, "loss": 1.5554, "step": 8700 }, { "epoch": 0.25667108948842693, "grad_norm": 3.911241273786849, "learning_rate": 6.4164619164619165e-06, "loss": 1.5016, "step": 8705 }, { "epoch": 0.2568185168804364, "grad_norm": 3.9389061780479135, "learning_rate": 6.42014742014742e-06, "loss": 1.5468, "step": 8710 }, { "epoch": 0.25696594427244585, "grad_norm": 3.8030143676462154, "learning_rate": 6.423832923832924e-06, "loss": 1.55, "step": 8715 }, { "epoch": 0.25711337166445525, "grad_norm": 3.8237093782769684, "learning_rate": 6.427518427518428e-06, "loss": 1.5531, "step": 8720 }, { "epoch": 0.2572607990564647, "grad_norm": 3.8895874379523803, "learning_rate": 6.431203931203932e-06, "loss": 1.5473, "step": 8725 }, { "epoch": 0.2574082264484741, "grad_norm": 4.061677273315105, "learning_rate": 6.434889434889435e-06, "loss": 1.5445, "step": 8730 }, { "epoch": 0.25755565384048357, "grad_norm": 3.8610969718098533, "learning_rate": 6.438574938574939e-06, "loss": 1.4838, "step": 8735 }, { "epoch": 0.25770308123249297, "grad_norm": 4.1611112403660995, "learning_rate": 6.442260442260442e-06, "loss": 1.5549, "step": 8740 }, { "epoch": 0.25785050862450243, "grad_norm": 6.150661464373784, "learning_rate": 6.445945945945946e-06, "loss": 1.5209, "step": 8745 }, { "epoch": 0.2579979360165119, "grad_norm": 3.939517780273363, "learning_rate": 6.44963144963145e-06, "loss": 1.562, "step": 8750 }, { "epoch": 0.2581453634085213, "grad_norm": 3.6912536085936787, "learning_rate": 6.4533169533169535e-06, "loss": 1.5295, "step": 8755 }, { "epoch": 0.25829279080053075, "grad_norm": 3.990017562493093, "learning_rate": 6.457002457002457e-06, "loss": 1.5766, "step": 8760 }, { "epoch": 0.25844021819254015, "grad_norm": 3.800942846644571, "learning_rate": 6.460687960687961e-06, "loss": 1.5251, "step": 8765 }, { "epoch": 0.2585876455845496, "grad_norm": 4.063171049055868, "learning_rate": 6.464373464373464e-06, "loss": 1.5154, "step": 8770 }, { "epoch": 0.25873507297655907, "grad_norm": 3.6361391759248947, "learning_rate": 6.468058968058968e-06, "loss": 1.5723, "step": 8775 }, { "epoch": 0.25888250036856847, "grad_norm": 3.8270256232858233, "learning_rate": 6.471744471744472e-06, "loss": 1.6563, "step": 8780 }, { "epoch": 0.2590299277605779, "grad_norm": 3.8779723627889338, "learning_rate": 6.475429975429976e-06, "loss": 1.5329, "step": 8785 }, { "epoch": 0.25917735515258733, "grad_norm": 3.895235493355022, "learning_rate": 6.47911547911548e-06, "loss": 1.5635, "step": 8790 }, { "epoch": 0.2593247825445968, "grad_norm": 3.9268647469101094, "learning_rate": 6.4828009828009835e-06, "loss": 1.5556, "step": 8795 }, { "epoch": 0.25947220993660625, "grad_norm": 3.835776479460344, "learning_rate": 6.486486486486487e-06, "loss": 1.4835, "step": 8800 }, { "epoch": 0.25961963732861565, "grad_norm": 3.8026497417541503, "learning_rate": 6.49017199017199e-06, "loss": 1.5535, "step": 8805 }, { "epoch": 0.2597670647206251, "grad_norm": 3.867095358993842, "learning_rate": 6.4938574938574934e-06, "loss": 1.5041, "step": 8810 }, { "epoch": 0.2599144921126345, "grad_norm": 3.9239076332685467, "learning_rate": 6.497542997542997e-06, "loss": 1.5753, "step": 8815 }, { "epoch": 0.26006191950464397, "grad_norm": 3.9598912391429, "learning_rate": 6.501228501228501e-06, "loss": 1.4885, "step": 8820 }, { "epoch": 0.26020934689665337, "grad_norm": 3.6161328654251395, "learning_rate": 6.504914004914005e-06, "loss": 1.5299, "step": 8825 }, { "epoch": 0.26035677428866283, "grad_norm": 3.7634694814219167, "learning_rate": 6.508599508599509e-06, "loss": 1.5299, "step": 8830 }, { "epoch": 0.2605042016806723, "grad_norm": 4.088887341442, "learning_rate": 6.512285012285013e-06, "loss": 1.5571, "step": 8835 }, { "epoch": 0.2606516290726817, "grad_norm": 3.8709832584276698, "learning_rate": 6.515970515970516e-06, "loss": 1.533, "step": 8840 }, { "epoch": 0.26079905646469115, "grad_norm": 3.968085025850937, "learning_rate": 6.51965601965602e-06, "loss": 1.4997, "step": 8845 }, { "epoch": 0.26094648385670055, "grad_norm": 3.8457305013610537, "learning_rate": 6.5233415233415235e-06, "loss": 1.4727, "step": 8850 }, { "epoch": 0.26109391124871, "grad_norm": 4.049689180605046, "learning_rate": 6.527027027027027e-06, "loss": 1.5813, "step": 8855 }, { "epoch": 0.26124133864071947, "grad_norm": 3.820728637836512, "learning_rate": 6.530712530712531e-06, "loss": 1.458, "step": 8860 }, { "epoch": 0.26138876603272887, "grad_norm": 3.8010208369410936, "learning_rate": 6.534398034398035e-06, "loss": 1.4817, "step": 8865 }, { "epoch": 0.26153619342473833, "grad_norm": 3.9447931219101267, "learning_rate": 6.538083538083538e-06, "loss": 1.5728, "step": 8870 }, { "epoch": 0.26168362081674773, "grad_norm": 4.02987549800815, "learning_rate": 6.541769041769042e-06, "loss": 1.5414, "step": 8875 }, { "epoch": 0.2618310482087572, "grad_norm": 3.854233264401459, "learning_rate": 6.545454545454545e-06, "loss": 1.5233, "step": 8880 }, { "epoch": 0.26197847560076665, "grad_norm": 3.7198574402238056, "learning_rate": 6.549140049140049e-06, "loss": 1.4514, "step": 8885 }, { "epoch": 0.26212590299277605, "grad_norm": 3.837238320578648, "learning_rate": 6.552825552825553e-06, "loss": 1.5479, "step": 8890 }, { "epoch": 0.2622733303847855, "grad_norm": 3.976207254538604, "learning_rate": 6.556511056511057e-06, "loss": 1.5679, "step": 8895 }, { "epoch": 0.2624207577767949, "grad_norm": 3.762453440448635, "learning_rate": 6.5601965601965605e-06, "loss": 1.5449, "step": 8900 }, { "epoch": 0.26256818516880437, "grad_norm": 3.98452873202251, "learning_rate": 6.563882063882064e-06, "loss": 1.5426, "step": 8905 }, { "epoch": 0.26271561256081377, "grad_norm": 3.884214762491692, "learning_rate": 6.567567567567568e-06, "loss": 1.5814, "step": 8910 }, { "epoch": 0.26286303995282323, "grad_norm": 4.071699343481248, "learning_rate": 6.571253071253071e-06, "loss": 1.5467, "step": 8915 }, { "epoch": 0.2630104673448327, "grad_norm": 3.776184329081836, "learning_rate": 6.574938574938575e-06, "loss": 1.5139, "step": 8920 }, { "epoch": 0.2631578947368421, "grad_norm": 3.785398676163507, "learning_rate": 6.578624078624079e-06, "loss": 1.5405, "step": 8925 }, { "epoch": 0.26330532212885155, "grad_norm": 3.773830363809044, "learning_rate": 6.582309582309583e-06, "loss": 1.489, "step": 8930 }, { "epoch": 0.26345274952086095, "grad_norm": 3.7611569147984527, "learning_rate": 6.585995085995087e-06, "loss": 1.5591, "step": 8935 }, { "epoch": 0.2636001769128704, "grad_norm": 3.9826952223133083, "learning_rate": 6.58968058968059e-06, "loss": 1.5564, "step": 8940 }, { "epoch": 0.26374760430487987, "grad_norm": 3.879658910584593, "learning_rate": 6.5933660933660935e-06, "loss": 1.598, "step": 8945 }, { "epoch": 0.26389503169688927, "grad_norm": 3.6343856385259254, "learning_rate": 6.5970515970515966e-06, "loss": 1.5305, "step": 8950 }, { "epoch": 0.26404245908889873, "grad_norm": 3.7004069916511537, "learning_rate": 6.6007371007371004e-06, "loss": 1.513, "step": 8955 }, { "epoch": 0.26418988648090813, "grad_norm": 3.812683474848389, "learning_rate": 6.604422604422604e-06, "loss": 1.5496, "step": 8960 }, { "epoch": 0.2643373138729176, "grad_norm": 3.6598201624792717, "learning_rate": 6.608108108108108e-06, "loss": 1.5224, "step": 8965 }, { "epoch": 0.26448474126492705, "grad_norm": 4.095147924380726, "learning_rate": 6.611793611793612e-06, "loss": 1.5528, "step": 8970 }, { "epoch": 0.26463216865693645, "grad_norm": 4.483436567220779, "learning_rate": 6.615479115479116e-06, "loss": 1.5335, "step": 8975 }, { "epoch": 0.2647795960489459, "grad_norm": 3.6993571037124497, "learning_rate": 6.61916461916462e-06, "loss": 1.5623, "step": 8980 }, { "epoch": 0.2649270234409553, "grad_norm": 3.9174090515319158, "learning_rate": 6.622850122850124e-06, "loss": 1.5277, "step": 8985 }, { "epoch": 0.26507445083296477, "grad_norm": 3.781186080242722, "learning_rate": 6.626535626535627e-06, "loss": 1.5101, "step": 8990 }, { "epoch": 0.26522187822497423, "grad_norm": 3.807278688562436, "learning_rate": 6.6302211302211305e-06, "loss": 1.4887, "step": 8995 }, { "epoch": 0.26536930561698363, "grad_norm": 3.8606492891945625, "learning_rate": 6.633906633906634e-06, "loss": 1.5534, "step": 9000 }, { "epoch": 0.26536930561698363, "eval_loss": 1.2464121580123901, "eval_runtime": 4.239, "eval_samples_per_second": 93.417, "eval_steps_per_second": 3.067, "step": 9000 }, { "epoch": 0.2655167330089931, "grad_norm": 3.7192716486182453, "learning_rate": 6.637592137592137e-06, "loss": 1.4769, "step": 9005 }, { "epoch": 0.2656641604010025, "grad_norm": 3.784363426291659, "learning_rate": 6.641277641277641e-06, "loss": 1.533, "step": 9010 }, { "epoch": 0.26581158779301195, "grad_norm": 3.830271236760314, "learning_rate": 6.644963144963145e-06, "loss": 1.5387, "step": 9015 }, { "epoch": 0.26595901518502135, "grad_norm": 3.860240654674207, "learning_rate": 6.648648648648649e-06, "loss": 1.5659, "step": 9020 }, { "epoch": 0.2661064425770308, "grad_norm": 3.9618935324337365, "learning_rate": 6.652334152334152e-06, "loss": 1.5764, "step": 9025 }, { "epoch": 0.26625386996904027, "grad_norm": 3.8218286689402676, "learning_rate": 6.656019656019656e-06, "loss": 1.5092, "step": 9030 }, { "epoch": 0.26640129736104967, "grad_norm": 3.931657557966806, "learning_rate": 6.65970515970516e-06, "loss": 1.5315, "step": 9035 }, { "epoch": 0.26654872475305913, "grad_norm": 3.9515902964817515, "learning_rate": 6.663390663390664e-06, "loss": 1.5825, "step": 9040 }, { "epoch": 0.26669615214506853, "grad_norm": 3.7555770130298964, "learning_rate": 6.6670761670761675e-06, "loss": 1.5223, "step": 9045 }, { "epoch": 0.266843579537078, "grad_norm": 3.829703636524505, "learning_rate": 6.670761670761671e-06, "loss": 1.541, "step": 9050 }, { "epoch": 0.26699100692908745, "grad_norm": 3.8951697332207913, "learning_rate": 6.674447174447175e-06, "loss": 1.5302, "step": 9055 }, { "epoch": 0.26713843432109685, "grad_norm": 3.87300507217203, "learning_rate": 6.678132678132678e-06, "loss": 1.5219, "step": 9060 }, { "epoch": 0.2672858617131063, "grad_norm": 4.2117418715237225, "learning_rate": 6.681818181818182e-06, "loss": 1.5857, "step": 9065 }, { "epoch": 0.2674332891051157, "grad_norm": 3.8474341441000797, "learning_rate": 6.685503685503686e-06, "loss": 1.6009, "step": 9070 }, { "epoch": 0.26758071649712517, "grad_norm": 3.9278489499544307, "learning_rate": 6.689189189189189e-06, "loss": 1.5572, "step": 9075 }, { "epoch": 0.26772814388913463, "grad_norm": 3.8789079035480696, "learning_rate": 6.692874692874693e-06, "loss": 1.4915, "step": 9080 }, { "epoch": 0.26787557128114403, "grad_norm": 4.012567345752683, "learning_rate": 6.696560196560197e-06, "loss": 1.524, "step": 9085 }, { "epoch": 0.2680229986731535, "grad_norm": 4.196784715533326, "learning_rate": 6.7002457002457005e-06, "loss": 1.5253, "step": 9090 }, { "epoch": 0.2681704260651629, "grad_norm": 3.8640686355235636, "learning_rate": 6.703931203931204e-06, "loss": 1.5117, "step": 9095 }, { "epoch": 0.26831785345717235, "grad_norm": 3.827507228579623, "learning_rate": 6.707616707616707e-06, "loss": 1.5031, "step": 9100 }, { "epoch": 0.26846528084918175, "grad_norm": 3.8236410526260216, "learning_rate": 6.711302211302211e-06, "loss": 1.5658, "step": 9105 }, { "epoch": 0.2686127082411912, "grad_norm": 4.049421166999161, "learning_rate": 6.714987714987715e-06, "loss": 1.5648, "step": 9110 }, { "epoch": 0.26876013563320067, "grad_norm": 3.7822281291818474, "learning_rate": 6.718673218673219e-06, "loss": 1.4914, "step": 9115 }, { "epoch": 0.2689075630252101, "grad_norm": 3.9339245748881813, "learning_rate": 6.722358722358723e-06, "loss": 1.526, "step": 9120 }, { "epoch": 0.26905499041721953, "grad_norm": 5.776902478490698, "learning_rate": 6.726044226044227e-06, "loss": 1.5745, "step": 9125 }, { "epoch": 0.26920241780922893, "grad_norm": 3.9592724625720237, "learning_rate": 6.729729729729731e-06, "loss": 1.4582, "step": 9130 }, { "epoch": 0.2693498452012384, "grad_norm": 3.9951046928091096, "learning_rate": 6.733415233415234e-06, "loss": 1.5571, "step": 9135 }, { "epoch": 0.26949727259324785, "grad_norm": 3.680881217949451, "learning_rate": 6.737100737100737e-06, "loss": 1.5456, "step": 9140 }, { "epoch": 0.26964469998525725, "grad_norm": 3.762644127460639, "learning_rate": 6.7407862407862405e-06, "loss": 1.5174, "step": 9145 }, { "epoch": 0.2697921273772667, "grad_norm": 3.8521842410130773, "learning_rate": 6.744471744471744e-06, "loss": 1.4888, "step": 9150 }, { "epoch": 0.2699395547692761, "grad_norm": 11.150691974574901, "learning_rate": 6.748157248157248e-06, "loss": 1.4666, "step": 9155 }, { "epoch": 0.27008698216128557, "grad_norm": 3.975199658780264, "learning_rate": 6.751842751842752e-06, "loss": 1.5167, "step": 9160 }, { "epoch": 0.27023440955329503, "grad_norm": 3.802573549216154, "learning_rate": 6.755528255528256e-06, "loss": 1.5591, "step": 9165 }, { "epoch": 0.27038183694530443, "grad_norm": 3.7947180629220187, "learning_rate": 6.759213759213759e-06, "loss": 1.5456, "step": 9170 }, { "epoch": 0.2705292643373139, "grad_norm": 4.073165099379776, "learning_rate": 6.762899262899263e-06, "loss": 1.5459, "step": 9175 }, { "epoch": 0.2706766917293233, "grad_norm": 3.977781232212813, "learning_rate": 6.766584766584767e-06, "loss": 1.5464, "step": 9180 }, { "epoch": 0.27082411912133275, "grad_norm": 3.825824040951039, "learning_rate": 6.770270270270271e-06, "loss": 1.5551, "step": 9185 }, { "epoch": 0.27097154651334215, "grad_norm": 10.804503286251226, "learning_rate": 6.7739557739557744e-06, "loss": 1.526, "step": 9190 }, { "epoch": 0.2711189739053516, "grad_norm": 4.143587963017745, "learning_rate": 6.777641277641278e-06, "loss": 1.5821, "step": 9195 }, { "epoch": 0.27126640129736107, "grad_norm": 3.8776768062489535, "learning_rate": 6.781326781326782e-06, "loss": 1.4889, "step": 9200 }, { "epoch": 0.2714138286893705, "grad_norm": 3.9125590582023455, "learning_rate": 6.785012285012285e-06, "loss": 1.5655, "step": 9205 }, { "epoch": 0.27156125608137993, "grad_norm": 3.916934180323149, "learning_rate": 6.788697788697788e-06, "loss": 1.4909, "step": 9210 }, { "epoch": 0.27170868347338933, "grad_norm": 3.7659192733527247, "learning_rate": 6.792383292383292e-06, "loss": 1.5538, "step": 9215 }, { "epoch": 0.2718561108653988, "grad_norm": 6.78433740582177, "learning_rate": 6.796068796068796e-06, "loss": 1.5115, "step": 9220 }, { "epoch": 0.27200353825740825, "grad_norm": 3.6635796912528047, "learning_rate": 6.7997542997543e-06, "loss": 1.5251, "step": 9225 }, { "epoch": 0.27215096564941765, "grad_norm": 3.8308299682184153, "learning_rate": 6.803439803439804e-06, "loss": 1.4828, "step": 9230 }, { "epoch": 0.2722983930414271, "grad_norm": 3.918641940129232, "learning_rate": 6.8071253071253075e-06, "loss": 1.5321, "step": 9235 }, { "epoch": 0.2724458204334365, "grad_norm": 3.8094422112519144, "learning_rate": 6.810810810810811e-06, "loss": 1.538, "step": 9240 }, { "epoch": 0.272593247825446, "grad_norm": 3.8410818130826248, "learning_rate": 6.814496314496314e-06, "loss": 1.4927, "step": 9245 }, { "epoch": 0.27274067521745543, "grad_norm": 3.7499566298152502, "learning_rate": 6.818181818181818e-06, "loss": 1.5605, "step": 9250 }, { "epoch": 0.27288810260946483, "grad_norm": 3.971984600975859, "learning_rate": 6.821867321867322e-06, "loss": 1.5619, "step": 9255 }, { "epoch": 0.2730355300014743, "grad_norm": 4.099595233910412, "learning_rate": 6.825552825552826e-06, "loss": 1.5004, "step": 9260 }, { "epoch": 0.2731829573934837, "grad_norm": 3.955373954462639, "learning_rate": 6.82923832923833e-06, "loss": 1.5381, "step": 9265 }, { "epoch": 0.27333038478549315, "grad_norm": 3.8125231662430226, "learning_rate": 6.832923832923834e-06, "loss": 1.5755, "step": 9270 }, { "epoch": 0.27347781217750256, "grad_norm": 3.875494897997426, "learning_rate": 6.836609336609337e-06, "loss": 1.5416, "step": 9275 }, { "epoch": 0.273625239569512, "grad_norm": 3.8533268226996205, "learning_rate": 6.84029484029484e-06, "loss": 1.5372, "step": 9280 }, { "epoch": 0.27377266696152147, "grad_norm": 3.8448174842130296, "learning_rate": 6.843980343980344e-06, "loss": 1.5382, "step": 9285 }, { "epoch": 0.2739200943535309, "grad_norm": 3.801104247764228, "learning_rate": 6.8476658476658475e-06, "loss": 1.5661, "step": 9290 }, { "epoch": 0.27406752174554033, "grad_norm": 3.9446693093714864, "learning_rate": 6.851351351351351e-06, "loss": 1.532, "step": 9295 }, { "epoch": 0.27421494913754973, "grad_norm": 3.8430634728109, "learning_rate": 6.855036855036855e-06, "loss": 1.5784, "step": 9300 }, { "epoch": 0.2743623765295592, "grad_norm": 3.7848677508724613, "learning_rate": 6.858722358722359e-06, "loss": 1.5374, "step": 9305 }, { "epoch": 0.27450980392156865, "grad_norm": 3.7922786567471016, "learning_rate": 6.862407862407863e-06, "loss": 1.5114, "step": 9310 }, { "epoch": 0.27465723131357805, "grad_norm": 3.9362790920146247, "learning_rate": 6.866093366093366e-06, "loss": 1.515, "step": 9315 }, { "epoch": 0.2748046587055875, "grad_norm": 3.7260324692520315, "learning_rate": 6.86977886977887e-06, "loss": 1.5518, "step": 9320 }, { "epoch": 0.2749520860975969, "grad_norm": 3.642004874178236, "learning_rate": 6.873464373464374e-06, "loss": 1.551, "step": 9325 }, { "epoch": 0.2750995134896064, "grad_norm": 3.9034384189547278, "learning_rate": 6.8771498771498776e-06, "loss": 1.5971, "step": 9330 }, { "epoch": 0.27524694088161583, "grad_norm": 3.760656619099615, "learning_rate": 6.8808353808353814e-06, "loss": 1.4944, "step": 9335 }, { "epoch": 0.27539436827362523, "grad_norm": 3.538474312185551, "learning_rate": 6.8845208845208845e-06, "loss": 1.4986, "step": 9340 }, { "epoch": 0.2755417956656347, "grad_norm": 3.5585261355622837, "learning_rate": 6.888206388206388e-06, "loss": 1.5266, "step": 9345 }, { "epoch": 0.2756892230576441, "grad_norm": 3.6221290235106105, "learning_rate": 6.891891891891892e-06, "loss": 1.5562, "step": 9350 }, { "epoch": 0.27583665044965355, "grad_norm": 3.6962488118687875, "learning_rate": 6.895577395577395e-06, "loss": 1.5716, "step": 9355 }, { "epoch": 0.27598407784166296, "grad_norm": 3.861074542920795, "learning_rate": 6.899262899262899e-06, "loss": 1.5459, "step": 9360 }, { "epoch": 0.2761315052336724, "grad_norm": 3.871413490478382, "learning_rate": 6.902948402948403e-06, "loss": 1.5093, "step": 9365 }, { "epoch": 0.2762789326256819, "grad_norm": 4.14665591373841, "learning_rate": 6.906633906633907e-06, "loss": 1.5315, "step": 9370 }, { "epoch": 0.2764263600176913, "grad_norm": 3.8181068902821234, "learning_rate": 6.910319410319411e-06, "loss": 1.5233, "step": 9375 }, { "epoch": 0.27657378740970073, "grad_norm": 3.7023262765815117, "learning_rate": 6.9140049140049145e-06, "loss": 1.5808, "step": 9380 }, { "epoch": 0.27672121480171014, "grad_norm": 3.84046379456393, "learning_rate": 6.917690417690418e-06, "loss": 1.5673, "step": 9385 }, { "epoch": 0.2768686421937196, "grad_norm": 3.813341091577275, "learning_rate": 6.921375921375921e-06, "loss": 1.5062, "step": 9390 }, { "epoch": 0.27701606958572905, "grad_norm": 3.774081819433191, "learning_rate": 6.925061425061425e-06, "loss": 1.5553, "step": 9395 }, { "epoch": 0.27716349697773845, "grad_norm": 3.8140002914335014, "learning_rate": 6.928746928746929e-06, "loss": 1.5521, "step": 9400 }, { "epoch": 0.2773109243697479, "grad_norm": 4.091937671430375, "learning_rate": 6.932432432432433e-06, "loss": 1.4732, "step": 9405 }, { "epoch": 0.2774583517617573, "grad_norm": 4.039162780906842, "learning_rate": 6.936117936117936e-06, "loss": 1.5208, "step": 9410 }, { "epoch": 0.2776057791537668, "grad_norm": 3.779422218474031, "learning_rate": 6.93980343980344e-06, "loss": 1.5307, "step": 9415 }, { "epoch": 0.27775320654577623, "grad_norm": 3.9661060425118335, "learning_rate": 6.943488943488944e-06, "loss": 1.5753, "step": 9420 }, { "epoch": 0.27790063393778563, "grad_norm": 3.7929684690814933, "learning_rate": 6.947174447174447e-06, "loss": 1.5857, "step": 9425 }, { "epoch": 0.2780480613297951, "grad_norm": 3.8082316439876176, "learning_rate": 6.950859950859951e-06, "loss": 1.5551, "step": 9430 }, { "epoch": 0.2781954887218045, "grad_norm": 3.860539416293014, "learning_rate": 6.9545454545454545e-06, "loss": 1.4941, "step": 9435 }, { "epoch": 0.27834291611381395, "grad_norm": 3.6792691677851956, "learning_rate": 6.958230958230958e-06, "loss": 1.5323, "step": 9440 }, { "epoch": 0.27849034350582336, "grad_norm": 4.130599586135921, "learning_rate": 6.961916461916462e-06, "loss": 1.5417, "step": 9445 }, { "epoch": 0.2786377708978328, "grad_norm": 3.9854472917191095, "learning_rate": 6.965601965601966e-06, "loss": 1.5965, "step": 9450 }, { "epoch": 0.2787851982898423, "grad_norm": 3.63664607003526, "learning_rate": 6.96928746928747e-06, "loss": 1.5888, "step": 9455 }, { "epoch": 0.2789326256818517, "grad_norm": 3.846375097282453, "learning_rate": 6.972972972972974e-06, "loss": 1.536, "step": 9460 }, { "epoch": 0.27908005307386113, "grad_norm": 3.7856505635673963, "learning_rate": 6.976658476658477e-06, "loss": 1.5584, "step": 9465 }, { "epoch": 0.27922748046587054, "grad_norm": 3.7056702356978155, "learning_rate": 6.980343980343981e-06, "loss": 1.5743, "step": 9470 }, { "epoch": 0.27937490785788, "grad_norm": 3.6129793828237813, "learning_rate": 6.984029484029484e-06, "loss": 1.5186, "step": 9475 }, { "epoch": 0.27952233524988945, "grad_norm": 4.159047717735024, "learning_rate": 6.987714987714988e-06, "loss": 1.5851, "step": 9480 }, { "epoch": 0.27966976264189886, "grad_norm": 3.6431468814055266, "learning_rate": 6.9914004914004914e-06, "loss": 1.5336, "step": 9485 }, { "epoch": 0.2798171900339083, "grad_norm": 3.8366335994317713, "learning_rate": 6.995085995085995e-06, "loss": 1.5066, "step": 9490 }, { "epoch": 0.2799646174259177, "grad_norm": 3.8287281253108065, "learning_rate": 6.998771498771499e-06, "loss": 1.6189, "step": 9495 }, { "epoch": 0.2801120448179272, "grad_norm": 3.981589938063094, "learning_rate": 7.002457002457002e-06, "loss": 1.5335, "step": 9500 }, { "epoch": 0.2801120448179272, "eval_loss": 1.2466639280319214, "eval_runtime": 4.1466, "eval_samples_per_second": 95.5, "eval_steps_per_second": 3.135, "step": 9500 }, { "epoch": 0.28025947220993663, "grad_norm": 3.614691415560362, "learning_rate": 7.006142506142506e-06, "loss": 1.5193, "step": 9505 }, { "epoch": 0.28040689960194604, "grad_norm": 3.669047571922322, "learning_rate": 7.00982800982801e-06, "loss": 1.5133, "step": 9510 }, { "epoch": 0.2805543269939555, "grad_norm": 4.113687536164276, "learning_rate": 7.013513513513514e-06, "loss": 1.5414, "step": 9515 }, { "epoch": 0.2807017543859649, "grad_norm": 3.7123714511934205, "learning_rate": 7.017199017199018e-06, "loss": 1.5084, "step": 9520 }, { "epoch": 0.28084918177797435, "grad_norm": 3.8601591773560635, "learning_rate": 7.0208845208845215e-06, "loss": 1.5292, "step": 9525 }, { "epoch": 0.28099660916998376, "grad_norm": 3.715220534665049, "learning_rate": 7.024570024570025e-06, "loss": 1.5716, "step": 9530 }, { "epoch": 0.2811440365619932, "grad_norm": 3.838759284503014, "learning_rate": 7.028255528255528e-06, "loss": 1.567, "step": 9535 }, { "epoch": 0.2812914639540027, "grad_norm": 3.754059936976869, "learning_rate": 7.031941031941031e-06, "loss": 1.5436, "step": 9540 }, { "epoch": 0.2814388913460121, "grad_norm": 3.7270130458021535, "learning_rate": 7.035626535626535e-06, "loss": 1.5601, "step": 9545 }, { "epoch": 0.28158631873802153, "grad_norm": 3.7096822567073193, "learning_rate": 7.039312039312039e-06, "loss": 1.532, "step": 9550 }, { "epoch": 0.28173374613003094, "grad_norm": 3.6552080319616858, "learning_rate": 7.042997542997543e-06, "loss": 1.5786, "step": 9555 }, { "epoch": 0.2818811735220404, "grad_norm": 3.7717154236178128, "learning_rate": 7.046683046683047e-06, "loss": 1.4985, "step": 9560 }, { "epoch": 0.28202860091404985, "grad_norm": 3.690277942498122, "learning_rate": 7.050368550368551e-06, "loss": 1.4921, "step": 9565 }, { "epoch": 0.28217602830605926, "grad_norm": 3.6001418211012526, "learning_rate": 7.054054054054055e-06, "loss": 1.5646, "step": 9570 }, { "epoch": 0.2823234556980687, "grad_norm": 3.9121178001472847, "learning_rate": 7.057739557739558e-06, "loss": 1.4928, "step": 9575 }, { "epoch": 0.2824708830900781, "grad_norm": 3.6405046194160025, "learning_rate": 7.0614250614250615e-06, "loss": 1.5134, "step": 9580 }, { "epoch": 0.2826183104820876, "grad_norm": 3.595215913902209, "learning_rate": 7.065110565110565e-06, "loss": 1.5357, "step": 9585 }, { "epoch": 0.28276573787409703, "grad_norm": 3.6620775562617665, "learning_rate": 7.068796068796069e-06, "loss": 1.5522, "step": 9590 }, { "epoch": 0.28291316526610644, "grad_norm": 3.8134760244204386, "learning_rate": 7.072481572481573e-06, "loss": 1.5108, "step": 9595 }, { "epoch": 0.2830605926581159, "grad_norm": 4.486909946874239, "learning_rate": 7.076167076167077e-06, "loss": 1.5345, "step": 9600 }, { "epoch": 0.2832080200501253, "grad_norm": 3.945661892609033, "learning_rate": 7.079852579852581e-06, "loss": 1.5427, "step": 9605 }, { "epoch": 0.28335544744213476, "grad_norm": 3.801763829845078, "learning_rate": 7.083538083538083e-06, "loss": 1.5746, "step": 9610 }, { "epoch": 0.28350287483414416, "grad_norm": 3.942888816958758, "learning_rate": 7.087223587223587e-06, "loss": 1.5793, "step": 9615 }, { "epoch": 0.2836503022261536, "grad_norm": 3.875808453231579, "learning_rate": 7.090909090909091e-06, "loss": 1.5763, "step": 9620 }, { "epoch": 0.2837977296181631, "grad_norm": 3.942883094447388, "learning_rate": 7.0945945945945946e-06, "loss": 1.5823, "step": 9625 }, { "epoch": 0.2839451570101725, "grad_norm": 3.708568049701601, "learning_rate": 7.0982800982800984e-06, "loss": 1.5323, "step": 9630 }, { "epoch": 0.28409258440218194, "grad_norm": 3.8405828749324624, "learning_rate": 7.101965601965602e-06, "loss": 1.5676, "step": 9635 }, { "epoch": 0.28424001179419134, "grad_norm": 4.53833943311759, "learning_rate": 7.105651105651106e-06, "loss": 1.4922, "step": 9640 }, { "epoch": 0.2843874391862008, "grad_norm": 3.570184752932487, "learning_rate": 7.109336609336609e-06, "loss": 1.522, "step": 9645 }, { "epoch": 0.28453486657821025, "grad_norm": 3.964034807551269, "learning_rate": 7.113022113022113e-06, "loss": 1.5358, "step": 9650 }, { "epoch": 0.28468229397021966, "grad_norm": 3.781527066491222, "learning_rate": 7.116707616707617e-06, "loss": 1.5517, "step": 9655 }, { "epoch": 0.2848297213622291, "grad_norm": 3.8885629643632975, "learning_rate": 7.120393120393121e-06, "loss": 1.5247, "step": 9660 }, { "epoch": 0.2849771487542385, "grad_norm": 3.663819525774223, "learning_rate": 7.124078624078625e-06, "loss": 1.5181, "step": 9665 }, { "epoch": 0.285124576146248, "grad_norm": 3.5472883554442225, "learning_rate": 7.1277641277641285e-06, "loss": 1.471, "step": 9670 }, { "epoch": 0.28527200353825743, "grad_norm": 3.6988130009361853, "learning_rate": 7.1314496314496315e-06, "loss": 1.5675, "step": 9675 }, { "epoch": 0.28541943093026684, "grad_norm": 3.877738645280619, "learning_rate": 7.135135135135135e-06, "loss": 1.5443, "step": 9680 }, { "epoch": 0.2855668583222763, "grad_norm": 3.6776967966533576, "learning_rate": 7.138820638820638e-06, "loss": 1.5344, "step": 9685 }, { "epoch": 0.2857142857142857, "grad_norm": 3.7756441739351576, "learning_rate": 7.142506142506142e-06, "loss": 1.5535, "step": 9690 }, { "epoch": 0.28586171310629516, "grad_norm": 3.7312228155437652, "learning_rate": 7.146191646191646e-06, "loss": 1.5741, "step": 9695 }, { "epoch": 0.28600914049830456, "grad_norm": 3.954106562684888, "learning_rate": 7.14987714987715e-06, "loss": 1.5522, "step": 9700 }, { "epoch": 0.286156567890314, "grad_norm": 3.9700565314851914, "learning_rate": 7.153562653562654e-06, "loss": 1.5321, "step": 9705 }, { "epoch": 0.2863039952823235, "grad_norm": 3.7211112489660048, "learning_rate": 7.157248157248158e-06, "loss": 1.5551, "step": 9710 }, { "epoch": 0.2864514226743329, "grad_norm": 6.504624506814422, "learning_rate": 7.160933660933662e-06, "loss": 1.587, "step": 9715 }, { "epoch": 0.28659885006634234, "grad_norm": 3.915113544015173, "learning_rate": 7.164619164619165e-06, "loss": 1.5409, "step": 9720 }, { "epoch": 0.28674627745835174, "grad_norm": 3.7580712287494635, "learning_rate": 7.1683046683046685e-06, "loss": 1.5645, "step": 9725 }, { "epoch": 0.2868937048503612, "grad_norm": 3.6638209349797597, "learning_rate": 7.171990171990172e-06, "loss": 1.5428, "step": 9730 }, { "epoch": 0.28704113224237066, "grad_norm": 3.8667108993856476, "learning_rate": 7.175675675675676e-06, "loss": 1.5161, "step": 9735 }, { "epoch": 0.28718855963438006, "grad_norm": 3.8150556189063876, "learning_rate": 7.179361179361179e-06, "loss": 1.5784, "step": 9740 }, { "epoch": 0.2873359870263895, "grad_norm": 3.9350112258893755, "learning_rate": 7.183046683046683e-06, "loss": 1.548, "step": 9745 }, { "epoch": 0.2874834144183989, "grad_norm": 3.669660166941989, "learning_rate": 7.186732186732187e-06, "loss": 1.5547, "step": 9750 }, { "epoch": 0.2876308418104084, "grad_norm": 3.850967951420313, "learning_rate": 7.19041769041769e-06, "loss": 1.5654, "step": 9755 }, { "epoch": 0.28777826920241784, "grad_norm": 3.825352802782395, "learning_rate": 7.194103194103194e-06, "loss": 1.5692, "step": 9760 }, { "epoch": 0.28792569659442724, "grad_norm": 3.8944921346677615, "learning_rate": 7.197788697788698e-06, "loss": 1.5328, "step": 9765 }, { "epoch": 0.2880731239864367, "grad_norm": 3.8342534828463446, "learning_rate": 7.2014742014742016e-06, "loss": 1.5419, "step": 9770 }, { "epoch": 0.2882205513784461, "grad_norm": 3.7559226533558423, "learning_rate": 7.205159705159705e-06, "loss": 1.5921, "step": 9775 }, { "epoch": 0.28836797877045556, "grad_norm": 3.569032356475187, "learning_rate": 7.208845208845209e-06, "loss": 1.5687, "step": 9780 }, { "epoch": 0.28851540616246496, "grad_norm": 3.799993734247008, "learning_rate": 7.212530712530713e-06, "loss": 1.5536, "step": 9785 }, { "epoch": 0.2886628335544744, "grad_norm": 4.056487910948587, "learning_rate": 7.216216216216217e-06, "loss": 1.6124, "step": 9790 }, { "epoch": 0.2888102609464839, "grad_norm": 3.6318920418004903, "learning_rate": 7.21990171990172e-06, "loss": 1.4919, "step": 9795 }, { "epoch": 0.2889576883384933, "grad_norm": 3.787595616816149, "learning_rate": 7.223587223587224e-06, "loss": 1.5341, "step": 9800 }, { "epoch": 0.28910511573050274, "grad_norm": 3.6992092025959864, "learning_rate": 7.227272727272728e-06, "loss": 1.4814, "step": 9805 }, { "epoch": 0.28925254312251214, "grad_norm": 3.787341681247046, "learning_rate": 7.230958230958231e-06, "loss": 1.5218, "step": 9810 }, { "epoch": 0.2893999705145216, "grad_norm": 3.7237085613688006, "learning_rate": 7.234643734643735e-06, "loss": 1.5364, "step": 9815 }, { "epoch": 0.28954739790653106, "grad_norm": 3.751018152138054, "learning_rate": 7.2383292383292385e-06, "loss": 1.5382, "step": 9820 }, { "epoch": 0.28969482529854046, "grad_norm": 3.9183042519412323, "learning_rate": 7.242014742014742e-06, "loss": 1.5741, "step": 9825 }, { "epoch": 0.2898422526905499, "grad_norm": 3.8163895339580542, "learning_rate": 7.245700245700245e-06, "loss": 1.4841, "step": 9830 }, { "epoch": 0.2899896800825593, "grad_norm": 3.5703269187677367, "learning_rate": 7.249385749385749e-06, "loss": 1.5501, "step": 9835 }, { "epoch": 0.2901371074745688, "grad_norm": 3.72483657365098, "learning_rate": 7.253071253071253e-06, "loss": 1.5185, "step": 9840 }, { "epoch": 0.29028453486657824, "grad_norm": 4.730030517184685, "learning_rate": 7.256756756756757e-06, "loss": 1.5352, "step": 9845 }, { "epoch": 0.29043196225858764, "grad_norm": 3.9302809461258996, "learning_rate": 7.260442260442261e-06, "loss": 1.5126, "step": 9850 }, { "epoch": 0.2905793896505971, "grad_norm": 3.9145850991630584, "learning_rate": 7.264127764127765e-06, "loss": 1.5414, "step": 9855 }, { "epoch": 0.2907268170426065, "grad_norm": 3.901327587564934, "learning_rate": 7.267813267813269e-06, "loss": 1.5475, "step": 9860 }, { "epoch": 0.29087424443461596, "grad_norm": 3.6147758593532986, "learning_rate": 7.271498771498772e-06, "loss": 1.5502, "step": 9865 }, { "epoch": 0.29102167182662536, "grad_norm": 3.9421163545114184, "learning_rate": 7.2751842751842755e-06, "loss": 1.5505, "step": 9870 }, { "epoch": 0.2911690992186348, "grad_norm": 3.871923337584659, "learning_rate": 7.2788697788697785e-06, "loss": 1.5413, "step": 9875 }, { "epoch": 0.2913165266106443, "grad_norm": 3.8383595412097917, "learning_rate": 7.282555282555282e-06, "loss": 1.5972, "step": 9880 }, { "epoch": 0.2914639540026537, "grad_norm": 3.7218710841070384, "learning_rate": 7.286240786240786e-06, "loss": 1.5503, "step": 9885 }, { "epoch": 0.29161138139466314, "grad_norm": 3.656133588202333, "learning_rate": 7.28992628992629e-06, "loss": 1.5764, "step": 9890 }, { "epoch": 0.29175880878667254, "grad_norm": 3.79071346475504, "learning_rate": 7.293611793611794e-06, "loss": 1.5365, "step": 9895 }, { "epoch": 0.291906236178682, "grad_norm": 3.94540226464029, "learning_rate": 7.297297297297298e-06, "loss": 1.5592, "step": 9900 }, { "epoch": 0.29205366357069146, "grad_norm": 3.9639423322920644, "learning_rate": 7.300982800982801e-06, "loss": 1.5468, "step": 9905 }, { "epoch": 0.29220109096270086, "grad_norm": 3.6476369332304706, "learning_rate": 7.304668304668305e-06, "loss": 1.5469, "step": 9910 }, { "epoch": 0.2923485183547103, "grad_norm": 3.844111360785013, "learning_rate": 7.3083538083538085e-06, "loss": 1.4741, "step": 9915 }, { "epoch": 0.2924959457467197, "grad_norm": 4.058458417182123, "learning_rate": 7.312039312039312e-06, "loss": 1.5838, "step": 9920 }, { "epoch": 0.2926433731387292, "grad_norm": 3.8932490114593694, "learning_rate": 7.315724815724816e-06, "loss": 1.5921, "step": 9925 }, { "epoch": 0.29279080053073864, "grad_norm": 3.687667932506368, "learning_rate": 7.31941031941032e-06, "loss": 1.5834, "step": 9930 }, { "epoch": 0.29293822792274804, "grad_norm": 3.731418646075217, "learning_rate": 7.323095823095824e-06, "loss": 1.5521, "step": 9935 }, { "epoch": 0.2930856553147575, "grad_norm": 4.001996207847821, "learning_rate": 7.326781326781327e-06, "loss": 1.5586, "step": 9940 }, { "epoch": 0.2932330827067669, "grad_norm": 3.4353776846598194, "learning_rate": 7.33046683046683e-06, "loss": 1.5149, "step": 9945 }, { "epoch": 0.29338051009877636, "grad_norm": 3.78806711053689, "learning_rate": 7.334152334152334e-06, "loss": 1.5573, "step": 9950 }, { "epoch": 0.29352793749078576, "grad_norm": 3.9247289461184134, "learning_rate": 7.337837837837838e-06, "loss": 1.5712, "step": 9955 }, { "epoch": 0.2936753648827952, "grad_norm": 3.622271805464399, "learning_rate": 7.341523341523342e-06, "loss": 1.5731, "step": 9960 }, { "epoch": 0.2938227922748047, "grad_norm": 3.611097483891908, "learning_rate": 7.3452088452088455e-06, "loss": 1.4975, "step": 9965 }, { "epoch": 0.2939702196668141, "grad_norm": 3.750433269539275, "learning_rate": 7.348894348894349e-06, "loss": 1.4912, "step": 9970 }, { "epoch": 0.29411764705882354, "grad_norm": 3.863904370141927, "learning_rate": 7.352579852579852e-06, "loss": 1.5618, "step": 9975 }, { "epoch": 0.29426507445083294, "grad_norm": 3.7729614306052337, "learning_rate": 7.356265356265356e-06, "loss": 1.5816, "step": 9980 }, { "epoch": 0.2944125018428424, "grad_norm": 3.655226241939143, "learning_rate": 7.35995085995086e-06, "loss": 1.4964, "step": 9985 }, { "epoch": 0.29455992923485186, "grad_norm": 3.6901861287593176, "learning_rate": 7.363636363636364e-06, "loss": 1.5249, "step": 9990 }, { "epoch": 0.29470735662686126, "grad_norm": 4.044924320595459, "learning_rate": 7.367321867321868e-06, "loss": 1.5226, "step": 9995 }, { "epoch": 0.2948547840188707, "grad_norm": 4.356598605395776, "learning_rate": 7.371007371007372e-06, "loss": 1.5538, "step": 10000 }, { "epoch": 0.2948547840188707, "eval_loss": 1.24652898311615, "eval_runtime": 4.2433, "eval_samples_per_second": 93.324, "eval_steps_per_second": 3.064, "step": 10000 }, { "epoch": 0.2950022114108801, "grad_norm": 3.6148198467694406, "learning_rate": 7.3746928746928756e-06, "loss": 1.5011, "step": 10005 }, { "epoch": 0.2951496388028896, "grad_norm": 3.611513824394577, "learning_rate": 7.378378378378379e-06, "loss": 1.567, "step": 10010 }, { "epoch": 0.29529706619489904, "grad_norm": 3.674939154243048, "learning_rate": 7.382063882063882e-06, "loss": 1.5594, "step": 10015 }, { "epoch": 0.29544449358690844, "grad_norm": 3.5643922352869266, "learning_rate": 7.3857493857493855e-06, "loss": 1.5455, "step": 10020 }, { "epoch": 0.2955919209789179, "grad_norm": 4.098507473024187, "learning_rate": 7.389434889434889e-06, "loss": 1.5265, "step": 10025 }, { "epoch": 0.2957393483709273, "grad_norm": 3.570498569102528, "learning_rate": 7.393120393120393e-06, "loss": 1.5531, "step": 10030 }, { "epoch": 0.29588677576293676, "grad_norm": 3.8831565517341975, "learning_rate": 7.396805896805897e-06, "loss": 1.5468, "step": 10035 }, { "epoch": 0.29603420315494616, "grad_norm": 3.8391389381982033, "learning_rate": 7.400491400491401e-06, "loss": 1.5426, "step": 10040 }, { "epoch": 0.2961816305469556, "grad_norm": 3.6762557273300756, "learning_rate": 7.404176904176905e-06, "loss": 1.5708, "step": 10045 }, { "epoch": 0.2963290579389651, "grad_norm": 3.5348837241994144, "learning_rate": 7.407862407862408e-06, "loss": 1.542, "step": 10050 }, { "epoch": 0.2964764853309745, "grad_norm": 3.747048601858641, "learning_rate": 7.411547911547912e-06, "loss": 1.5472, "step": 10055 }, { "epoch": 0.29662391272298394, "grad_norm": 3.6238634183600924, "learning_rate": 7.4152334152334155e-06, "loss": 1.5285, "step": 10060 }, { "epoch": 0.29677134011499334, "grad_norm": 3.781184956787328, "learning_rate": 7.418918918918919e-06, "loss": 1.5937, "step": 10065 }, { "epoch": 0.2969187675070028, "grad_norm": 3.5281143803258734, "learning_rate": 7.422604422604423e-06, "loss": 1.5885, "step": 10070 }, { "epoch": 0.29706619489901226, "grad_norm": 3.7954597809349964, "learning_rate": 7.426289926289926e-06, "loss": 1.5105, "step": 10075 }, { "epoch": 0.29721362229102166, "grad_norm": 3.655466524396302, "learning_rate": 7.42997542997543e-06, "loss": 1.5332, "step": 10080 }, { "epoch": 0.2973610496830311, "grad_norm": 3.7596052799390045, "learning_rate": 7.433660933660933e-06, "loss": 1.5235, "step": 10085 }, { "epoch": 0.2975084770750405, "grad_norm": 3.5025233922006773, "learning_rate": 7.437346437346437e-06, "loss": 1.5477, "step": 10090 }, { "epoch": 0.29765590446705, "grad_norm": 3.6323306582884505, "learning_rate": 7.441031941031941e-06, "loss": 1.5622, "step": 10095 }, { "epoch": 0.29780333185905944, "grad_norm": 3.808390786710831, "learning_rate": 7.444717444717445e-06, "loss": 1.5678, "step": 10100 }, { "epoch": 0.29795075925106884, "grad_norm": 3.5784590249674615, "learning_rate": 7.448402948402949e-06, "loss": 1.5092, "step": 10105 }, { "epoch": 0.2980981866430783, "grad_norm": 3.814342975812633, "learning_rate": 7.4520884520884525e-06, "loss": 1.558, "step": 10110 }, { "epoch": 0.2982456140350877, "grad_norm": 3.569439108562482, "learning_rate": 7.455773955773956e-06, "loss": 1.4991, "step": 10115 }, { "epoch": 0.29839304142709716, "grad_norm": 3.561875729207017, "learning_rate": 7.45945945945946e-06, "loss": 1.5496, "step": 10120 }, { "epoch": 0.29854046881910656, "grad_norm": 3.6641198031618667, "learning_rate": 7.463144963144963e-06, "loss": 1.5439, "step": 10125 }, { "epoch": 0.298687896211116, "grad_norm": 3.719722415214812, "learning_rate": 7.466830466830467e-06, "loss": 1.5332, "step": 10130 }, { "epoch": 0.2988353236031255, "grad_norm": 3.743116147401075, "learning_rate": 7.470515970515971e-06, "loss": 1.5013, "step": 10135 }, { "epoch": 0.2989827509951349, "grad_norm": 3.7951546562118477, "learning_rate": 7.474201474201475e-06, "loss": 1.5612, "step": 10140 }, { "epoch": 0.29913017838714434, "grad_norm": 3.5651910993006926, "learning_rate": 7.477886977886978e-06, "loss": 1.5312, "step": 10145 }, { "epoch": 0.29927760577915374, "grad_norm": 3.824548920786256, "learning_rate": 7.481572481572482e-06, "loss": 1.5221, "step": 10150 }, { "epoch": 0.2994250331711632, "grad_norm": 3.5695628708945355, "learning_rate": 7.485257985257986e-06, "loss": 1.5183, "step": 10155 }, { "epoch": 0.29957246056317266, "grad_norm": 3.739519078439491, "learning_rate": 7.488943488943489e-06, "loss": 1.5028, "step": 10160 }, { "epoch": 0.29971988795518206, "grad_norm": 3.9756994615549393, "learning_rate": 7.4926289926289925e-06, "loss": 1.5856, "step": 10165 }, { "epoch": 0.2998673153471915, "grad_norm": 3.65651430066635, "learning_rate": 7.496314496314496e-06, "loss": 1.56, "step": 10170 }, { "epoch": 0.3000147427392009, "grad_norm": 3.671282472389185, "learning_rate": 7.5e-06, "loss": 1.4945, "step": 10175 }, { "epoch": 0.3001621701312104, "grad_norm": 3.5875253415526123, "learning_rate": 7.499999944825984e-06, "loss": 1.5605, "step": 10180 }, { "epoch": 0.30030959752321984, "grad_norm": 3.7742243011958188, "learning_rate": 7.499999779303937e-06, "loss": 1.5036, "step": 10185 }, { "epoch": 0.30045702491522924, "grad_norm": 3.7616362553100875, "learning_rate": 7.499999503433865e-06, "loss": 1.5755, "step": 10190 }, { "epoch": 0.3006044523072387, "grad_norm": 3.5470489981293865, "learning_rate": 7.499999117215776e-06, "loss": 1.5291, "step": 10195 }, { "epoch": 0.3007518796992481, "grad_norm": 3.6954768227992925, "learning_rate": 7.499998620649681e-06, "loss": 1.5473, "step": 10200 }, { "epoch": 0.30089930709125756, "grad_norm": 3.5423736336907696, "learning_rate": 7.4999980137355936e-06, "loss": 1.5207, "step": 10205 }, { "epoch": 0.30104673448326696, "grad_norm": 3.5457099397635914, "learning_rate": 7.4999972964735335e-06, "loss": 1.5049, "step": 10210 }, { "epoch": 0.3011941618752764, "grad_norm": 3.603102489819855, "learning_rate": 7.49999646886352e-06, "loss": 1.5213, "step": 10215 }, { "epoch": 0.3013415892672859, "grad_norm": 3.791844999161912, "learning_rate": 7.499995530905579e-06, "loss": 1.5741, "step": 10220 }, { "epoch": 0.3014890166592953, "grad_norm": 3.718776266284609, "learning_rate": 7.499994482599737e-06, "loss": 1.5908, "step": 10225 }, { "epoch": 0.30163644405130474, "grad_norm": 3.824555862864395, "learning_rate": 7.4999933239460255e-06, "loss": 1.5157, "step": 10230 }, { "epoch": 0.30178387144331414, "grad_norm": 3.9390911018999537, "learning_rate": 7.4999920549444785e-06, "loss": 1.5503, "step": 10235 }, { "epoch": 0.3019312988353236, "grad_norm": 3.68828393538967, "learning_rate": 7.499990675595133e-06, "loss": 1.5511, "step": 10240 }, { "epoch": 0.30207872622733306, "grad_norm": 3.9238283749705563, "learning_rate": 7.49998918589803e-06, "loss": 1.5852, "step": 10245 }, { "epoch": 0.30222615361934246, "grad_norm": 3.5436491330858857, "learning_rate": 7.499987585853213e-06, "loss": 1.5578, "step": 10250 }, { "epoch": 0.3023735810113519, "grad_norm": 3.7527782532191103, "learning_rate": 7.499985875460731e-06, "loss": 1.5469, "step": 10255 }, { "epoch": 0.3025210084033613, "grad_norm": 3.613260932674915, "learning_rate": 7.49998405472063e-06, "loss": 1.5188, "step": 10260 }, { "epoch": 0.3026684357953708, "grad_norm": 3.3348513493547842, "learning_rate": 7.499982123632967e-06, "loss": 1.5041, "step": 10265 }, { "epoch": 0.30281586318738024, "grad_norm": 3.843699918064759, "learning_rate": 7.499980082197798e-06, "loss": 1.5661, "step": 10270 }, { "epoch": 0.30296329057938964, "grad_norm": 3.536938448410628, "learning_rate": 7.499977930415184e-06, "loss": 1.4986, "step": 10275 }, { "epoch": 0.3031107179713991, "grad_norm": 3.9226933289898094, "learning_rate": 7.499975668285185e-06, "loss": 1.5258, "step": 10280 }, { "epoch": 0.3032581453634085, "grad_norm": 3.4818014317726176, "learning_rate": 7.4999732958078725e-06, "loss": 1.5057, "step": 10285 }, { "epoch": 0.30340557275541796, "grad_norm": 3.668037952390132, "learning_rate": 7.499970812983313e-06, "loss": 1.5503, "step": 10290 }, { "epoch": 0.30355300014742737, "grad_norm": 3.7113772616814424, "learning_rate": 7.499968219811579e-06, "loss": 1.5572, "step": 10295 }, { "epoch": 0.3037004275394368, "grad_norm": 3.5010931407507377, "learning_rate": 7.499965516292751e-06, "loss": 1.5212, "step": 10300 }, { "epoch": 0.3038478549314463, "grad_norm": 3.7064497913594896, "learning_rate": 7.499962702426903e-06, "loss": 1.5301, "step": 10305 }, { "epoch": 0.3039952823234557, "grad_norm": 3.6102271526560195, "learning_rate": 7.499959778214121e-06, "loss": 1.5335, "step": 10310 }, { "epoch": 0.30414270971546514, "grad_norm": 3.4950645514282423, "learning_rate": 7.499956743654491e-06, "loss": 1.5126, "step": 10315 }, { "epoch": 0.30429013710747455, "grad_norm": 3.8502583765481955, "learning_rate": 7.499953598748101e-06, "loss": 1.5253, "step": 10320 }, { "epoch": 0.304437564499484, "grad_norm": 3.554201528429542, "learning_rate": 7.499950343495046e-06, "loss": 1.5456, "step": 10325 }, { "epoch": 0.30458499189149346, "grad_norm": 4.097836002485635, "learning_rate": 7.4999469778954185e-06, "loss": 1.5419, "step": 10330 }, { "epoch": 0.30473241928350286, "grad_norm": 3.6314725016767126, "learning_rate": 7.499943501949321e-06, "loss": 1.57, "step": 10335 }, { "epoch": 0.3048798466755123, "grad_norm": 3.4613118831738268, "learning_rate": 7.499939915656852e-06, "loss": 1.6037, "step": 10340 }, { "epoch": 0.3050272740675217, "grad_norm": 3.788721291202628, "learning_rate": 7.49993621901812e-06, "loss": 1.5157, "step": 10345 }, { "epoch": 0.3051747014595312, "grad_norm": 3.605805119313727, "learning_rate": 7.499932412033232e-06, "loss": 1.5286, "step": 10350 }, { "epoch": 0.30532212885154064, "grad_norm": 3.7251988839729955, "learning_rate": 7.499928494702302e-06, "loss": 1.5462, "step": 10355 }, { "epoch": 0.30546955624355004, "grad_norm": 3.605381071781421, "learning_rate": 7.499924467025444e-06, "loss": 1.5652, "step": 10360 }, { "epoch": 0.3056169836355595, "grad_norm": 4.000361710864829, "learning_rate": 7.499920329002776e-06, "loss": 1.5322, "step": 10365 }, { "epoch": 0.3057644110275689, "grad_norm": 3.698890606620254, "learning_rate": 7.49991608063442e-06, "loss": 1.5472, "step": 10370 }, { "epoch": 0.30591183841957836, "grad_norm": 3.988163542233093, "learning_rate": 7.4999117219205025e-06, "loss": 1.6058, "step": 10375 }, { "epoch": 0.30605926581158777, "grad_norm": 3.78398897567886, "learning_rate": 7.499907252861149e-06, "loss": 1.5308, "step": 10380 }, { "epoch": 0.3062066932035972, "grad_norm": 3.7414797802532394, "learning_rate": 7.499902673456494e-06, "loss": 1.5175, "step": 10385 }, { "epoch": 0.3063541205956067, "grad_norm": 3.600099915860288, "learning_rate": 7.49989798370667e-06, "loss": 1.5402, "step": 10390 }, { "epoch": 0.3065015479876161, "grad_norm": 3.896898477191744, "learning_rate": 7.4998931836118165e-06, "loss": 1.5005, "step": 10395 }, { "epoch": 0.30664897537962554, "grad_norm": 3.6500966284643273, "learning_rate": 7.499888273172074e-06, "loss": 1.5981, "step": 10400 }, { "epoch": 0.30679640277163495, "grad_norm": 3.4953730511885204, "learning_rate": 7.499883252387588e-06, "loss": 1.5795, "step": 10405 }, { "epoch": 0.3069438301636444, "grad_norm": 3.8205410451773, "learning_rate": 7.499878121258503e-06, "loss": 1.5517, "step": 10410 }, { "epoch": 0.30709125755565386, "grad_norm": 3.906617891390179, "learning_rate": 7.499872879784974e-06, "loss": 1.5356, "step": 10415 }, { "epoch": 0.30723868494766327, "grad_norm": 3.5913140419457306, "learning_rate": 7.499867527967154e-06, "loss": 1.4655, "step": 10420 }, { "epoch": 0.3073861123396727, "grad_norm": 3.9298431252458337, "learning_rate": 7.4998620658052e-06, "loss": 1.5515, "step": 10425 }, { "epoch": 0.3075335397316821, "grad_norm": 4.029211294128619, "learning_rate": 7.4998564932992724e-06, "loss": 1.4977, "step": 10430 }, { "epoch": 0.3076809671236916, "grad_norm": 3.5556435157512216, "learning_rate": 7.499850810449535e-06, "loss": 1.5054, "step": 10435 }, { "epoch": 0.30782839451570104, "grad_norm": 3.616600210495342, "learning_rate": 7.499845017256157e-06, "loss": 1.5538, "step": 10440 }, { "epoch": 0.30797582190771045, "grad_norm": 3.464096068675515, "learning_rate": 7.499839113719307e-06, "loss": 1.5839, "step": 10445 }, { "epoch": 0.3081232492997199, "grad_norm": 3.6660628570453344, "learning_rate": 7.4998330998391586e-06, "loss": 1.5493, "step": 10450 }, { "epoch": 0.3082706766917293, "grad_norm": 3.56836465930199, "learning_rate": 7.49982697561589e-06, "loss": 1.4763, "step": 10455 }, { "epoch": 0.30841810408373876, "grad_norm": 3.6554237453254266, "learning_rate": 7.49982074104968e-06, "loss": 1.5284, "step": 10460 }, { "epoch": 0.30856553147574817, "grad_norm": 3.8629474993846262, "learning_rate": 7.499814396140713e-06, "loss": 1.587, "step": 10465 }, { "epoch": 0.3087129588677576, "grad_norm": 3.9158278820967407, "learning_rate": 7.499807940889176e-06, "loss": 1.5857, "step": 10470 }, { "epoch": 0.3088603862597671, "grad_norm": 3.6683053019458147, "learning_rate": 7.499801375295258e-06, "loss": 1.5948, "step": 10475 }, { "epoch": 0.3090078136517765, "grad_norm": 3.489764164147045, "learning_rate": 7.499794699359154e-06, "loss": 1.5524, "step": 10480 }, { "epoch": 0.30915524104378594, "grad_norm": 3.7799567367919553, "learning_rate": 7.499787913081058e-06, "loss": 1.5359, "step": 10485 }, { "epoch": 0.30930266843579535, "grad_norm": 3.6037997159700934, "learning_rate": 7.499781016461172e-06, "loss": 1.5692, "step": 10490 }, { "epoch": 0.3094500958278048, "grad_norm": 4.00051543367501, "learning_rate": 7.499774009499696e-06, "loss": 1.5756, "step": 10495 }, { "epoch": 0.30959752321981426, "grad_norm": 3.522185082763116, "learning_rate": 7.499766892196839e-06, "loss": 1.5036, "step": 10500 }, { "epoch": 0.30959752321981426, "eval_loss": 1.2422162294387817, "eval_runtime": 4.115, "eval_samples_per_second": 96.234, "eval_steps_per_second": 3.159, "step": 10500 }, { "epoch": 0.30974495061182367, "grad_norm": 3.654118253236649, "learning_rate": 7.499759664552809e-06, "loss": 1.571, "step": 10505 }, { "epoch": 0.3098923780038331, "grad_norm": 3.6514960905455114, "learning_rate": 7.49975232656782e-06, "loss": 1.5131, "step": 10510 }, { "epoch": 0.3100398053958425, "grad_norm": 3.5532843899099986, "learning_rate": 7.499744878242086e-06, "loss": 1.5808, "step": 10515 }, { "epoch": 0.310187232787852, "grad_norm": 3.5922592793663424, "learning_rate": 7.499737319575828e-06, "loss": 1.5462, "step": 10520 }, { "epoch": 0.31033466017986144, "grad_norm": 3.7544214587489497, "learning_rate": 7.4997296505692664e-06, "loss": 1.5961, "step": 10525 }, { "epoch": 0.31048208757187085, "grad_norm": 3.6567971796440384, "learning_rate": 7.49972187122263e-06, "loss": 1.5755, "step": 10530 }, { "epoch": 0.3106295149638803, "grad_norm": 3.4968220470863844, "learning_rate": 7.499713981536144e-06, "loss": 1.5195, "step": 10535 }, { "epoch": 0.3107769423558897, "grad_norm": 3.664197850487889, "learning_rate": 7.499705981510044e-06, "loss": 1.5854, "step": 10540 }, { "epoch": 0.31092436974789917, "grad_norm": 3.5913033832083396, "learning_rate": 7.499697871144562e-06, "loss": 1.524, "step": 10545 }, { "epoch": 0.31107179713990857, "grad_norm": 3.75675683108124, "learning_rate": 7.4996896504399395e-06, "loss": 1.5548, "step": 10550 }, { "epoch": 0.311219224531918, "grad_norm": 3.650364415298582, "learning_rate": 7.499681319396417e-06, "loss": 1.5339, "step": 10555 }, { "epoch": 0.3113666519239275, "grad_norm": 3.874503589570771, "learning_rate": 7.499672878014241e-06, "loss": 1.5633, "step": 10560 }, { "epoch": 0.3115140793159369, "grad_norm": 4.038611154928383, "learning_rate": 7.499664326293658e-06, "loss": 1.5241, "step": 10565 }, { "epoch": 0.31166150670794635, "grad_norm": 3.830255296483896, "learning_rate": 7.499655664234921e-06, "loss": 1.557, "step": 10570 }, { "epoch": 0.31180893409995575, "grad_norm": 3.578228138160765, "learning_rate": 7.499646891838283e-06, "loss": 1.525, "step": 10575 }, { "epoch": 0.3119563614919652, "grad_norm": 3.7870802721956847, "learning_rate": 7.499638009104005e-06, "loss": 1.6173, "step": 10580 }, { "epoch": 0.31210378888397466, "grad_norm": 3.7158139048007994, "learning_rate": 7.499629016032346e-06, "loss": 1.538, "step": 10585 }, { "epoch": 0.31225121627598407, "grad_norm": 3.6958148961910213, "learning_rate": 7.4996199126235725e-06, "loss": 1.544, "step": 10590 }, { "epoch": 0.3123986436679935, "grad_norm": 3.654832769347792, "learning_rate": 7.499610698877951e-06, "loss": 1.4925, "step": 10595 }, { "epoch": 0.31254607106000293, "grad_norm": 3.6318854211146774, "learning_rate": 7.499601374795753e-06, "loss": 1.558, "step": 10600 }, { "epoch": 0.3126934984520124, "grad_norm": 3.6672457664474876, "learning_rate": 7.499591940377253e-06, "loss": 1.5641, "step": 10605 }, { "epoch": 0.31284092584402184, "grad_norm": 4.378830775761778, "learning_rate": 7.499582395622728e-06, "loss": 1.4843, "step": 10610 }, { "epoch": 0.31298835323603125, "grad_norm": 3.562914019322276, "learning_rate": 7.499572740532461e-06, "loss": 1.5906, "step": 10615 }, { "epoch": 0.3131357806280407, "grad_norm": 3.6777741114342803, "learning_rate": 7.499562975106733e-06, "loss": 1.569, "step": 10620 }, { "epoch": 0.3132832080200501, "grad_norm": 3.844721032382873, "learning_rate": 7.499553099345834e-06, "loss": 1.5475, "step": 10625 }, { "epoch": 0.31343063541205957, "grad_norm": 3.6130268028199053, "learning_rate": 7.4995431132500525e-06, "loss": 1.5172, "step": 10630 }, { "epoch": 0.31357806280406897, "grad_norm": 3.69829201918449, "learning_rate": 7.499533016819684e-06, "loss": 1.5675, "step": 10635 }, { "epoch": 0.3137254901960784, "grad_norm": 3.542004910663313, "learning_rate": 7.499522810055025e-06, "loss": 1.5945, "step": 10640 }, { "epoch": 0.3138729175880879, "grad_norm": 3.4854236385814206, "learning_rate": 7.499512492956376e-06, "loss": 1.5357, "step": 10645 }, { "epoch": 0.3140203449800973, "grad_norm": 3.6395075679257807, "learning_rate": 7.499502065524041e-06, "loss": 1.5368, "step": 10650 }, { "epoch": 0.31416777237210675, "grad_norm": 3.6139948025913147, "learning_rate": 7.499491527758323e-06, "loss": 1.5064, "step": 10655 }, { "epoch": 0.31431519976411615, "grad_norm": 3.3951671034326245, "learning_rate": 7.499480879659539e-06, "loss": 1.5396, "step": 10660 }, { "epoch": 0.3144626271561256, "grad_norm": 3.8045188956767304, "learning_rate": 7.499470121227997e-06, "loss": 1.5539, "step": 10665 }, { "epoch": 0.31461005454813507, "grad_norm": 3.7948029456971923, "learning_rate": 7.499459252464016e-06, "loss": 1.5539, "step": 10670 }, { "epoch": 0.31475748194014447, "grad_norm": 3.5445533007583085, "learning_rate": 7.4994482733679145e-06, "loss": 1.5602, "step": 10675 }, { "epoch": 0.3149049093321539, "grad_norm": 3.6287280594444438, "learning_rate": 7.499437183940016e-06, "loss": 1.5945, "step": 10680 }, { "epoch": 0.31505233672416333, "grad_norm": 3.693203071076396, "learning_rate": 7.499425984180647e-06, "loss": 1.5145, "step": 10685 }, { "epoch": 0.3151997641161728, "grad_norm": 3.8416593720131202, "learning_rate": 7.499414674090137e-06, "loss": 1.5028, "step": 10690 }, { "epoch": 0.31534719150818225, "grad_norm": 3.7004550152420927, "learning_rate": 7.499403253668819e-06, "loss": 1.553, "step": 10695 }, { "epoch": 0.31549461890019165, "grad_norm": 3.5023320016743646, "learning_rate": 7.499391722917029e-06, "loss": 1.5104, "step": 10700 }, { "epoch": 0.3156420462922011, "grad_norm": 3.67967606758472, "learning_rate": 7.4993800818351065e-06, "loss": 1.5223, "step": 10705 }, { "epoch": 0.3157894736842105, "grad_norm": 3.5681883728640744, "learning_rate": 7.4993683304233935e-06, "loss": 1.5344, "step": 10710 }, { "epoch": 0.31593690107621997, "grad_norm": 3.751338332532112, "learning_rate": 7.499356468682236e-06, "loss": 1.4724, "step": 10715 }, { "epoch": 0.31608432846822937, "grad_norm": 3.4591844682221957, "learning_rate": 7.499344496611982e-06, "loss": 1.5298, "step": 10720 }, { "epoch": 0.31623175586023883, "grad_norm": 3.588148228430963, "learning_rate": 7.499332414212986e-06, "loss": 1.5316, "step": 10725 }, { "epoch": 0.3163791832522483, "grad_norm": 3.448871527900935, "learning_rate": 7.499320221485603e-06, "loss": 1.5091, "step": 10730 }, { "epoch": 0.3165266106442577, "grad_norm": 3.561084877457996, "learning_rate": 7.499307918430191e-06, "loss": 1.5532, "step": 10735 }, { "epoch": 0.31667403803626715, "grad_norm": 3.6439184995966065, "learning_rate": 7.499295505047111e-06, "loss": 1.5161, "step": 10740 }, { "epoch": 0.31682146542827655, "grad_norm": 3.9386819441931924, "learning_rate": 7.499282981336731e-06, "loss": 1.556, "step": 10745 }, { "epoch": 0.316968892820286, "grad_norm": 3.554981890164776, "learning_rate": 7.499270347299417e-06, "loss": 1.5494, "step": 10750 }, { "epoch": 0.31711632021229547, "grad_norm": 3.570550757321823, "learning_rate": 7.499257602935542e-06, "loss": 1.4886, "step": 10755 }, { "epoch": 0.31726374760430487, "grad_norm": 4.3904764112532675, "learning_rate": 7.4992447482454806e-06, "loss": 1.5392, "step": 10760 }, { "epoch": 0.3174111749963143, "grad_norm": 4.279280938020418, "learning_rate": 7.499231783229611e-06, "loss": 1.5722, "step": 10765 }, { "epoch": 0.31755860238832373, "grad_norm": 3.805950042903218, "learning_rate": 7.499218707888317e-06, "loss": 1.5577, "step": 10770 }, { "epoch": 0.3177060297803332, "grad_norm": 3.4471295031357445, "learning_rate": 7.49920552222198e-06, "loss": 1.5329, "step": 10775 }, { "epoch": 0.31785345717234265, "grad_norm": 3.604342052300517, "learning_rate": 7.499192226230989e-06, "loss": 1.5387, "step": 10780 }, { "epoch": 0.31800088456435205, "grad_norm": 3.834924074932983, "learning_rate": 7.499178819915736e-06, "loss": 1.5645, "step": 10785 }, { "epoch": 0.3181483119563615, "grad_norm": 3.6011595599542168, "learning_rate": 7.499165303276616e-06, "loss": 1.5683, "step": 10790 }, { "epoch": 0.3182957393483709, "grad_norm": 3.526637284461587, "learning_rate": 7.499151676314024e-06, "loss": 1.5337, "step": 10795 }, { "epoch": 0.31844316674038037, "grad_norm": 3.7357444647243883, "learning_rate": 7.499137939028364e-06, "loss": 1.5187, "step": 10800 }, { "epoch": 0.31859059413238977, "grad_norm": 3.717223458221615, "learning_rate": 7.499124091420039e-06, "loss": 1.5492, "step": 10805 }, { "epoch": 0.31873802152439923, "grad_norm": 3.5075024421771643, "learning_rate": 7.4991101334894564e-06, "loss": 1.5383, "step": 10810 }, { "epoch": 0.3188854489164087, "grad_norm": 3.4630798040131014, "learning_rate": 7.499096065237027e-06, "loss": 1.477, "step": 10815 }, { "epoch": 0.3190328763084181, "grad_norm": 3.9830222533610065, "learning_rate": 7.4990818866631654e-06, "loss": 1.5214, "step": 10820 }, { "epoch": 0.31918030370042755, "grad_norm": 3.735552346894079, "learning_rate": 7.499067597768288e-06, "loss": 1.5343, "step": 10825 }, { "epoch": 0.31932773109243695, "grad_norm": 3.9856741116180974, "learning_rate": 7.499053198552815e-06, "loss": 1.6034, "step": 10830 }, { "epoch": 0.3194751584844464, "grad_norm": 3.5554166677525747, "learning_rate": 7.499038689017171e-06, "loss": 1.5653, "step": 10835 }, { "epoch": 0.31962258587645587, "grad_norm": 3.793857993927486, "learning_rate": 7.499024069161782e-06, "loss": 1.5554, "step": 10840 }, { "epoch": 0.31977001326846527, "grad_norm": 3.5289112520565036, "learning_rate": 7.499009338987081e-06, "loss": 1.5187, "step": 10845 }, { "epoch": 0.31991744066047473, "grad_norm": 3.7123661154717564, "learning_rate": 7.4989944984934965e-06, "loss": 1.563, "step": 10850 }, { "epoch": 0.32006486805248413, "grad_norm": 3.8205927608923544, "learning_rate": 7.4989795476814695e-06, "loss": 1.5273, "step": 10855 }, { "epoch": 0.3202122954444936, "grad_norm": 3.4721623615446284, "learning_rate": 7.498964486551439e-06, "loss": 1.5588, "step": 10860 }, { "epoch": 0.32035972283650305, "grad_norm": 3.625499654417413, "learning_rate": 7.498949315103847e-06, "loss": 1.5246, "step": 10865 }, { "epoch": 0.32050715022851245, "grad_norm": 4.411548720567437, "learning_rate": 7.49893403333914e-06, "loss": 1.524, "step": 10870 }, { "epoch": 0.3206545776205219, "grad_norm": 3.5939059433411273, "learning_rate": 7.498918641257769e-06, "loss": 1.534, "step": 10875 }, { "epoch": 0.3208020050125313, "grad_norm": 3.644521390293532, "learning_rate": 7.498903138860185e-06, "loss": 1.5397, "step": 10880 }, { "epoch": 0.32094943240454077, "grad_norm": 3.702516472021258, "learning_rate": 7.498887526146847e-06, "loss": 1.5612, "step": 10885 }, { "epoch": 0.32109685979655017, "grad_norm": 3.703852795188497, "learning_rate": 7.498871803118212e-06, "loss": 1.586, "step": 10890 }, { "epoch": 0.32124428718855963, "grad_norm": 3.53018654186739, "learning_rate": 7.498855969774743e-06, "loss": 1.5059, "step": 10895 }, { "epoch": 0.3213917145805691, "grad_norm": 3.601631858312455, "learning_rate": 7.498840026116907e-06, "loss": 1.5403, "step": 10900 }, { "epoch": 0.3215391419725785, "grad_norm": 3.6018074752141884, "learning_rate": 7.4988239721451715e-06, "loss": 1.5613, "step": 10905 }, { "epoch": 0.32168656936458795, "grad_norm": 3.5194723299104576, "learning_rate": 7.4988078078600105e-06, "loss": 1.5911, "step": 10910 }, { "epoch": 0.32183399675659735, "grad_norm": 3.4088638697735374, "learning_rate": 7.4987915332619e-06, "loss": 1.535, "step": 10915 }, { "epoch": 0.3219814241486068, "grad_norm": 3.696657763314165, "learning_rate": 7.498775148351316e-06, "loss": 1.5195, "step": 10920 }, { "epoch": 0.32212885154061627, "grad_norm": 3.5407514100862287, "learning_rate": 7.498758653128744e-06, "loss": 1.4855, "step": 10925 }, { "epoch": 0.32227627893262567, "grad_norm": 3.6759725728097004, "learning_rate": 7.4987420475946676e-06, "loss": 1.5441, "step": 10930 }, { "epoch": 0.32242370632463513, "grad_norm": 3.5632988999629496, "learning_rate": 7.498725331749576e-06, "loss": 1.5374, "step": 10935 }, { "epoch": 0.32257113371664453, "grad_norm": 3.5886240202393633, "learning_rate": 7.498708505593961e-06, "loss": 1.5307, "step": 10940 }, { "epoch": 0.322718561108654, "grad_norm": 3.6753141378186176, "learning_rate": 7.498691569128317e-06, "loss": 1.5398, "step": 10945 }, { "epoch": 0.32286598850066345, "grad_norm": 3.40567351544022, "learning_rate": 7.4986745223531446e-06, "loss": 1.5031, "step": 10950 }, { "epoch": 0.32301341589267285, "grad_norm": 3.6754427255343, "learning_rate": 7.498657365268942e-06, "loss": 1.4946, "step": 10955 }, { "epoch": 0.3231608432846823, "grad_norm": 3.5337415909638357, "learning_rate": 7.498640097876217e-06, "loss": 1.5401, "step": 10960 }, { "epoch": 0.3233082706766917, "grad_norm": 3.654117475766019, "learning_rate": 7.498622720175477e-06, "loss": 1.4992, "step": 10965 }, { "epoch": 0.32345569806870117, "grad_norm": 3.4322418215038977, "learning_rate": 7.498605232167232e-06, "loss": 1.5419, "step": 10970 }, { "epoch": 0.32360312546071063, "grad_norm": 3.80961280434979, "learning_rate": 7.498587633851998e-06, "loss": 1.4881, "step": 10975 }, { "epoch": 0.32375055285272003, "grad_norm": 3.642257322935868, "learning_rate": 7.498569925230292e-06, "loss": 1.5413, "step": 10980 }, { "epoch": 0.3238979802447295, "grad_norm": 3.609602488932197, "learning_rate": 7.498552106302637e-06, "loss": 1.5296, "step": 10985 }, { "epoch": 0.3240454076367389, "grad_norm": 3.507477516005813, "learning_rate": 7.498534177069553e-06, "loss": 1.548, "step": 10990 }, { "epoch": 0.32419283502874835, "grad_norm": 3.506562563701921, "learning_rate": 7.498516137531573e-06, "loss": 1.5909, "step": 10995 }, { "epoch": 0.32434026242075775, "grad_norm": 3.6280573532503757, "learning_rate": 7.498497987689224e-06, "loss": 1.5709, "step": 11000 }, { "epoch": 0.32434026242075775, "eval_loss": 1.240090012550354, "eval_runtime": 4.2911, "eval_samples_per_second": 92.284, "eval_steps_per_second": 3.03, "step": 11000 }, { "epoch": 0.3244876898127672, "grad_norm": 3.732963069332943, "learning_rate": 7.498479727543042e-06, "loss": 1.5686, "step": 11005 }, { "epoch": 0.32463511720477667, "grad_norm": 3.9248434164872203, "learning_rate": 7.498461357093564e-06, "loss": 1.4736, "step": 11010 }, { "epoch": 0.32478254459678607, "grad_norm": 4.443978712492379, "learning_rate": 7.49844287634133e-06, "loss": 1.5168, "step": 11015 }, { "epoch": 0.32492997198879553, "grad_norm": 3.7025294374730042, "learning_rate": 7.498424285286884e-06, "loss": 1.5433, "step": 11020 }, { "epoch": 0.32507739938080493, "grad_norm": 3.7989661708424016, "learning_rate": 7.498405583930773e-06, "loss": 1.5556, "step": 11025 }, { "epoch": 0.3252248267728144, "grad_norm": 3.6178686510540636, "learning_rate": 7.4983867722735475e-06, "loss": 1.4533, "step": 11030 }, { "epoch": 0.32537225416482385, "grad_norm": 3.5604836060651617, "learning_rate": 7.4983678503157605e-06, "loss": 1.5643, "step": 11035 }, { "epoch": 0.32551968155683325, "grad_norm": 3.3933918957545184, "learning_rate": 7.49834881805797e-06, "loss": 1.5728, "step": 11040 }, { "epoch": 0.3256671089488427, "grad_norm": 3.8795250977359887, "learning_rate": 7.498329675500735e-06, "loss": 1.531, "step": 11045 }, { "epoch": 0.3258145363408521, "grad_norm": 3.4518296354684, "learning_rate": 7.49831042264462e-06, "loss": 1.5681, "step": 11050 }, { "epoch": 0.32596196373286157, "grad_norm": 3.555838470365013, "learning_rate": 7.4982910594901895e-06, "loss": 1.5315, "step": 11055 }, { "epoch": 0.32610939112487103, "grad_norm": 3.5977265316776914, "learning_rate": 7.498271586038016e-06, "loss": 1.5339, "step": 11060 }, { "epoch": 0.32625681851688043, "grad_norm": 3.6750893501144466, "learning_rate": 7.4982520022886705e-06, "loss": 1.5215, "step": 11065 }, { "epoch": 0.3264042459088899, "grad_norm": 3.6552228037680425, "learning_rate": 7.498232308242729e-06, "loss": 1.5168, "step": 11070 }, { "epoch": 0.3265516733008993, "grad_norm": 3.619480824342602, "learning_rate": 7.498212503900772e-06, "loss": 1.5866, "step": 11075 }, { "epoch": 0.32669910069290875, "grad_norm": 8.620400809440833, "learning_rate": 7.498192589263382e-06, "loss": 1.6045, "step": 11080 }, { "epoch": 0.32684652808491815, "grad_norm": 4.614925736014305, "learning_rate": 7.498172564331145e-06, "loss": 1.5124, "step": 11085 }, { "epoch": 0.3269939554769276, "grad_norm": 3.5211557059791905, "learning_rate": 7.498152429104651e-06, "loss": 1.5671, "step": 11090 }, { "epoch": 0.32714138286893707, "grad_norm": 3.4320245320563005, "learning_rate": 7.4981321835844915e-06, "loss": 1.5578, "step": 11095 }, { "epoch": 0.32728881026094647, "grad_norm": 3.6051822849095374, "learning_rate": 7.498111827771262e-06, "loss": 1.5836, "step": 11100 }, { "epoch": 0.32743623765295593, "grad_norm": 3.66161793141178, "learning_rate": 7.4980913616655635e-06, "loss": 1.5335, "step": 11105 }, { "epoch": 0.32758366504496533, "grad_norm": 3.703907180088177, "learning_rate": 7.498070785267996e-06, "loss": 1.6041, "step": 11110 }, { "epoch": 0.3277310924369748, "grad_norm": 3.902690440090347, "learning_rate": 7.498050098579166e-06, "loss": 1.5453, "step": 11115 }, { "epoch": 0.32787851982898425, "grad_norm": 3.4755583789113698, "learning_rate": 7.49802930159968e-06, "loss": 1.5469, "step": 11120 }, { "epoch": 0.32802594722099365, "grad_norm": 3.518350168236764, "learning_rate": 7.498008394330155e-06, "loss": 1.5597, "step": 11125 }, { "epoch": 0.3281733746130031, "grad_norm": 3.5571932445254033, "learning_rate": 7.4979873767712015e-06, "loss": 1.5259, "step": 11130 }, { "epoch": 0.3283208020050125, "grad_norm": 3.6926812917229803, "learning_rate": 7.49796624892344e-06, "loss": 1.5508, "step": 11135 }, { "epoch": 0.32846822939702197, "grad_norm": 3.5554547992385457, "learning_rate": 7.497945010787492e-06, "loss": 1.5826, "step": 11140 }, { "epoch": 0.32861565678903143, "grad_norm": 3.4831875879123504, "learning_rate": 7.497923662363983e-06, "loss": 1.534, "step": 11145 }, { "epoch": 0.32876308418104083, "grad_norm": 3.712884675293794, "learning_rate": 7.4979022036535396e-06, "loss": 1.5484, "step": 11150 }, { "epoch": 0.3289105115730503, "grad_norm": 3.6330340018173275, "learning_rate": 7.497880634656794e-06, "loss": 1.5504, "step": 11155 }, { "epoch": 0.3290579389650597, "grad_norm": 3.493264837628755, "learning_rate": 7.497858955374382e-06, "loss": 1.5407, "step": 11160 }, { "epoch": 0.32920536635706915, "grad_norm": 3.6249502698620306, "learning_rate": 7.49783716580694e-06, "loss": 1.5653, "step": 11165 }, { "epoch": 0.32935279374907855, "grad_norm": 3.9905393873497585, "learning_rate": 7.497815265955111e-06, "loss": 1.5533, "step": 11170 }, { "epoch": 0.329500221141088, "grad_norm": 3.734995968082161, "learning_rate": 7.497793255819537e-06, "loss": 1.5192, "step": 11175 }, { "epoch": 0.32964764853309747, "grad_norm": 3.965609461352858, "learning_rate": 7.497771135400866e-06, "loss": 1.4616, "step": 11180 }, { "epoch": 0.3297950759251069, "grad_norm": 3.6158894053347463, "learning_rate": 7.4977489046997515e-06, "loss": 1.5948, "step": 11185 }, { "epoch": 0.32994250331711633, "grad_norm": 3.576793853156694, "learning_rate": 7.497726563716847e-06, "loss": 1.5903, "step": 11190 }, { "epoch": 0.33008993070912573, "grad_norm": 3.6740335183450545, "learning_rate": 7.497704112452807e-06, "loss": 1.6035, "step": 11195 }, { "epoch": 0.3302373581011352, "grad_norm": 3.7091213896996944, "learning_rate": 7.497681550908294e-06, "loss": 1.5842, "step": 11200 }, { "epoch": 0.33038478549314465, "grad_norm": 3.4142266611004577, "learning_rate": 7.497658879083973e-06, "loss": 1.5081, "step": 11205 }, { "epoch": 0.33053221288515405, "grad_norm": 3.654204288373501, "learning_rate": 7.497636096980509e-06, "loss": 1.6039, "step": 11210 }, { "epoch": 0.3306796402771635, "grad_norm": 3.6588003195332646, "learning_rate": 7.497613204598574e-06, "loss": 1.5158, "step": 11215 }, { "epoch": 0.3308270676691729, "grad_norm": 3.8120950621399112, "learning_rate": 7.497590201938842e-06, "loss": 1.5433, "step": 11220 }, { "epoch": 0.33097449506118237, "grad_norm": 3.757969304067108, "learning_rate": 7.4975670890019875e-06, "loss": 1.5399, "step": 11225 }, { "epoch": 0.33112192245319183, "grad_norm": 3.690875930569451, "learning_rate": 7.4975438657886935e-06, "loss": 1.5379, "step": 11230 }, { "epoch": 0.33126934984520123, "grad_norm": 3.606226653389303, "learning_rate": 7.497520532299641e-06, "loss": 1.5325, "step": 11235 }, { "epoch": 0.3314167772372107, "grad_norm": 3.5493961320555125, "learning_rate": 7.497497088535518e-06, "loss": 1.5072, "step": 11240 }, { "epoch": 0.3315642046292201, "grad_norm": 3.5911784595099845, "learning_rate": 7.497473534497015e-06, "loss": 1.5447, "step": 11245 }, { "epoch": 0.33171163202122955, "grad_norm": 3.5341578024413884, "learning_rate": 7.497449870184822e-06, "loss": 1.487, "step": 11250 }, { "epoch": 0.33185905941323895, "grad_norm": 3.6518195309148376, "learning_rate": 7.497426095599639e-06, "loss": 1.6083, "step": 11255 }, { "epoch": 0.3320064868052484, "grad_norm": 3.4509638954267303, "learning_rate": 7.497402210742163e-06, "loss": 1.6024, "step": 11260 }, { "epoch": 0.33215391419725787, "grad_norm": 3.4422494632579452, "learning_rate": 7.497378215613098e-06, "loss": 1.5332, "step": 11265 }, { "epoch": 0.3323013415892673, "grad_norm": 3.872712592423586, "learning_rate": 7.497354110213151e-06, "loss": 1.502, "step": 11270 }, { "epoch": 0.33244876898127673, "grad_norm": 3.7505697860337515, "learning_rate": 7.49732989454303e-06, "loss": 1.5703, "step": 11275 }, { "epoch": 0.33259619637328613, "grad_norm": 4.01930709213355, "learning_rate": 7.4973055686034475e-06, "loss": 1.5044, "step": 11280 }, { "epoch": 0.3327436237652956, "grad_norm": 3.8430550155383933, "learning_rate": 7.497281132395119e-06, "loss": 1.5831, "step": 11285 }, { "epoch": 0.33289105115730505, "grad_norm": 3.5665630562132, "learning_rate": 7.497256585918766e-06, "loss": 1.5559, "step": 11290 }, { "epoch": 0.33303847854931445, "grad_norm": 3.5211818078196093, "learning_rate": 7.497231929175107e-06, "loss": 1.5394, "step": 11295 }, { "epoch": 0.3331859059413239, "grad_norm": 3.410503818808801, "learning_rate": 7.497207162164871e-06, "loss": 1.5303, "step": 11300 }, { "epoch": 0.3333333333333333, "grad_norm": 3.4370057325174703, "learning_rate": 7.4971822848887846e-06, "loss": 1.5777, "step": 11305 }, { "epoch": 0.3334807607253428, "grad_norm": 3.706185792682485, "learning_rate": 7.497157297347581e-06, "loss": 1.5355, "step": 11310 }, { "epoch": 0.33362818811735223, "grad_norm": 3.647077443017038, "learning_rate": 7.497132199541995e-06, "loss": 1.5659, "step": 11315 }, { "epoch": 0.33377561550936163, "grad_norm": 3.6516094799970626, "learning_rate": 7.497106991472765e-06, "loss": 1.6039, "step": 11320 }, { "epoch": 0.3339230429013711, "grad_norm": 3.4062990675305245, "learning_rate": 7.497081673140633e-06, "loss": 1.5396, "step": 11325 }, { "epoch": 0.3340704702933805, "grad_norm": 3.4961714616353103, "learning_rate": 7.497056244546345e-06, "loss": 1.5142, "step": 11330 }, { "epoch": 0.33421789768538995, "grad_norm": 5.348084852980465, "learning_rate": 7.497030705690647e-06, "loss": 1.5552, "step": 11335 }, { "epoch": 0.33436532507739936, "grad_norm": 3.492012108204824, "learning_rate": 7.4970050565742935e-06, "loss": 1.5753, "step": 11340 }, { "epoch": 0.3345127524694088, "grad_norm": 3.6253378062456507, "learning_rate": 7.4969792971980364e-06, "loss": 1.5406, "step": 11345 }, { "epoch": 0.33466017986141827, "grad_norm": 3.4627805046231885, "learning_rate": 7.496953427562636e-06, "loss": 1.4881, "step": 11350 }, { "epoch": 0.3348076072534277, "grad_norm": 3.3635485926907744, "learning_rate": 7.4969274476688506e-06, "loss": 1.5392, "step": 11355 }, { "epoch": 0.33495503464543713, "grad_norm": 3.578005335652263, "learning_rate": 7.496901357517447e-06, "loss": 1.5709, "step": 11360 }, { "epoch": 0.33510246203744654, "grad_norm": 3.733549975369096, "learning_rate": 7.496875157109193e-06, "loss": 1.5064, "step": 11365 }, { "epoch": 0.335249889429456, "grad_norm": 3.566222629109316, "learning_rate": 7.496848846444859e-06, "loss": 1.5302, "step": 11370 }, { "epoch": 0.33539731682146545, "grad_norm": 3.7412465980594303, "learning_rate": 7.496822425525219e-06, "loss": 1.5219, "step": 11375 }, { "epoch": 0.33554474421347485, "grad_norm": 3.5731909406278177, "learning_rate": 7.496795894351051e-06, "loss": 1.5874, "step": 11380 }, { "epoch": 0.3356921716054843, "grad_norm": 3.744875341455367, "learning_rate": 7.496769252923134e-06, "loss": 1.5372, "step": 11385 }, { "epoch": 0.3358395989974937, "grad_norm": 3.514222301316183, "learning_rate": 7.496742501242256e-06, "loss": 1.5755, "step": 11390 }, { "epoch": 0.3359870263895032, "grad_norm": 3.5897930931118216, "learning_rate": 7.4967156393091995e-06, "loss": 1.551, "step": 11395 }, { "epoch": 0.33613445378151263, "grad_norm": 3.9315915517671995, "learning_rate": 7.496688667124759e-06, "loss": 1.515, "step": 11400 }, { "epoch": 0.33628188117352203, "grad_norm": 3.7042852368847377, "learning_rate": 7.496661584689724e-06, "loss": 1.5147, "step": 11405 }, { "epoch": 0.3364293085655315, "grad_norm": 3.641803273033955, "learning_rate": 7.496634392004896e-06, "loss": 1.5693, "step": 11410 }, { "epoch": 0.3365767359575409, "grad_norm": 3.666793009289732, "learning_rate": 7.49660708907107e-06, "loss": 1.5664, "step": 11415 }, { "epoch": 0.33672416334955035, "grad_norm": 3.7311174058476517, "learning_rate": 7.496579675889054e-06, "loss": 1.5479, "step": 11420 }, { "epoch": 0.33687159074155976, "grad_norm": 3.384498713162082, "learning_rate": 7.496552152459651e-06, "loss": 1.5196, "step": 11425 }, { "epoch": 0.3370190181335692, "grad_norm": 3.5189472605486936, "learning_rate": 7.496524518783675e-06, "loss": 1.5812, "step": 11430 }, { "epoch": 0.3371664455255787, "grad_norm": 3.7032896301251887, "learning_rate": 7.496496774861936e-06, "loss": 1.5426, "step": 11435 }, { "epoch": 0.3373138729175881, "grad_norm": 3.7840998341961036, "learning_rate": 7.496468920695252e-06, "loss": 1.5507, "step": 11440 }, { "epoch": 0.33746130030959753, "grad_norm": 3.456065550524248, "learning_rate": 7.496440956284442e-06, "loss": 1.5417, "step": 11445 }, { "epoch": 0.33760872770160694, "grad_norm": 3.3993682725656993, "learning_rate": 7.496412881630328e-06, "loss": 1.4804, "step": 11450 }, { "epoch": 0.3377561550936164, "grad_norm": 3.5368546221461488, "learning_rate": 7.4963846967337375e-06, "loss": 1.5388, "step": 11455 }, { "epoch": 0.33790358248562585, "grad_norm": 3.3357649992978615, "learning_rate": 7.496356401595499e-06, "loss": 1.5424, "step": 11460 }, { "epoch": 0.33805100987763526, "grad_norm": 3.566420018896237, "learning_rate": 7.496327996216446e-06, "loss": 1.5317, "step": 11465 }, { "epoch": 0.3381984372696447, "grad_norm": 3.450926243039604, "learning_rate": 7.4962994805974135e-06, "loss": 1.4709, "step": 11470 }, { "epoch": 0.3383458646616541, "grad_norm": 3.4680534461847086, "learning_rate": 7.496270854739241e-06, "loss": 1.5223, "step": 11475 }, { "epoch": 0.3384932920536636, "grad_norm": 3.461637927939886, "learning_rate": 7.496242118642771e-06, "loss": 1.4962, "step": 11480 }, { "epoch": 0.33864071944567303, "grad_norm": 3.6933371573577793, "learning_rate": 7.496213272308849e-06, "loss": 1.621, "step": 11485 }, { "epoch": 0.33878814683768244, "grad_norm": 3.55744257168945, "learning_rate": 7.496184315738323e-06, "loss": 1.5116, "step": 11490 }, { "epoch": 0.3389355742296919, "grad_norm": 3.558974403675045, "learning_rate": 7.496155248932047e-06, "loss": 1.5202, "step": 11495 }, { "epoch": 0.3390830016217013, "grad_norm": 3.515008586653725, "learning_rate": 7.496126071890874e-06, "loss": 1.625, "step": 11500 }, { "epoch": 0.3390830016217013, "eval_loss": 1.2361304759979248, "eval_runtime": 4.1997, "eval_samples_per_second": 94.292, "eval_steps_per_second": 3.095, "step": 11500 }, { "epoch": 0.33923042901371075, "grad_norm": 3.6142899583988295, "learning_rate": 7.4960967846156645e-06, "loss": 1.5953, "step": 11505 }, { "epoch": 0.33937785640572016, "grad_norm": 3.567516487802583, "learning_rate": 7.496067387107279e-06, "loss": 1.5267, "step": 11510 }, { "epoch": 0.3395252837977296, "grad_norm": 3.5329052287869724, "learning_rate": 7.496037879366584e-06, "loss": 1.57, "step": 11515 }, { "epoch": 0.3396727111897391, "grad_norm": 3.573661682408017, "learning_rate": 7.4960082613944456e-06, "loss": 1.5568, "step": 11520 }, { "epoch": 0.3398201385817485, "grad_norm": 3.8514727945559715, "learning_rate": 7.495978533191737e-06, "loss": 1.5643, "step": 11525 }, { "epoch": 0.33996756597375793, "grad_norm": 3.6153544218710905, "learning_rate": 7.495948694759333e-06, "loss": 1.5463, "step": 11530 }, { "epoch": 0.34011499336576734, "grad_norm": 3.4684740134796863, "learning_rate": 7.4959187460981115e-06, "loss": 1.5283, "step": 11535 }, { "epoch": 0.3402624207577768, "grad_norm": 3.5825490206336217, "learning_rate": 7.495888687208953e-06, "loss": 1.5571, "step": 11540 }, { "epoch": 0.34040984814978625, "grad_norm": 3.5713776191923707, "learning_rate": 7.495858518092743e-06, "loss": 1.5555, "step": 11545 }, { "epoch": 0.34055727554179566, "grad_norm": 3.6708433626795895, "learning_rate": 7.4958282387503675e-06, "loss": 1.5228, "step": 11550 }, { "epoch": 0.3407047029338051, "grad_norm": 3.7570105995370304, "learning_rate": 7.49579784918272e-06, "loss": 1.5586, "step": 11555 }, { "epoch": 0.3408521303258145, "grad_norm": 3.7654371324288736, "learning_rate": 7.4957673493906934e-06, "loss": 1.5217, "step": 11560 }, { "epoch": 0.340999557717824, "grad_norm": 3.5477526187178126, "learning_rate": 7.495736739375184e-06, "loss": 1.5242, "step": 11565 }, { "epoch": 0.34114698510983343, "grad_norm": 3.9558422435130622, "learning_rate": 7.495706019137096e-06, "loss": 1.576, "step": 11570 }, { "epoch": 0.34129441250184284, "grad_norm": 3.83212077371757, "learning_rate": 7.49567518867733e-06, "loss": 1.5297, "step": 11575 }, { "epoch": 0.3414418398938523, "grad_norm": 3.3910473251420186, "learning_rate": 7.495644247996794e-06, "loss": 1.5481, "step": 11580 }, { "epoch": 0.3415892672858617, "grad_norm": 3.5773683094580284, "learning_rate": 7.4956131970964e-06, "loss": 1.5225, "step": 11585 }, { "epoch": 0.34173669467787116, "grad_norm": 3.5221159022538084, "learning_rate": 7.49558203597706e-06, "loss": 1.5282, "step": 11590 }, { "epoch": 0.34188412206988056, "grad_norm": 6.0393568673298885, "learning_rate": 7.4955507646396915e-06, "loss": 1.511, "step": 11595 }, { "epoch": 0.34203154946189, "grad_norm": 3.488646162045194, "learning_rate": 7.495519383085216e-06, "loss": 1.5339, "step": 11600 }, { "epoch": 0.3421789768538995, "grad_norm": 3.4087973297992904, "learning_rate": 7.495487891314555e-06, "loss": 1.5112, "step": 11605 }, { "epoch": 0.3423264042459089, "grad_norm": 3.4532809958201427, "learning_rate": 7.495456289328635e-06, "loss": 1.507, "step": 11610 }, { "epoch": 0.34247383163791834, "grad_norm": 3.56093305533582, "learning_rate": 7.495424577128387e-06, "loss": 1.5455, "step": 11615 }, { "epoch": 0.34262125902992774, "grad_norm": 3.5773732612966027, "learning_rate": 7.495392754714745e-06, "loss": 1.5991, "step": 11620 }, { "epoch": 0.3427686864219372, "grad_norm": 3.6716120340893115, "learning_rate": 7.4953608220886435e-06, "loss": 1.5585, "step": 11625 }, { "epoch": 0.34291611381394665, "grad_norm": 3.4311048087841987, "learning_rate": 7.495328779251024e-06, "loss": 1.5566, "step": 11630 }, { "epoch": 0.34306354120595606, "grad_norm": 3.5774192645431007, "learning_rate": 7.4952966262028274e-06, "loss": 1.5144, "step": 11635 }, { "epoch": 0.3432109685979655, "grad_norm": 3.622676477898308, "learning_rate": 7.495264362945002e-06, "loss": 1.5422, "step": 11640 }, { "epoch": 0.3433583959899749, "grad_norm": 3.470693026816011, "learning_rate": 7.4952319894784946e-06, "loss": 1.535, "step": 11645 }, { "epoch": 0.3435058233819844, "grad_norm": 3.39899730736127, "learning_rate": 7.495199505804261e-06, "loss": 1.5358, "step": 11650 }, { "epoch": 0.34365325077399383, "grad_norm": 3.3321196898256296, "learning_rate": 7.495166911923255e-06, "loss": 1.5271, "step": 11655 }, { "epoch": 0.34380067816600324, "grad_norm": 3.638664345366668, "learning_rate": 7.495134207836435e-06, "loss": 1.5718, "step": 11660 }, { "epoch": 0.3439481055580127, "grad_norm": 3.467172331024815, "learning_rate": 7.495101393544765e-06, "loss": 1.5614, "step": 11665 }, { "epoch": 0.3440955329500221, "grad_norm": 3.444937218673788, "learning_rate": 7.49506846904921e-06, "loss": 1.5624, "step": 11670 }, { "epoch": 0.34424296034203156, "grad_norm": 3.5803965828983637, "learning_rate": 7.495035434350739e-06, "loss": 1.5813, "step": 11675 }, { "epoch": 0.34439038773404096, "grad_norm": 3.489016721466992, "learning_rate": 7.495002289450325e-06, "loss": 1.5881, "step": 11680 }, { "epoch": 0.3445378151260504, "grad_norm": 3.5361828821608743, "learning_rate": 7.494969034348941e-06, "loss": 1.5317, "step": 11685 }, { "epoch": 0.3446852425180599, "grad_norm": 3.513446686707788, "learning_rate": 7.4949356690475665e-06, "loss": 1.5339, "step": 11690 }, { "epoch": 0.3448326699100693, "grad_norm": 3.818590644506399, "learning_rate": 7.494902193547184e-06, "loss": 1.5177, "step": 11695 }, { "epoch": 0.34498009730207874, "grad_norm": 3.298889991361523, "learning_rate": 7.494868607848778e-06, "loss": 1.4925, "step": 11700 }, { "epoch": 0.34512752469408814, "grad_norm": 3.424993175045535, "learning_rate": 7.494834911953339e-06, "loss": 1.5001, "step": 11705 }, { "epoch": 0.3452749520860976, "grad_norm": 3.678646676055216, "learning_rate": 7.494801105861855e-06, "loss": 1.5307, "step": 11710 }, { "epoch": 0.34542237947810706, "grad_norm": 3.619325871549164, "learning_rate": 7.494767189575323e-06, "loss": 1.5333, "step": 11715 }, { "epoch": 0.34556980687011646, "grad_norm": 3.529096374244575, "learning_rate": 7.4947331630947395e-06, "loss": 1.5186, "step": 11720 }, { "epoch": 0.3457172342621259, "grad_norm": 3.3900852103496675, "learning_rate": 7.494699026421106e-06, "loss": 1.5665, "step": 11725 }, { "epoch": 0.3458646616541353, "grad_norm": 3.4919717072047725, "learning_rate": 7.494664779555429e-06, "loss": 1.5428, "step": 11730 }, { "epoch": 0.3460120890461448, "grad_norm": 3.5383891034373094, "learning_rate": 7.494630422498714e-06, "loss": 1.5665, "step": 11735 }, { "epoch": 0.34615951643815424, "grad_norm": 3.453493447334754, "learning_rate": 7.494595955251973e-06, "loss": 1.5658, "step": 11740 }, { "epoch": 0.34630694383016364, "grad_norm": 3.460297566159712, "learning_rate": 7.4945613778162215e-06, "loss": 1.565, "step": 11745 }, { "epoch": 0.3464543712221731, "grad_norm": 3.464879171322296, "learning_rate": 7.494526690192475e-06, "loss": 1.5786, "step": 11750 }, { "epoch": 0.3466017986141825, "grad_norm": 3.4057952433310112, "learning_rate": 7.494491892381754e-06, "loss": 1.522, "step": 11755 }, { "epoch": 0.34674922600619196, "grad_norm": 3.348240770466893, "learning_rate": 7.494456984385084e-06, "loss": 1.5251, "step": 11760 }, { "epoch": 0.34689665339820136, "grad_norm": 3.704536972169863, "learning_rate": 7.494421966203491e-06, "loss": 1.5262, "step": 11765 }, { "epoch": 0.3470440807902108, "grad_norm": 3.6345227165710114, "learning_rate": 7.494386837838006e-06, "loss": 1.4888, "step": 11770 }, { "epoch": 0.3471915081822203, "grad_norm": 3.6554865347657026, "learning_rate": 7.494351599289663e-06, "loss": 1.5742, "step": 11775 }, { "epoch": 0.3473389355742297, "grad_norm": 3.552130207992302, "learning_rate": 7.494316250559499e-06, "loss": 1.5563, "step": 11780 }, { "epoch": 0.34748636296623914, "grad_norm": 3.5411472101226993, "learning_rate": 7.4942807916485535e-06, "loss": 1.5315, "step": 11785 }, { "epoch": 0.34763379035824854, "grad_norm": 3.5272918744881676, "learning_rate": 7.494245222557869e-06, "loss": 1.5874, "step": 11790 }, { "epoch": 0.347781217750258, "grad_norm": 3.2828864946719123, "learning_rate": 7.494209543288495e-06, "loss": 1.4895, "step": 11795 }, { "epoch": 0.34792864514226746, "grad_norm": 3.409965326740287, "learning_rate": 7.494173753841479e-06, "loss": 1.509, "step": 11800 }, { "epoch": 0.34807607253427686, "grad_norm": 3.433349856758238, "learning_rate": 7.494137854217875e-06, "loss": 1.5242, "step": 11805 }, { "epoch": 0.3482234999262863, "grad_norm": 3.617086590376336, "learning_rate": 7.4941018444187395e-06, "loss": 1.5335, "step": 11810 }, { "epoch": 0.3483709273182957, "grad_norm": 3.528156676899986, "learning_rate": 7.494065724445132e-06, "loss": 1.5904, "step": 11815 }, { "epoch": 0.3485183547103052, "grad_norm": 3.5907282492324217, "learning_rate": 7.494029494298114e-06, "loss": 1.5396, "step": 11820 }, { "epoch": 0.34866578210231464, "grad_norm": 3.694146080247811, "learning_rate": 7.493993153978754e-06, "loss": 1.5711, "step": 11825 }, { "epoch": 0.34881320949432404, "grad_norm": 3.5270890130765, "learning_rate": 7.4939567034881204e-06, "loss": 1.534, "step": 11830 }, { "epoch": 0.3489606368863335, "grad_norm": 3.2805432800071714, "learning_rate": 7.493920142827284e-06, "loss": 1.4806, "step": 11835 }, { "epoch": 0.3491080642783429, "grad_norm": 3.582229541596619, "learning_rate": 7.493883471997325e-06, "loss": 1.5143, "step": 11840 }, { "epoch": 0.34925549167035236, "grad_norm": 3.685495110726003, "learning_rate": 7.493846690999318e-06, "loss": 1.5464, "step": 11845 }, { "epoch": 0.34940291906236176, "grad_norm": 3.5613729587503204, "learning_rate": 7.493809799834348e-06, "loss": 1.5229, "step": 11850 }, { "epoch": 0.3495503464543712, "grad_norm": 3.7117512838699067, "learning_rate": 7.493772798503499e-06, "loss": 1.541, "step": 11855 }, { "epoch": 0.3496977738463807, "grad_norm": 3.6543762618406155, "learning_rate": 7.49373568700786e-06, "loss": 1.5453, "step": 11860 }, { "epoch": 0.3498452012383901, "grad_norm": 3.53720053210883, "learning_rate": 7.493698465348525e-06, "loss": 1.5277, "step": 11865 }, { "epoch": 0.34999262863039954, "grad_norm": 3.4603546466530246, "learning_rate": 7.493661133526586e-06, "loss": 1.5122, "step": 11870 }, { "epoch": 0.35014005602240894, "grad_norm": 3.736063051568664, "learning_rate": 7.493623691543145e-06, "loss": 1.5506, "step": 11875 }, { "epoch": 0.3502874834144184, "grad_norm": 3.738953553253794, "learning_rate": 7.493586139399301e-06, "loss": 1.5447, "step": 11880 }, { "epoch": 0.35043491080642786, "grad_norm": 3.496121130656935, "learning_rate": 7.493548477096161e-06, "loss": 1.6026, "step": 11885 }, { "epoch": 0.35058233819843726, "grad_norm": 3.484989725861026, "learning_rate": 7.493510704634831e-06, "loss": 1.5223, "step": 11890 }, { "epoch": 0.3507297655904467, "grad_norm": 3.6169063016763223, "learning_rate": 7.493472822016426e-06, "loss": 1.5765, "step": 11895 }, { "epoch": 0.3508771929824561, "grad_norm": 3.4784745453233117, "learning_rate": 7.493434829242057e-06, "loss": 1.5336, "step": 11900 }, { "epoch": 0.3510246203744656, "grad_norm": 3.5106669324468753, "learning_rate": 7.493396726312844e-06, "loss": 1.4812, "step": 11905 }, { "epoch": 0.35117204776647504, "grad_norm": 3.4578477051607477, "learning_rate": 7.4933585132299075e-06, "loss": 1.5746, "step": 11910 }, { "epoch": 0.35131947515848444, "grad_norm": 3.3870552549670654, "learning_rate": 7.493320189994373e-06, "loss": 1.5372, "step": 11915 }, { "epoch": 0.3514669025504939, "grad_norm": 3.4385852838122544, "learning_rate": 7.493281756607366e-06, "loss": 1.5324, "step": 11920 }, { "epoch": 0.3516143299425033, "grad_norm": 3.363774138168263, "learning_rate": 7.4932432130700205e-06, "loss": 1.4465, "step": 11925 }, { "epoch": 0.35176175733451276, "grad_norm": 3.5834968073094466, "learning_rate": 7.493204559383468e-06, "loss": 1.5554, "step": 11930 }, { "epoch": 0.35190918472652216, "grad_norm": 3.4572434791817104, "learning_rate": 7.493165795548848e-06, "loss": 1.5636, "step": 11935 }, { "epoch": 0.3520566121185316, "grad_norm": 3.3645842101391086, "learning_rate": 7.4931269215673e-06, "loss": 1.5803, "step": 11940 }, { "epoch": 0.3522040395105411, "grad_norm": 3.654242848492029, "learning_rate": 7.493087937439968e-06, "loss": 1.5064, "step": 11945 }, { "epoch": 0.3523514669025505, "grad_norm": 3.440163906298264, "learning_rate": 7.493048843167999e-06, "loss": 1.5725, "step": 11950 }, { "epoch": 0.35249889429455994, "grad_norm": 3.4323116731807475, "learning_rate": 7.4930096387525436e-06, "loss": 1.504, "step": 11955 }, { "epoch": 0.35264632168656934, "grad_norm": 3.2879171716969737, "learning_rate": 7.492970324194756e-06, "loss": 1.5095, "step": 11960 }, { "epoch": 0.3527937490785788, "grad_norm": 3.577828680616783, "learning_rate": 7.492930899495792e-06, "loss": 1.5816, "step": 11965 }, { "epoch": 0.35294117647058826, "grad_norm": 3.567069025081746, "learning_rate": 7.492891364656813e-06, "loss": 1.5102, "step": 11970 }, { "epoch": 0.35308860386259766, "grad_norm": 3.687768871172511, "learning_rate": 7.492851719678982e-06, "loss": 1.5197, "step": 11975 }, { "epoch": 0.3532360312546071, "grad_norm": 3.35513072844111, "learning_rate": 7.492811964563464e-06, "loss": 1.5367, "step": 11980 }, { "epoch": 0.3533834586466165, "grad_norm": 3.3515331402306567, "learning_rate": 7.492772099311432e-06, "loss": 1.5218, "step": 11985 }, { "epoch": 0.353530886038626, "grad_norm": 3.5402198466027075, "learning_rate": 7.492732123924055e-06, "loss": 1.5593, "step": 11990 }, { "epoch": 0.35367831343063544, "grad_norm": 3.398245882960147, "learning_rate": 7.492692038402513e-06, "loss": 1.5354, "step": 11995 }, { "epoch": 0.35382574082264484, "grad_norm": 3.4327620182759215, "learning_rate": 7.4926518427479825e-06, "loss": 1.5534, "step": 12000 }, { "epoch": 0.35382574082264484, "eval_loss": 1.231826663017273, "eval_runtime": 4.2573, "eval_samples_per_second": 93.016, "eval_steps_per_second": 3.054, "step": 12000 }, { "epoch": 0.3539731682146543, "grad_norm": 3.487791776670241, "learning_rate": 7.49261153696165e-06, "loss": 1.5437, "step": 12005 }, { "epoch": 0.3541205956066637, "grad_norm": 3.5031581982664846, "learning_rate": 7.492571121044698e-06, "loss": 1.4707, "step": 12010 }, { "epoch": 0.35426802299867316, "grad_norm": 3.531818745936706, "learning_rate": 7.492530594998318e-06, "loss": 1.5215, "step": 12015 }, { "epoch": 0.35441545039068256, "grad_norm": 3.777046873442222, "learning_rate": 7.492489958823701e-06, "loss": 1.5712, "step": 12020 }, { "epoch": 0.354562877782692, "grad_norm": 3.4870795132046237, "learning_rate": 7.492449212522044e-06, "loss": 1.5571, "step": 12025 }, { "epoch": 0.3547103051747015, "grad_norm": 3.564881399045572, "learning_rate": 7.492408356094545e-06, "loss": 1.5194, "step": 12030 }, { "epoch": 0.3548577325667109, "grad_norm": 5.197269070085782, "learning_rate": 7.492367389542406e-06, "loss": 1.5395, "step": 12035 }, { "epoch": 0.35500515995872034, "grad_norm": 3.44263175400265, "learning_rate": 7.492326312866835e-06, "loss": 1.5423, "step": 12040 }, { "epoch": 0.35515258735072974, "grad_norm": 3.7076130508759184, "learning_rate": 7.492285126069038e-06, "loss": 1.5359, "step": 12045 }, { "epoch": 0.3553000147427392, "grad_norm": 3.7910374520287458, "learning_rate": 7.492243829150229e-06, "loss": 1.5068, "step": 12050 }, { "epoch": 0.35544744213474866, "grad_norm": 3.4325173143216285, "learning_rate": 7.49220242211162e-06, "loss": 1.5078, "step": 12055 }, { "epoch": 0.35559486952675806, "grad_norm": 3.5483175274021783, "learning_rate": 7.492160904954433e-06, "loss": 1.5031, "step": 12060 }, { "epoch": 0.3557422969187675, "grad_norm": 3.7008890845136126, "learning_rate": 7.492119277679887e-06, "loss": 1.5674, "step": 12065 }, { "epoch": 0.3558897243107769, "grad_norm": 3.526784179943535, "learning_rate": 7.49207754028921e-06, "loss": 1.5591, "step": 12070 }, { "epoch": 0.3560371517027864, "grad_norm": 3.5059322132393893, "learning_rate": 7.492035692783628e-06, "loss": 1.5573, "step": 12075 }, { "epoch": 0.35618457909479584, "grad_norm": 3.8157375743454307, "learning_rate": 7.491993735164372e-06, "loss": 1.5348, "step": 12080 }, { "epoch": 0.35633200648680524, "grad_norm": 3.3546886122299178, "learning_rate": 7.491951667432678e-06, "loss": 1.512, "step": 12085 }, { "epoch": 0.3564794338788147, "grad_norm": 3.5047574121041816, "learning_rate": 7.4919094895897825e-06, "loss": 1.5808, "step": 12090 }, { "epoch": 0.3566268612708241, "grad_norm": 3.768336370547949, "learning_rate": 7.491867201636928e-06, "loss": 1.5407, "step": 12095 }, { "epoch": 0.35677428866283356, "grad_norm": 3.556794233610835, "learning_rate": 7.491824803575359e-06, "loss": 1.5476, "step": 12100 }, { "epoch": 0.35692171605484296, "grad_norm": 3.4426991378636513, "learning_rate": 7.491782295406322e-06, "loss": 1.5089, "step": 12105 }, { "epoch": 0.3570691434468524, "grad_norm": 3.610523465188863, "learning_rate": 7.491739677131068e-06, "loss": 1.5331, "step": 12110 }, { "epoch": 0.3572165708388619, "grad_norm": 3.545242740856067, "learning_rate": 7.491696948750852e-06, "loss": 1.5136, "step": 12115 }, { "epoch": 0.3573639982308713, "grad_norm": 3.7307544965404493, "learning_rate": 7.49165411026693e-06, "loss": 1.5357, "step": 12120 }, { "epoch": 0.35751142562288074, "grad_norm": 3.473891057972407, "learning_rate": 7.4916111616805636e-06, "loss": 1.5258, "step": 12125 }, { "epoch": 0.35765885301489014, "grad_norm": 3.9309712307165943, "learning_rate": 7.491568102993016e-06, "loss": 1.5466, "step": 12130 }, { "epoch": 0.3578062804068996, "grad_norm": 3.7240369617244684, "learning_rate": 7.491524934205555e-06, "loss": 1.539, "step": 12135 }, { "epoch": 0.35795370779890906, "grad_norm": 3.363950037947942, "learning_rate": 7.49148165531945e-06, "loss": 1.5142, "step": 12140 }, { "epoch": 0.35810113519091846, "grad_norm": 3.7186141026519417, "learning_rate": 7.491438266335975e-06, "loss": 1.5359, "step": 12145 }, { "epoch": 0.3582485625829279, "grad_norm": 3.4654755575938148, "learning_rate": 7.491394767256407e-06, "loss": 1.4812, "step": 12150 }, { "epoch": 0.3583959899749373, "grad_norm": 3.4919720967406485, "learning_rate": 7.491351158082025e-06, "loss": 1.5617, "step": 12155 }, { "epoch": 0.3585434173669468, "grad_norm": 3.3651884648532926, "learning_rate": 7.4913074388141145e-06, "loss": 1.5459, "step": 12160 }, { "epoch": 0.35869084475895624, "grad_norm": 3.4709010194393413, "learning_rate": 7.49126360945396e-06, "loss": 1.5041, "step": 12165 }, { "epoch": 0.35883827215096564, "grad_norm": 3.5175943243186234, "learning_rate": 7.491219670002851e-06, "loss": 1.566, "step": 12170 }, { "epoch": 0.3589856995429751, "grad_norm": 3.4050843374580526, "learning_rate": 7.4911756204620815e-06, "loss": 1.495, "step": 12175 }, { "epoch": 0.3591331269349845, "grad_norm": 3.5365073216065985, "learning_rate": 7.491131460832947e-06, "loss": 1.5661, "step": 12180 }, { "epoch": 0.35928055432699396, "grad_norm": 3.3967023891594827, "learning_rate": 7.491087191116748e-06, "loss": 1.5577, "step": 12185 }, { "epoch": 0.35942798171900336, "grad_norm": 3.9017387553486205, "learning_rate": 7.4910428113147866e-06, "loss": 1.5544, "step": 12190 }, { "epoch": 0.3595754091110128, "grad_norm": 3.4108279500144882, "learning_rate": 7.490998321428368e-06, "loss": 1.528, "step": 12195 }, { "epoch": 0.3597228365030223, "grad_norm": 3.4664931160769505, "learning_rate": 7.490953721458803e-06, "loss": 1.5403, "step": 12200 }, { "epoch": 0.3598702638950317, "grad_norm": 3.573676263600548, "learning_rate": 7.490909011407403e-06, "loss": 1.5073, "step": 12205 }, { "epoch": 0.36001769128704114, "grad_norm": 3.434738852268743, "learning_rate": 7.490864191275482e-06, "loss": 1.5429, "step": 12210 }, { "epoch": 0.36016511867905054, "grad_norm": 3.562240483424839, "learning_rate": 7.490819261064362e-06, "loss": 1.4955, "step": 12215 }, { "epoch": 0.36031254607106, "grad_norm": 3.586811959997335, "learning_rate": 7.490774220775363e-06, "loss": 1.5584, "step": 12220 }, { "epoch": 0.36045997346306946, "grad_norm": 3.35041242018194, "learning_rate": 7.490729070409812e-06, "loss": 1.5381, "step": 12225 }, { "epoch": 0.36060740085507886, "grad_norm": 3.585301383186257, "learning_rate": 7.4906838099690355e-06, "loss": 1.5889, "step": 12230 }, { "epoch": 0.3607548282470883, "grad_norm": 3.3383671234780894, "learning_rate": 7.490638439454367e-06, "loss": 1.5429, "step": 12235 }, { "epoch": 0.3609022556390977, "grad_norm": 3.484899914478219, "learning_rate": 7.4905929588671415e-06, "loss": 1.5302, "step": 12240 }, { "epoch": 0.3610496830311072, "grad_norm": 3.453289450945932, "learning_rate": 7.490547368208697e-06, "loss": 1.5421, "step": 12245 }, { "epoch": 0.36119711042311664, "grad_norm": 3.3994318194553963, "learning_rate": 7.4905016674803735e-06, "loss": 1.5817, "step": 12250 }, { "epoch": 0.36134453781512604, "grad_norm": 3.6118217732803335, "learning_rate": 7.490455856683518e-06, "loss": 1.6049, "step": 12255 }, { "epoch": 0.3614919652071355, "grad_norm": 3.5624541126153555, "learning_rate": 7.490409935819478e-06, "loss": 1.5125, "step": 12260 }, { "epoch": 0.3616393925991449, "grad_norm": 3.5602581449036865, "learning_rate": 7.490363904889605e-06, "loss": 1.5487, "step": 12265 }, { "epoch": 0.36178681999115436, "grad_norm": 3.9168110919958807, "learning_rate": 7.490317763895251e-06, "loss": 1.5074, "step": 12270 }, { "epoch": 0.36193424738316377, "grad_norm": 3.6199170946526964, "learning_rate": 7.490271512837778e-06, "loss": 1.5284, "step": 12275 }, { "epoch": 0.3620816747751732, "grad_norm": 3.5540735838776913, "learning_rate": 7.490225151718544e-06, "loss": 1.5331, "step": 12280 }, { "epoch": 0.3622291021671827, "grad_norm": 3.4382911542450367, "learning_rate": 7.490178680538914e-06, "loss": 1.5229, "step": 12285 }, { "epoch": 0.3623765295591921, "grad_norm": 3.440895752657619, "learning_rate": 7.4901320993002556e-06, "loss": 1.578, "step": 12290 }, { "epoch": 0.36252395695120154, "grad_norm": 3.5983766448279044, "learning_rate": 7.490085408003939e-06, "loss": 1.5267, "step": 12295 }, { "epoch": 0.36267138434321095, "grad_norm": 3.358992010868909, "learning_rate": 7.490038606651339e-06, "loss": 1.5527, "step": 12300 }, { "epoch": 0.3628188117352204, "grad_norm": 3.512328087104938, "learning_rate": 7.4899916952438315e-06, "loss": 1.556, "step": 12305 }, { "epoch": 0.36296623912722986, "grad_norm": 3.4815420931226644, "learning_rate": 7.4899446737828e-06, "loss": 1.5127, "step": 12310 }, { "epoch": 0.36311366651923926, "grad_norm": 3.4125064295891363, "learning_rate": 7.489897542269624e-06, "loss": 1.5769, "step": 12315 }, { "epoch": 0.3632610939112487, "grad_norm": 3.523345292040992, "learning_rate": 7.4898503007056926e-06, "loss": 1.5309, "step": 12320 }, { "epoch": 0.3634085213032581, "grad_norm": 3.441336222148018, "learning_rate": 7.4898029490923955e-06, "loss": 1.5107, "step": 12325 }, { "epoch": 0.3635559486952676, "grad_norm": 3.346601049536743, "learning_rate": 7.489755487431126e-06, "loss": 1.4829, "step": 12330 }, { "epoch": 0.36370337608727704, "grad_norm": 3.590355672943787, "learning_rate": 7.489707915723281e-06, "loss": 1.5182, "step": 12335 }, { "epoch": 0.36385080347928644, "grad_norm": 3.3034950229853557, "learning_rate": 7.4896602339702615e-06, "loss": 1.5081, "step": 12340 }, { "epoch": 0.3639982308712959, "grad_norm": 3.2586197744117893, "learning_rate": 7.489612442173468e-06, "loss": 1.5071, "step": 12345 }, { "epoch": 0.3641456582633053, "grad_norm": 3.4399818756297065, "learning_rate": 7.4895645403343095e-06, "loss": 1.5634, "step": 12350 }, { "epoch": 0.36429308565531476, "grad_norm": 3.3842322150952246, "learning_rate": 7.489516528454193e-06, "loss": 1.565, "step": 12355 }, { "epoch": 0.36444051304732417, "grad_norm": 3.2795173815874508, "learning_rate": 7.489468406534533e-06, "loss": 1.5535, "step": 12360 }, { "epoch": 0.3645879404393336, "grad_norm": 3.419862305817055, "learning_rate": 7.489420174576745e-06, "loss": 1.5545, "step": 12365 }, { "epoch": 0.3647353678313431, "grad_norm": 3.541663649412574, "learning_rate": 7.489371832582249e-06, "loss": 1.5197, "step": 12370 }, { "epoch": 0.3648827952233525, "grad_norm": 3.4163148955355114, "learning_rate": 7.4893233805524655e-06, "loss": 1.5287, "step": 12375 }, { "epoch": 0.36503022261536194, "grad_norm": 3.4529083638857667, "learning_rate": 7.489274818488824e-06, "loss": 1.5455, "step": 12380 }, { "epoch": 0.36517765000737135, "grad_norm": 3.521378143085934, "learning_rate": 7.489226146392749e-06, "loss": 1.5156, "step": 12385 }, { "epoch": 0.3653250773993808, "grad_norm": 3.4181001346929447, "learning_rate": 7.489177364265676e-06, "loss": 1.5461, "step": 12390 }, { "epoch": 0.36547250479139026, "grad_norm": 3.412926810392608, "learning_rate": 7.489128472109039e-06, "loss": 1.5799, "step": 12395 }, { "epoch": 0.36561993218339967, "grad_norm": 3.527137225115546, "learning_rate": 7.489079469924276e-06, "loss": 1.5623, "step": 12400 }, { "epoch": 0.3657673595754091, "grad_norm": 3.358614331035373, "learning_rate": 7.489030357712831e-06, "loss": 1.5266, "step": 12405 }, { "epoch": 0.3659147869674185, "grad_norm": 3.575829401309199, "learning_rate": 7.488981135476149e-06, "loss": 1.5875, "step": 12410 }, { "epoch": 0.366062214359428, "grad_norm": 3.7430015318037073, "learning_rate": 7.4889318032156765e-06, "loss": 1.5656, "step": 12415 }, { "epoch": 0.36620964175143744, "grad_norm": 3.5966526359106545, "learning_rate": 7.488882360932867e-06, "loss": 1.581, "step": 12420 }, { "epoch": 0.36635706914344685, "grad_norm": 3.3884498255927804, "learning_rate": 7.488832808629175e-06, "loss": 1.5043, "step": 12425 }, { "epoch": 0.3665044965354563, "grad_norm": 3.5544786753457185, "learning_rate": 7.488783146306058e-06, "loss": 1.5314, "step": 12430 }, { "epoch": 0.3666519239274657, "grad_norm": 3.383546559195495, "learning_rate": 7.488733373964977e-06, "loss": 1.5349, "step": 12435 }, { "epoch": 0.36679935131947516, "grad_norm": 3.5429274098108703, "learning_rate": 7.4886834916073975e-06, "loss": 1.531, "step": 12440 }, { "epoch": 0.36694677871148457, "grad_norm": 3.3159486578200927, "learning_rate": 7.488633499234788e-06, "loss": 1.5474, "step": 12445 }, { "epoch": 0.367094206103494, "grad_norm": 3.673022050343487, "learning_rate": 7.488583396848617e-06, "loss": 1.5319, "step": 12450 }, { "epoch": 0.3672416334955035, "grad_norm": 3.65669530983046, "learning_rate": 7.488533184450362e-06, "loss": 1.4829, "step": 12455 }, { "epoch": 0.3673890608875129, "grad_norm": 3.5895679791908823, "learning_rate": 7.488482862041498e-06, "loss": 1.5062, "step": 12460 }, { "epoch": 0.36753648827952234, "grad_norm": 3.388882262852509, "learning_rate": 7.488432429623508e-06, "loss": 1.5773, "step": 12465 }, { "epoch": 0.36768391567153175, "grad_norm": 3.513353223535232, "learning_rate": 7.488381887197875e-06, "loss": 1.5291, "step": 12470 }, { "epoch": 0.3678313430635412, "grad_norm": 3.5071241650111125, "learning_rate": 7.488331234766086e-06, "loss": 1.5125, "step": 12475 }, { "epoch": 0.36797877045555066, "grad_norm": 3.2852190370233014, "learning_rate": 7.48828047232963e-06, "loss": 1.5262, "step": 12480 }, { "epoch": 0.36812619784756007, "grad_norm": 3.5981511022390666, "learning_rate": 7.488229599890004e-06, "loss": 1.5169, "step": 12485 }, { "epoch": 0.3682736252395695, "grad_norm": 3.547718520310967, "learning_rate": 7.488178617448703e-06, "loss": 1.5527, "step": 12490 }, { "epoch": 0.3684210526315789, "grad_norm": 3.2749767747608174, "learning_rate": 7.488127525007228e-06, "loss": 1.4989, "step": 12495 }, { "epoch": 0.3685684800235884, "grad_norm": 3.3923453817866855, "learning_rate": 7.4880763225670824e-06, "loss": 1.467, "step": 12500 }, { "epoch": 0.3685684800235884, "eval_loss": 1.2277395725250244, "eval_runtime": 4.2194, "eval_samples_per_second": 93.853, "eval_steps_per_second": 3.081, "step": 12500 }, { "epoch": 0.36871590741559784, "grad_norm": 3.7598151689638186, "learning_rate": 7.488025010129772e-06, "loss": 1.5486, "step": 12505 }, { "epoch": 0.36886333480760725, "grad_norm": 3.8485335405579724, "learning_rate": 7.487973587696807e-06, "loss": 1.5644, "step": 12510 }, { "epoch": 0.3690107621996167, "grad_norm": 3.465333705705749, "learning_rate": 7.487922055269701e-06, "loss": 1.5388, "step": 12515 }, { "epoch": 0.3691581895916261, "grad_norm": 3.522492837461366, "learning_rate": 7.487870412849971e-06, "loss": 1.5911, "step": 12520 }, { "epoch": 0.36930561698363557, "grad_norm": 3.4071929994415733, "learning_rate": 7.487818660439135e-06, "loss": 1.5641, "step": 12525 }, { "epoch": 0.36945304437564497, "grad_norm": 3.3431567376007765, "learning_rate": 7.487766798038717e-06, "loss": 1.5136, "step": 12530 }, { "epoch": 0.3696004717676544, "grad_norm": 3.352922869903466, "learning_rate": 7.487714825650243e-06, "loss": 1.5632, "step": 12535 }, { "epoch": 0.3697478991596639, "grad_norm": 3.2406925247491256, "learning_rate": 7.487662743275243e-06, "loss": 1.5218, "step": 12540 }, { "epoch": 0.3698953265516733, "grad_norm": 3.365495814763033, "learning_rate": 7.487610550915247e-06, "loss": 1.5263, "step": 12545 }, { "epoch": 0.37004275394368275, "grad_norm": 3.3847346402222915, "learning_rate": 7.487558248571794e-06, "loss": 1.4924, "step": 12550 }, { "epoch": 0.37019018133569215, "grad_norm": 3.458527339043396, "learning_rate": 7.487505836246421e-06, "loss": 1.5195, "step": 12555 }, { "epoch": 0.3703376087277016, "grad_norm": 3.536753239442468, "learning_rate": 7.487453313940672e-06, "loss": 1.5724, "step": 12560 }, { "epoch": 0.37048503611971106, "grad_norm": 3.340692955042262, "learning_rate": 7.4874006816560905e-06, "loss": 1.6022, "step": 12565 }, { "epoch": 0.37063246351172047, "grad_norm": 3.381542801028286, "learning_rate": 7.487347939394227e-06, "loss": 1.5102, "step": 12570 }, { "epoch": 0.3707798909037299, "grad_norm": 3.58589436447813, "learning_rate": 7.4872950871566324e-06, "loss": 1.5938, "step": 12575 }, { "epoch": 0.37092731829573933, "grad_norm": 3.489365410135356, "learning_rate": 7.487242124944862e-06, "loss": 1.5498, "step": 12580 }, { "epoch": 0.3710747456877488, "grad_norm": 3.4805177097066444, "learning_rate": 7.487189052760474e-06, "loss": 1.5158, "step": 12585 }, { "epoch": 0.37122217307975824, "grad_norm": 3.531466955552598, "learning_rate": 7.487135870605034e-06, "loss": 1.5798, "step": 12590 }, { "epoch": 0.37136960047176765, "grad_norm": 3.646329589750166, "learning_rate": 7.487082578480101e-06, "loss": 1.5157, "step": 12595 }, { "epoch": 0.3715170278637771, "grad_norm": 3.137294302018913, "learning_rate": 7.487029176387245e-06, "loss": 1.5225, "step": 12600 }, { "epoch": 0.3716644552557865, "grad_norm": 3.324056216710954, "learning_rate": 7.48697566432804e-06, "loss": 1.5845, "step": 12605 }, { "epoch": 0.37181188264779597, "grad_norm": 3.4622797218263877, "learning_rate": 7.486922042304058e-06, "loss": 1.5746, "step": 12610 }, { "epoch": 0.37195931003980537, "grad_norm": 3.275764134176832, "learning_rate": 7.486868310316878e-06, "loss": 1.5316, "step": 12615 }, { "epoch": 0.3721067374318148, "grad_norm": 3.3972158150106098, "learning_rate": 7.48681446836808e-06, "loss": 1.4979, "step": 12620 }, { "epoch": 0.3722541648238243, "grad_norm": 3.3706973565589067, "learning_rate": 7.4867605164592515e-06, "loss": 1.5486, "step": 12625 }, { "epoch": 0.3724015922158337, "grad_norm": 3.4807869259619273, "learning_rate": 7.486706454591977e-06, "loss": 1.517, "step": 12630 }, { "epoch": 0.37254901960784315, "grad_norm": 3.8090564563506897, "learning_rate": 7.486652282767847e-06, "loss": 1.505, "step": 12635 }, { "epoch": 0.37269644699985255, "grad_norm": 3.633253950502218, "learning_rate": 7.486598000988458e-06, "loss": 1.5427, "step": 12640 }, { "epoch": 0.372843874391862, "grad_norm": 3.3926286819823117, "learning_rate": 7.486543609255405e-06, "loss": 1.5272, "step": 12645 }, { "epoch": 0.37299130178387147, "grad_norm": 3.358094030805765, "learning_rate": 7.486489107570291e-06, "loss": 1.5103, "step": 12650 }, { "epoch": 0.37313872917588087, "grad_norm": 3.303793619681517, "learning_rate": 7.486434495934718e-06, "loss": 1.5111, "step": 12655 }, { "epoch": 0.3732861565678903, "grad_norm": 3.484242603749758, "learning_rate": 7.486379774350293e-06, "loss": 1.5723, "step": 12660 }, { "epoch": 0.37343358395989973, "grad_norm": 3.4255138808084946, "learning_rate": 7.486324942818626e-06, "loss": 1.4923, "step": 12665 }, { "epoch": 0.3735810113519092, "grad_norm": 3.7914130085844993, "learning_rate": 7.486270001341332e-06, "loss": 1.5801, "step": 12670 }, { "epoch": 0.37372843874391865, "grad_norm": 3.4189944061501776, "learning_rate": 7.486214949920026e-06, "loss": 1.4701, "step": 12675 }, { "epoch": 0.37387586613592805, "grad_norm": 3.2875784470731975, "learning_rate": 7.48615978855633e-06, "loss": 1.5564, "step": 12680 }, { "epoch": 0.3740232935279375, "grad_norm": 3.4384270942937256, "learning_rate": 7.486104517251866e-06, "loss": 1.5326, "step": 12685 }, { "epoch": 0.3741707209199469, "grad_norm": 3.478781018432233, "learning_rate": 7.48604913600826e-06, "loss": 1.5463, "step": 12690 }, { "epoch": 0.37431814831195637, "grad_norm": 3.472089591297506, "learning_rate": 7.485993644827142e-06, "loss": 1.5666, "step": 12695 }, { "epoch": 0.37446557570396577, "grad_norm": 4.113355139445544, "learning_rate": 7.485938043710145e-06, "loss": 1.55, "step": 12700 }, { "epoch": 0.3746130030959752, "grad_norm": 3.4171968085048317, "learning_rate": 7.485882332658904e-06, "loss": 1.4995, "step": 12705 }, { "epoch": 0.3747604304879847, "grad_norm": 3.380522018514312, "learning_rate": 7.48582651167506e-06, "loss": 1.5383, "step": 12710 }, { "epoch": 0.3749078578799941, "grad_norm": 3.358392778827605, "learning_rate": 7.485770580760256e-06, "loss": 1.4964, "step": 12715 }, { "epoch": 0.37505528527200355, "grad_norm": 3.5335000889636627, "learning_rate": 7.485714539916136e-06, "loss": 1.5488, "step": 12720 }, { "epoch": 0.37520271266401295, "grad_norm": 3.2036999951708536, "learning_rate": 7.48565838914435e-06, "loss": 1.545, "step": 12725 }, { "epoch": 0.3753501400560224, "grad_norm": 3.4318388422469184, "learning_rate": 7.485602128446551e-06, "loss": 1.4666, "step": 12730 }, { "epoch": 0.37549756744803187, "grad_norm": 3.3117973822959206, "learning_rate": 7.485545757824393e-06, "loss": 1.5232, "step": 12735 }, { "epoch": 0.37564499484004127, "grad_norm": 3.7439132197568092, "learning_rate": 7.485489277279535e-06, "loss": 1.5453, "step": 12740 }, { "epoch": 0.3757924222320507, "grad_norm": 3.5340676517338707, "learning_rate": 7.48543268681364e-06, "loss": 1.5302, "step": 12745 }, { "epoch": 0.37593984962406013, "grad_norm": 3.467440795949254, "learning_rate": 7.485375986428374e-06, "loss": 1.5226, "step": 12750 }, { "epoch": 0.3760872770160696, "grad_norm": 3.4206004195045967, "learning_rate": 7.4853191761254035e-06, "loss": 1.5401, "step": 12755 }, { "epoch": 0.37623470440807905, "grad_norm": 3.3144786536377815, "learning_rate": 7.4852622559064e-06, "loss": 1.585, "step": 12760 }, { "epoch": 0.37638213180008845, "grad_norm": 3.388751808705405, "learning_rate": 7.48520522577304e-06, "loss": 1.501, "step": 12765 }, { "epoch": 0.3765295591920979, "grad_norm": 3.3130971796748816, "learning_rate": 7.485148085727001e-06, "loss": 1.5034, "step": 12770 }, { "epoch": 0.3766769865841073, "grad_norm": 3.538029967651289, "learning_rate": 7.485090835769964e-06, "loss": 1.5829, "step": 12775 }, { "epoch": 0.37682441397611677, "grad_norm": 3.628736752481526, "learning_rate": 7.485033475903616e-06, "loss": 1.4611, "step": 12780 }, { "epoch": 0.37697184136812617, "grad_norm": 3.42626006787866, "learning_rate": 7.484976006129642e-06, "loss": 1.5227, "step": 12785 }, { "epoch": 0.37711926876013563, "grad_norm": 3.330425883353919, "learning_rate": 7.484918426449733e-06, "loss": 1.5777, "step": 12790 }, { "epoch": 0.3772666961521451, "grad_norm": 3.519016392322275, "learning_rate": 7.4848607368655855e-06, "loss": 1.5009, "step": 12795 }, { "epoch": 0.3774141235441545, "grad_norm": 3.354802868813107, "learning_rate": 7.4848029373788955e-06, "loss": 1.5153, "step": 12800 }, { "epoch": 0.37756155093616395, "grad_norm": 3.4937308146903105, "learning_rate": 7.484745027991365e-06, "loss": 1.5438, "step": 12805 }, { "epoch": 0.37770897832817335, "grad_norm": 3.3154977170323168, "learning_rate": 7.484687008704697e-06, "loss": 1.5656, "step": 12810 }, { "epoch": 0.3778564057201828, "grad_norm": 3.325674517476737, "learning_rate": 7.4846288795206e-06, "loss": 1.499, "step": 12815 }, { "epoch": 0.37800383311219227, "grad_norm": 3.44184130704463, "learning_rate": 7.484570640440783e-06, "loss": 1.4987, "step": 12820 }, { "epoch": 0.37815126050420167, "grad_norm": 3.330210563947821, "learning_rate": 7.4845122914669615e-06, "loss": 1.5004, "step": 12825 }, { "epoch": 0.3782986878962111, "grad_norm": 3.400017964158382, "learning_rate": 7.48445383260085e-06, "loss": 1.5492, "step": 12830 }, { "epoch": 0.37844611528822053, "grad_norm": 3.5000605983538944, "learning_rate": 7.48439526384417e-06, "loss": 1.527, "step": 12835 }, { "epoch": 0.37859354268023, "grad_norm": 3.254026736005149, "learning_rate": 7.484336585198646e-06, "loss": 1.5473, "step": 12840 }, { "epoch": 0.37874097007223945, "grad_norm": 3.396939539155509, "learning_rate": 7.484277796666004e-06, "loss": 1.5669, "step": 12845 }, { "epoch": 0.37888839746424885, "grad_norm": 3.2900875456988743, "learning_rate": 7.484218898247973e-06, "loss": 1.5779, "step": 12850 }, { "epoch": 0.3790358248562583, "grad_norm": 3.271023547464153, "learning_rate": 7.484159889946288e-06, "loss": 1.5335, "step": 12855 }, { "epoch": 0.3791832522482677, "grad_norm": 3.5386254072577814, "learning_rate": 7.484100771762683e-06, "loss": 1.5571, "step": 12860 }, { "epoch": 0.37933067964027717, "grad_norm": 3.2570982283047645, "learning_rate": 7.4840415436989e-06, "loss": 1.5441, "step": 12865 }, { "epoch": 0.37947810703228657, "grad_norm": 3.364031586219434, "learning_rate": 7.4839822057566795e-06, "loss": 1.6311, "step": 12870 }, { "epoch": 0.37962553442429603, "grad_norm": 3.475993487819938, "learning_rate": 7.48392275793777e-06, "loss": 1.5389, "step": 12875 }, { "epoch": 0.3797729618163055, "grad_norm": 3.462287756238062, "learning_rate": 7.483863200243919e-06, "loss": 1.5629, "step": 12880 }, { "epoch": 0.3799203892083149, "grad_norm": 3.332454829122881, "learning_rate": 7.48380353267688e-06, "loss": 1.5274, "step": 12885 }, { "epoch": 0.38006781660032435, "grad_norm": 3.360633125224178, "learning_rate": 7.483743755238409e-06, "loss": 1.5049, "step": 12890 }, { "epoch": 0.38021524399233375, "grad_norm": 3.4517369225614605, "learning_rate": 7.483683867930263e-06, "loss": 1.5708, "step": 12895 }, { "epoch": 0.3803626713843432, "grad_norm": 3.474891269861306, "learning_rate": 7.483623870754208e-06, "loss": 1.5714, "step": 12900 }, { "epoch": 0.38051009877635267, "grad_norm": 3.3835636999243475, "learning_rate": 7.4835637637120055e-06, "loss": 1.5273, "step": 12905 }, { "epoch": 0.38065752616836207, "grad_norm": 3.457568390231317, "learning_rate": 7.483503546805426e-06, "loss": 1.5272, "step": 12910 }, { "epoch": 0.38080495356037153, "grad_norm": 3.4198877029479546, "learning_rate": 7.483443220036242e-06, "loss": 1.5226, "step": 12915 }, { "epoch": 0.38095238095238093, "grad_norm": 3.3612349184475585, "learning_rate": 7.483382783406228e-06, "loss": 1.5534, "step": 12920 }, { "epoch": 0.3810998083443904, "grad_norm": 3.4823472313264823, "learning_rate": 7.483322236917163e-06, "loss": 1.5724, "step": 12925 }, { "epoch": 0.38124723573639985, "grad_norm": 3.4023059737541077, "learning_rate": 7.483261580570827e-06, "loss": 1.4746, "step": 12930 }, { "epoch": 0.38139466312840925, "grad_norm": 3.4342179918501405, "learning_rate": 7.483200814369007e-06, "loss": 1.4998, "step": 12935 }, { "epoch": 0.3815420905204187, "grad_norm": 3.424663710116279, "learning_rate": 7.483139938313489e-06, "loss": 1.4552, "step": 12940 }, { "epoch": 0.3816895179124281, "grad_norm": 3.380238065169392, "learning_rate": 7.483078952406067e-06, "loss": 1.5432, "step": 12945 }, { "epoch": 0.38183694530443757, "grad_norm": 3.271312979166834, "learning_rate": 7.483017856648534e-06, "loss": 1.4816, "step": 12950 }, { "epoch": 0.38198437269644697, "grad_norm": 3.412595132997465, "learning_rate": 7.482956651042687e-06, "loss": 1.5553, "step": 12955 }, { "epoch": 0.38213180008845643, "grad_norm": 3.4739438607752953, "learning_rate": 7.482895335590328e-06, "loss": 1.5179, "step": 12960 }, { "epoch": 0.3822792274804659, "grad_norm": 3.361962201550858, "learning_rate": 7.482833910293262e-06, "loss": 1.542, "step": 12965 }, { "epoch": 0.3824266548724753, "grad_norm": 3.354232114728803, "learning_rate": 7.482772375153295e-06, "loss": 1.5373, "step": 12970 }, { "epoch": 0.38257408226448475, "grad_norm": 3.407276738764041, "learning_rate": 7.482710730172239e-06, "loss": 1.5183, "step": 12975 }, { "epoch": 0.38272150965649415, "grad_norm": 3.490551139234799, "learning_rate": 7.482648975351907e-06, "loss": 1.5768, "step": 12980 }, { "epoch": 0.3828689370485036, "grad_norm": 3.3608929933547187, "learning_rate": 7.482587110694117e-06, "loss": 1.4752, "step": 12985 }, { "epoch": 0.38301636444051307, "grad_norm": 3.469061812321119, "learning_rate": 7.48252513620069e-06, "loss": 1.5805, "step": 12990 }, { "epoch": 0.38316379183252247, "grad_norm": 3.2723660268467656, "learning_rate": 7.4824630518734475e-06, "loss": 1.5223, "step": 12995 }, { "epoch": 0.38331121922453193, "grad_norm": 3.601115850021539, "learning_rate": 7.482400857714218e-06, "loss": 1.4511, "step": 13000 }, { "epoch": 0.38331121922453193, "eval_loss": 1.2228535413742065, "eval_runtime": 4.2549, "eval_samples_per_second": 93.069, "eval_steps_per_second": 3.055, "step": 13000 }, { "epoch": 0.38345864661654133, "grad_norm": 3.4630666385216964, "learning_rate": 7.48233855372483e-06, "loss": 1.533, "step": 13005 }, { "epoch": 0.3836060740085508, "grad_norm": 3.505207757247327, "learning_rate": 7.48227613990712e-06, "loss": 1.5658, "step": 13010 }, { "epoch": 0.38375350140056025, "grad_norm": 3.530710415757165, "learning_rate": 7.482213616262922e-06, "loss": 1.5602, "step": 13015 }, { "epoch": 0.38390092879256965, "grad_norm": 3.978209724659967, "learning_rate": 7.482150982794078e-06, "loss": 1.5334, "step": 13020 }, { "epoch": 0.3840483561845791, "grad_norm": 3.26872300532909, "learning_rate": 7.482088239502427e-06, "loss": 1.5581, "step": 13025 }, { "epoch": 0.3841957835765885, "grad_norm": 3.3720717846857937, "learning_rate": 7.48202538638982e-06, "loss": 1.5448, "step": 13030 }, { "epoch": 0.38434321096859797, "grad_norm": 3.416182959759139, "learning_rate": 7.4819624234581045e-06, "loss": 1.519, "step": 13035 }, { "epoch": 0.38449063836060743, "grad_norm": 3.409260219999777, "learning_rate": 7.4818993507091325e-06, "loss": 1.515, "step": 13040 }, { "epoch": 0.38463806575261683, "grad_norm": 3.4258795272568427, "learning_rate": 7.48183616814476e-06, "loss": 1.5383, "step": 13045 }, { "epoch": 0.3847854931446263, "grad_norm": 3.3263790013772256, "learning_rate": 7.481772875766848e-06, "loss": 1.5208, "step": 13050 }, { "epoch": 0.3849329205366357, "grad_norm": 3.5410929095597417, "learning_rate": 7.481709473577258e-06, "loss": 1.5238, "step": 13055 }, { "epoch": 0.38508034792864515, "grad_norm": 3.536165132133658, "learning_rate": 7.481645961577855e-06, "loss": 1.5578, "step": 13060 }, { "epoch": 0.38522777532065455, "grad_norm": 3.494309594811533, "learning_rate": 7.481582339770509e-06, "loss": 1.5346, "step": 13065 }, { "epoch": 0.385375202712664, "grad_norm": 3.5671234903880498, "learning_rate": 7.481518608157091e-06, "loss": 1.5701, "step": 13070 }, { "epoch": 0.38552263010467347, "grad_norm": 3.33560954602345, "learning_rate": 7.4814547667394774e-06, "loss": 1.4974, "step": 13075 }, { "epoch": 0.38567005749668287, "grad_norm": 3.4820337439865052, "learning_rate": 7.4813908155195474e-06, "loss": 1.5019, "step": 13080 }, { "epoch": 0.38581748488869233, "grad_norm": 3.5787986170710537, "learning_rate": 7.48132675449918e-06, "loss": 1.5408, "step": 13085 }, { "epoch": 0.38596491228070173, "grad_norm": 3.3796150705046575, "learning_rate": 7.481262583680263e-06, "loss": 1.5191, "step": 13090 }, { "epoch": 0.3861123396727112, "grad_norm": 3.385657730725355, "learning_rate": 7.481198303064684e-06, "loss": 1.4854, "step": 13095 }, { "epoch": 0.38625976706472065, "grad_norm": 3.366201591415335, "learning_rate": 7.481133912654334e-06, "loss": 1.5189, "step": 13100 }, { "epoch": 0.38640719445673005, "grad_norm": 3.468910317815596, "learning_rate": 7.481069412451108e-06, "loss": 1.5347, "step": 13105 }, { "epoch": 0.3865546218487395, "grad_norm": 3.3606306968633373, "learning_rate": 7.481004802456904e-06, "loss": 1.5246, "step": 13110 }, { "epoch": 0.3867020492407489, "grad_norm": 3.4489971426769057, "learning_rate": 7.480940082673624e-06, "loss": 1.5939, "step": 13115 }, { "epoch": 0.38684947663275837, "grad_norm": 3.6412854518683595, "learning_rate": 7.480875253103172e-06, "loss": 1.5867, "step": 13120 }, { "epoch": 0.38699690402476783, "grad_norm": 3.2059644138684074, "learning_rate": 7.480810313747456e-06, "loss": 1.4987, "step": 13125 }, { "epoch": 0.38714433141677723, "grad_norm": 3.2352363547626526, "learning_rate": 7.480745264608384e-06, "loss": 1.5383, "step": 13130 }, { "epoch": 0.3872917588087867, "grad_norm": 3.4660557763812316, "learning_rate": 7.480680105687874e-06, "loss": 1.5433, "step": 13135 }, { "epoch": 0.3874391862007961, "grad_norm": 3.373642228091164, "learning_rate": 7.480614836987841e-06, "loss": 1.5354, "step": 13140 }, { "epoch": 0.38758661359280555, "grad_norm": 3.4759580955805864, "learning_rate": 7.480549458510208e-06, "loss": 1.5226, "step": 13145 }, { "epoch": 0.38773404098481495, "grad_norm": 3.4520244978138996, "learning_rate": 7.480483970256897e-06, "loss": 1.5812, "step": 13150 }, { "epoch": 0.3878814683768244, "grad_norm": 3.4102780253850615, "learning_rate": 7.480418372229835e-06, "loss": 1.5813, "step": 13155 }, { "epoch": 0.38802889576883387, "grad_norm": 3.2590010539820797, "learning_rate": 7.480352664430952e-06, "loss": 1.482, "step": 13160 }, { "epoch": 0.3881763231608433, "grad_norm": 3.256829444515701, "learning_rate": 7.480286846862184e-06, "loss": 1.537, "step": 13165 }, { "epoch": 0.38832375055285273, "grad_norm": 3.7034879852082323, "learning_rate": 7.480220919525464e-06, "loss": 1.5926, "step": 13170 }, { "epoch": 0.38847117794486213, "grad_norm": 3.39682062525005, "learning_rate": 7.480154882422735e-06, "loss": 1.5549, "step": 13175 }, { "epoch": 0.3886186053368716, "grad_norm": 4.32018463739763, "learning_rate": 7.480088735555939e-06, "loss": 1.5098, "step": 13180 }, { "epoch": 0.38876603272888105, "grad_norm": 3.363830745320937, "learning_rate": 7.480022478927023e-06, "loss": 1.5236, "step": 13185 }, { "epoch": 0.38891346012089045, "grad_norm": 3.495040205233331, "learning_rate": 7.479956112537936e-06, "loss": 1.5495, "step": 13190 }, { "epoch": 0.3890608875128999, "grad_norm": 3.3428160130286706, "learning_rate": 7.479889636390631e-06, "loss": 1.542, "step": 13195 }, { "epoch": 0.3892083149049093, "grad_norm": 3.2825128905444547, "learning_rate": 7.4798230504870655e-06, "loss": 1.5449, "step": 13200 }, { "epoch": 0.38935574229691877, "grad_norm": 3.204418992196148, "learning_rate": 7.479756354829197e-06, "loss": 1.5169, "step": 13205 }, { "epoch": 0.38950316968892823, "grad_norm": 3.5636839050083693, "learning_rate": 7.479689549418988e-06, "loss": 1.4964, "step": 13210 }, { "epoch": 0.38965059708093763, "grad_norm": 3.536129909409823, "learning_rate": 7.479622634258405e-06, "loss": 1.5272, "step": 13215 }, { "epoch": 0.3897980244729471, "grad_norm": 3.4036890933498625, "learning_rate": 7.4795556093494195e-06, "loss": 1.5437, "step": 13220 }, { "epoch": 0.3899454518649565, "grad_norm": 3.220995287161542, "learning_rate": 7.479488474694e-06, "loss": 1.5447, "step": 13225 }, { "epoch": 0.39009287925696595, "grad_norm": 3.468170156017405, "learning_rate": 7.479421230294123e-06, "loss": 1.5597, "step": 13230 }, { "epoch": 0.39024030664897535, "grad_norm": 3.3691033340587824, "learning_rate": 7.4793538761517696e-06, "loss": 1.4938, "step": 13235 }, { "epoch": 0.3903877340409848, "grad_norm": 3.2018130415159507, "learning_rate": 7.479286412268918e-06, "loss": 1.5007, "step": 13240 }, { "epoch": 0.39053516143299427, "grad_norm": 3.3294888966969345, "learning_rate": 7.479218838647556e-06, "loss": 1.5161, "step": 13245 }, { "epoch": 0.3906825888250037, "grad_norm": 3.7049180045316885, "learning_rate": 7.479151155289671e-06, "loss": 1.5569, "step": 13250 }, { "epoch": 0.39083001621701313, "grad_norm": 3.4333253716772334, "learning_rate": 7.479083362197255e-06, "loss": 1.5344, "step": 13255 }, { "epoch": 0.39097744360902253, "grad_norm": 3.3886927771428668, "learning_rate": 7.479015459372303e-06, "loss": 1.5483, "step": 13260 }, { "epoch": 0.391124871001032, "grad_norm": 3.2326143516769714, "learning_rate": 7.478947446816814e-06, "loss": 1.5016, "step": 13265 }, { "epoch": 0.39127229839304145, "grad_norm": 3.246497186142251, "learning_rate": 7.478879324532787e-06, "loss": 1.5389, "step": 13270 }, { "epoch": 0.39141972578505085, "grad_norm": 3.295856290755383, "learning_rate": 7.478811092522228e-06, "loss": 1.5558, "step": 13275 }, { "epoch": 0.3915671531770603, "grad_norm": 3.3485279907308785, "learning_rate": 7.4787427507871455e-06, "loss": 1.5231, "step": 13280 }, { "epoch": 0.3917145805690697, "grad_norm": 3.3462214185079597, "learning_rate": 7.47867429932955e-06, "loss": 1.4729, "step": 13285 }, { "epoch": 0.3918620079610792, "grad_norm": 3.396118796261197, "learning_rate": 7.4786057381514555e-06, "loss": 1.4939, "step": 13290 }, { "epoch": 0.39200943535308863, "grad_norm": 3.340775827207737, "learning_rate": 7.4785370672548785e-06, "loss": 1.5928, "step": 13295 }, { "epoch": 0.39215686274509803, "grad_norm": 3.526460662108266, "learning_rate": 7.478468286641842e-06, "loss": 1.5147, "step": 13300 }, { "epoch": 0.3923042901371075, "grad_norm": 3.5676277623506683, "learning_rate": 7.478399396314368e-06, "loss": 1.5214, "step": 13305 }, { "epoch": 0.3924517175291169, "grad_norm": 3.5020356839617635, "learning_rate": 7.478330396274484e-06, "loss": 1.5522, "step": 13310 }, { "epoch": 0.39259914492112635, "grad_norm": 3.3686959023580347, "learning_rate": 7.478261286524222e-06, "loss": 1.5547, "step": 13315 }, { "epoch": 0.39274657231313576, "grad_norm": 3.32887849455298, "learning_rate": 7.478192067065613e-06, "loss": 1.5138, "step": 13320 }, { "epoch": 0.3928939997051452, "grad_norm": 3.096055228907842, "learning_rate": 7.478122737900696e-06, "loss": 1.4685, "step": 13325 }, { "epoch": 0.39304142709715467, "grad_norm": 3.4290597401732756, "learning_rate": 7.478053299031511e-06, "loss": 1.5231, "step": 13330 }, { "epoch": 0.3931888544891641, "grad_norm": 3.348211588852573, "learning_rate": 7.4779837504601e-06, "loss": 1.5131, "step": 13335 }, { "epoch": 0.39333628188117353, "grad_norm": 3.4339458053933463, "learning_rate": 7.47791409218851e-06, "loss": 1.4867, "step": 13340 }, { "epoch": 0.39348370927318294, "grad_norm": 3.4517650319905595, "learning_rate": 7.477844324218791e-06, "loss": 1.5749, "step": 13345 }, { "epoch": 0.3936311366651924, "grad_norm": 3.569592999785816, "learning_rate": 7.477774446552996e-06, "loss": 1.5878, "step": 13350 }, { "epoch": 0.39377856405720185, "grad_norm": 3.3406363076288854, "learning_rate": 7.477704459193182e-06, "loss": 1.5015, "step": 13355 }, { "epoch": 0.39392599144921125, "grad_norm": 3.2681489499994107, "learning_rate": 7.477634362141407e-06, "loss": 1.5394, "step": 13360 }, { "epoch": 0.3940734188412207, "grad_norm": 3.611163460294444, "learning_rate": 7.477564155399734e-06, "loss": 1.5168, "step": 13365 }, { "epoch": 0.3942208462332301, "grad_norm": 3.4709610154018664, "learning_rate": 7.47749383897023e-06, "loss": 1.5127, "step": 13370 }, { "epoch": 0.3943682736252396, "grad_norm": 3.283792622096102, "learning_rate": 7.477423412854963e-06, "loss": 1.524, "step": 13375 }, { "epoch": 0.39451570101724903, "grad_norm": 3.4591580985897807, "learning_rate": 7.4773528770560065e-06, "loss": 1.5706, "step": 13380 }, { "epoch": 0.39466312840925843, "grad_norm": 3.3602379474541766, "learning_rate": 7.477282231575435e-06, "loss": 1.5504, "step": 13385 }, { "epoch": 0.3948105558012679, "grad_norm": 3.4674119273921318, "learning_rate": 7.477211476415329e-06, "loss": 1.5155, "step": 13390 }, { "epoch": 0.3949579831932773, "grad_norm": 3.3830513520725436, "learning_rate": 7.477140611577767e-06, "loss": 1.5388, "step": 13395 }, { "epoch": 0.39510541058528675, "grad_norm": 3.271505705173392, "learning_rate": 7.477069637064838e-06, "loss": 1.5454, "step": 13400 }, { "epoch": 0.39525283797729616, "grad_norm": 3.1401266425849594, "learning_rate": 7.4769985528786285e-06, "loss": 1.5216, "step": 13405 }, { "epoch": 0.3954002653693056, "grad_norm": 3.192270939577187, "learning_rate": 7.47692735902123e-06, "loss": 1.4533, "step": 13410 }, { "epoch": 0.3955476927613151, "grad_norm": 3.317288823133087, "learning_rate": 7.47685605549474e-06, "loss": 1.5285, "step": 13415 }, { "epoch": 0.3956951201533245, "grad_norm": 3.673405146513781, "learning_rate": 7.476784642301253e-06, "loss": 1.5108, "step": 13420 }, { "epoch": 0.39584254754533393, "grad_norm": 3.4920546376580086, "learning_rate": 7.476713119442874e-06, "loss": 1.531, "step": 13425 }, { "epoch": 0.39598997493734334, "grad_norm": 3.4788481449303257, "learning_rate": 7.476641486921706e-06, "loss": 1.5449, "step": 13430 }, { "epoch": 0.3961374023293528, "grad_norm": 3.2808625488621947, "learning_rate": 7.476569744739856e-06, "loss": 1.5095, "step": 13435 }, { "epoch": 0.39628482972136225, "grad_norm": 3.2587182451384202, "learning_rate": 7.476497892899437e-06, "loss": 1.6062, "step": 13440 }, { "epoch": 0.39643225711337166, "grad_norm": 3.6191091169017033, "learning_rate": 7.476425931402561e-06, "loss": 1.5798, "step": 13445 }, { "epoch": 0.3965796845053811, "grad_norm": 3.327633683703285, "learning_rate": 7.476353860251348e-06, "loss": 1.5028, "step": 13450 }, { "epoch": 0.3967271118973905, "grad_norm": 6.647781882998038, "learning_rate": 7.476281679447917e-06, "loss": 1.5504, "step": 13455 }, { "epoch": 0.3968745392894, "grad_norm": 3.6480706979486013, "learning_rate": 7.476209388994392e-06, "loss": 1.5828, "step": 13460 }, { "epoch": 0.39702196668140943, "grad_norm": 3.297021334584016, "learning_rate": 7.476136988892902e-06, "loss": 1.507, "step": 13465 }, { "epoch": 0.39716939407341884, "grad_norm": 3.340835877960588, "learning_rate": 7.476064479145576e-06, "loss": 1.5843, "step": 13470 }, { "epoch": 0.3973168214654283, "grad_norm": 3.3315418777791788, "learning_rate": 7.475991859754547e-06, "loss": 1.4808, "step": 13475 }, { "epoch": 0.3974642488574377, "grad_norm": 3.2420626620112203, "learning_rate": 7.4759191307219546e-06, "loss": 1.5086, "step": 13480 }, { "epoch": 0.39761167624944715, "grad_norm": 3.3922105695647344, "learning_rate": 7.475846292049936e-06, "loss": 1.5708, "step": 13485 }, { "epoch": 0.39775910364145656, "grad_norm": 3.2219504311090144, "learning_rate": 7.475773343740636e-06, "loss": 1.5417, "step": 13490 }, { "epoch": 0.397906531033466, "grad_norm": 3.2234181910022897, "learning_rate": 7.475700285796201e-06, "loss": 1.5222, "step": 13495 }, { "epoch": 0.3980539584254755, "grad_norm": 3.3756648100053974, "learning_rate": 7.4756271182187805e-06, "loss": 1.5157, "step": 13500 }, { "epoch": 0.3980539584254755, "eval_loss": 1.2188664674758911, "eval_runtime": 4.1798, "eval_samples_per_second": 94.742, "eval_steps_per_second": 3.11, "step": 13500 }, { "epoch": 0.3982013858174849, "grad_norm": 3.3945937457484394, "learning_rate": 7.475553841010529e-06, "loss": 1.5299, "step": 13505 }, { "epoch": 0.39834881320949433, "grad_norm": 3.2085717522146955, "learning_rate": 7.4754804541736005e-06, "loss": 1.5171, "step": 13510 }, { "epoch": 0.39849624060150374, "grad_norm": 3.2329879719521157, "learning_rate": 7.475406957710156e-06, "loss": 1.5318, "step": 13515 }, { "epoch": 0.3986436679935132, "grad_norm": 3.3498523285015764, "learning_rate": 7.475333351622357e-06, "loss": 1.5739, "step": 13520 }, { "epoch": 0.39879109538552265, "grad_norm": 3.3331072515976943, "learning_rate": 7.475259635912372e-06, "loss": 1.5448, "step": 13525 }, { "epoch": 0.39893852277753206, "grad_norm": 3.3704604466225225, "learning_rate": 7.4751858105823665e-06, "loss": 1.561, "step": 13530 }, { "epoch": 0.3990859501695415, "grad_norm": 3.33017234776542, "learning_rate": 7.475111875634516e-06, "loss": 1.5451, "step": 13535 }, { "epoch": 0.3992333775615509, "grad_norm": 3.169262751007958, "learning_rate": 7.475037831070995e-06, "loss": 1.5587, "step": 13540 }, { "epoch": 0.3993808049535604, "grad_norm": 3.4321281301771567, "learning_rate": 7.474963676893982e-06, "loss": 1.5906, "step": 13545 }, { "epoch": 0.39952823234556983, "grad_norm": 3.4674180320107184, "learning_rate": 7.474889413105659e-06, "loss": 1.5285, "step": 13550 }, { "epoch": 0.39967565973757924, "grad_norm": 3.3994580910210392, "learning_rate": 7.474815039708211e-06, "loss": 1.5037, "step": 13555 }, { "epoch": 0.3998230871295887, "grad_norm": 3.1906886583734795, "learning_rate": 7.4747405567038276e-06, "loss": 1.4954, "step": 13560 }, { "epoch": 0.3999705145215981, "grad_norm": 3.5649562223619, "learning_rate": 7.474665964094702e-06, "loss": 1.5619, "step": 13565 }, { "epoch": 0.40011794191360756, "grad_norm": 3.3855986631339245, "learning_rate": 7.474591261883025e-06, "loss": 1.5459, "step": 13570 }, { "epoch": 0.40026536930561696, "grad_norm": 3.3811855546343934, "learning_rate": 7.474516450070999e-06, "loss": 1.5634, "step": 13575 }, { "epoch": 0.4004127966976264, "grad_norm": 3.2532622237926767, "learning_rate": 7.474441528660821e-06, "loss": 1.5669, "step": 13580 }, { "epoch": 0.4005602240896359, "grad_norm": 4.069412957504052, "learning_rate": 7.4743664976547e-06, "loss": 1.4568, "step": 13585 }, { "epoch": 0.4007076514816453, "grad_norm": 3.4051286043789597, "learning_rate": 7.474291357054842e-06, "loss": 1.5524, "step": 13590 }, { "epoch": 0.40085507887365474, "grad_norm": 3.2796755404896736, "learning_rate": 7.474216106863457e-06, "loss": 1.5214, "step": 13595 }, { "epoch": 0.40100250626566414, "grad_norm": 4.607669255134082, "learning_rate": 7.474140747082762e-06, "loss": 1.5169, "step": 13600 }, { "epoch": 0.4011499336576736, "grad_norm": 3.439340152303973, "learning_rate": 7.4740652777149715e-06, "loss": 1.5124, "step": 13605 }, { "epoch": 0.40129736104968305, "grad_norm": 3.3912724774526444, "learning_rate": 7.473989698762309e-06, "loss": 1.541, "step": 13610 }, { "epoch": 0.40144478844169246, "grad_norm": 3.3780430042373917, "learning_rate": 7.473914010226996e-06, "loss": 1.555, "step": 13615 }, { "epoch": 0.4015922158337019, "grad_norm": 3.366496728090895, "learning_rate": 7.473838212111262e-06, "loss": 1.5039, "step": 13620 }, { "epoch": 0.4017396432257113, "grad_norm": 3.2456253531230526, "learning_rate": 7.473762304417336e-06, "loss": 1.4894, "step": 13625 }, { "epoch": 0.4018870706177208, "grad_norm": 4.005041716849575, "learning_rate": 7.4736862871474515e-06, "loss": 1.5587, "step": 13630 }, { "epoch": 0.40203449800973023, "grad_norm": 3.2859404183970415, "learning_rate": 7.473610160303847e-06, "loss": 1.5243, "step": 13635 }, { "epoch": 0.40218192540173964, "grad_norm": 3.4282647999955174, "learning_rate": 7.473533923888761e-06, "loss": 1.4572, "step": 13640 }, { "epoch": 0.4023293527937491, "grad_norm": 3.478942797510351, "learning_rate": 7.4734575779044375e-06, "loss": 1.5429, "step": 13645 }, { "epoch": 0.4024767801857585, "grad_norm": 3.371144046114234, "learning_rate": 7.473381122353123e-06, "loss": 1.5658, "step": 13650 }, { "epoch": 0.40262420757776796, "grad_norm": 3.542011752240271, "learning_rate": 7.473304557237068e-06, "loss": 1.5105, "step": 13655 }, { "epoch": 0.40277163496977736, "grad_norm": 3.419685530752677, "learning_rate": 7.473227882558525e-06, "loss": 1.4989, "step": 13660 }, { "epoch": 0.4029190623617868, "grad_norm": 3.357591261665918, "learning_rate": 7.473151098319749e-06, "loss": 1.5176, "step": 13665 }, { "epoch": 0.4030664897537963, "grad_norm": 3.486286298553994, "learning_rate": 7.473074204523001e-06, "loss": 1.5301, "step": 13670 }, { "epoch": 0.4032139171458057, "grad_norm": 3.4511843934991484, "learning_rate": 7.472997201170543e-06, "loss": 1.5341, "step": 13675 }, { "epoch": 0.40336134453781514, "grad_norm": 3.1888919343042907, "learning_rate": 7.472920088264642e-06, "loss": 1.4942, "step": 13680 }, { "epoch": 0.40350877192982454, "grad_norm": 3.546604278008118, "learning_rate": 7.472842865807565e-06, "loss": 1.536, "step": 13685 }, { "epoch": 0.403656199321834, "grad_norm": 3.13159502822004, "learning_rate": 7.472765533801586e-06, "loss": 1.5533, "step": 13690 }, { "epoch": 0.40380362671384346, "grad_norm": 3.298686836086956, "learning_rate": 7.472688092248981e-06, "loss": 1.5083, "step": 13695 }, { "epoch": 0.40395105410585286, "grad_norm": 3.275597237280035, "learning_rate": 7.4726105411520274e-06, "loss": 1.4903, "step": 13700 }, { "epoch": 0.4040984814978623, "grad_norm": 3.2105665768310927, "learning_rate": 7.472532880513009e-06, "loss": 1.5281, "step": 13705 }, { "epoch": 0.4042459088898717, "grad_norm": 3.3907419672141352, "learning_rate": 7.472455110334209e-06, "loss": 1.493, "step": 13710 }, { "epoch": 0.4043933362818812, "grad_norm": 3.165892241751782, "learning_rate": 7.472377230617916e-06, "loss": 1.5132, "step": 13715 }, { "epoch": 0.40454076367389064, "grad_norm": 3.5099960100010836, "learning_rate": 7.472299241366423e-06, "loss": 1.5181, "step": 13720 }, { "epoch": 0.40468819106590004, "grad_norm": 3.4215371820136333, "learning_rate": 7.472221142582024e-06, "loss": 1.5223, "step": 13725 }, { "epoch": 0.4048356184579095, "grad_norm": 3.3858238673413017, "learning_rate": 7.472142934267019e-06, "loss": 1.5629, "step": 13730 }, { "epoch": 0.4049830458499189, "grad_norm": 3.4132318393318486, "learning_rate": 7.472064616423707e-06, "loss": 1.5157, "step": 13735 }, { "epoch": 0.40513047324192836, "grad_norm": 3.4312894332433643, "learning_rate": 7.471986189054393e-06, "loss": 1.5195, "step": 13740 }, { "epoch": 0.40527790063393776, "grad_norm": 3.213333972883759, "learning_rate": 7.471907652161386e-06, "loss": 1.5624, "step": 13745 }, { "epoch": 0.4054253280259472, "grad_norm": 3.2165063793313164, "learning_rate": 7.471829005746996e-06, "loss": 1.512, "step": 13750 }, { "epoch": 0.4055727554179567, "grad_norm": 3.466297234395691, "learning_rate": 7.471750249813538e-06, "loss": 1.5357, "step": 13755 }, { "epoch": 0.4057201828099661, "grad_norm": 3.350389598075034, "learning_rate": 7.471671384363329e-06, "loss": 1.5156, "step": 13760 }, { "epoch": 0.40586761020197554, "grad_norm": 3.175331883137094, "learning_rate": 7.4715924093986896e-06, "loss": 1.4905, "step": 13765 }, { "epoch": 0.40601503759398494, "grad_norm": 3.29869047064406, "learning_rate": 7.4715133249219445e-06, "loss": 1.551, "step": 13770 }, { "epoch": 0.4061624649859944, "grad_norm": 3.6044878705811154, "learning_rate": 7.47143413093542e-06, "loss": 1.5073, "step": 13775 }, { "epoch": 0.40630989237800386, "grad_norm": 3.3325185592865103, "learning_rate": 7.471354827441446e-06, "loss": 1.4579, "step": 13780 }, { "epoch": 0.40645731977001326, "grad_norm": 3.343840556038477, "learning_rate": 7.471275414442358e-06, "loss": 1.4902, "step": 13785 }, { "epoch": 0.4066047471620227, "grad_norm": 3.3273939935308894, "learning_rate": 7.471195891940491e-06, "loss": 1.5302, "step": 13790 }, { "epoch": 0.4067521745540321, "grad_norm": 3.2138357496461287, "learning_rate": 7.4711162599381865e-06, "loss": 1.5649, "step": 13795 }, { "epoch": 0.4068996019460416, "grad_norm": 3.175094984263679, "learning_rate": 7.471036518437785e-06, "loss": 1.5369, "step": 13800 }, { "epoch": 0.40704702933805104, "grad_norm": 3.3901017169095495, "learning_rate": 7.470956667441637e-06, "loss": 1.5193, "step": 13805 }, { "epoch": 0.40719445673006044, "grad_norm": 3.393058191249385, "learning_rate": 7.4708767069520895e-06, "loss": 1.4999, "step": 13810 }, { "epoch": 0.4073418841220699, "grad_norm": 3.198008456714863, "learning_rate": 7.470796636971497e-06, "loss": 1.5245, "step": 13815 }, { "epoch": 0.4074893115140793, "grad_norm": 3.4288325475335104, "learning_rate": 7.470716457502214e-06, "loss": 1.5924, "step": 13820 }, { "epoch": 0.40763673890608876, "grad_norm": 3.4003656872114743, "learning_rate": 7.4706361685466005e-06, "loss": 1.5082, "step": 13825 }, { "epoch": 0.40778416629809816, "grad_norm": 3.3335028492782453, "learning_rate": 7.47055577010702e-06, "loss": 1.5504, "step": 13830 }, { "epoch": 0.4079315936901076, "grad_norm": 3.468885725729742, "learning_rate": 7.470475262185836e-06, "loss": 1.5065, "step": 13835 }, { "epoch": 0.4080790210821171, "grad_norm": 3.3142584701174194, "learning_rate": 7.470394644785419e-06, "loss": 1.5393, "step": 13840 }, { "epoch": 0.4082264484741265, "grad_norm": 3.1832302018521976, "learning_rate": 7.470313917908143e-06, "loss": 1.539, "step": 13845 }, { "epoch": 0.40837387586613594, "grad_norm": 3.4581742365737753, "learning_rate": 7.4702330815563805e-06, "loss": 1.5164, "step": 13850 }, { "epoch": 0.40852130325814534, "grad_norm": 3.4497579072536504, "learning_rate": 7.470152135732512e-06, "loss": 1.5871, "step": 13855 }, { "epoch": 0.4086687306501548, "grad_norm": 3.465143095879969, "learning_rate": 7.470071080438919e-06, "loss": 1.5367, "step": 13860 }, { "epoch": 0.40881615804216426, "grad_norm": 3.3289955978352372, "learning_rate": 7.469989915677986e-06, "loss": 1.4934, "step": 13865 }, { "epoch": 0.40896358543417366, "grad_norm": 3.094519346522396, "learning_rate": 7.4699086414521036e-06, "loss": 1.4941, "step": 13870 }, { "epoch": 0.4091110128261831, "grad_norm": 3.2845205521367054, "learning_rate": 7.46982725776366e-06, "loss": 1.5178, "step": 13875 }, { "epoch": 0.4092584402181925, "grad_norm": 3.2200100361455575, "learning_rate": 7.469745764615052e-06, "loss": 1.5518, "step": 13880 }, { "epoch": 0.409405867610202, "grad_norm": 3.2125811593503286, "learning_rate": 7.469664162008679e-06, "loss": 1.5187, "step": 13885 }, { "epoch": 0.40955329500221144, "grad_norm": 3.419122480688375, "learning_rate": 7.4695824499469386e-06, "loss": 1.5211, "step": 13890 }, { "epoch": 0.40970072239422084, "grad_norm": 3.389961187854673, "learning_rate": 7.469500628432238e-06, "loss": 1.4936, "step": 13895 }, { "epoch": 0.4098481497862303, "grad_norm": 3.4875753322783454, "learning_rate": 7.469418697466984e-06, "loss": 1.5263, "step": 13900 }, { "epoch": 0.4099955771782397, "grad_norm": 3.282427557209931, "learning_rate": 7.4693366570535885e-06, "loss": 1.4942, "step": 13905 }, { "epoch": 0.41014300457024916, "grad_norm": 3.207461384633382, "learning_rate": 7.4692545071944636e-06, "loss": 1.4851, "step": 13910 }, { "epoch": 0.41029043196225856, "grad_norm": 3.4642166619894037, "learning_rate": 7.469172247892028e-06, "loss": 1.5715, "step": 13915 }, { "epoch": 0.410437859354268, "grad_norm": 3.3261986265072068, "learning_rate": 7.469089879148704e-06, "loss": 1.5184, "step": 13920 }, { "epoch": 0.4105852867462775, "grad_norm": 3.6330117048072452, "learning_rate": 7.4690074009669115e-06, "loss": 1.5149, "step": 13925 }, { "epoch": 0.4107327141382869, "grad_norm": 3.4372202909717156, "learning_rate": 7.46892481334908e-06, "loss": 1.5029, "step": 13930 }, { "epoch": 0.41088014153029634, "grad_norm": 3.2002919614383845, "learning_rate": 7.46884211629764e-06, "loss": 1.5629, "step": 13935 }, { "epoch": 0.41102756892230574, "grad_norm": 3.2870673354787203, "learning_rate": 7.4687593098150245e-06, "loss": 1.5501, "step": 13940 }, { "epoch": 0.4111749963143152, "grad_norm": 3.2464580193200137, "learning_rate": 7.468676393903669e-06, "loss": 1.5289, "step": 13945 }, { "epoch": 0.41132242370632466, "grad_norm": 3.348346641884142, "learning_rate": 7.468593368566014e-06, "loss": 1.4766, "step": 13950 }, { "epoch": 0.41146985109833406, "grad_norm": 3.5795642746481753, "learning_rate": 7.468510233804504e-06, "loss": 1.5344, "step": 13955 }, { "epoch": 0.4116172784903435, "grad_norm": 3.3812884853521994, "learning_rate": 7.468426989621584e-06, "loss": 1.5297, "step": 13960 }, { "epoch": 0.4117647058823529, "grad_norm": 3.43258844675811, "learning_rate": 7.468343636019703e-06, "loss": 1.5176, "step": 13965 }, { "epoch": 0.4119121332743624, "grad_norm": 3.333375128342099, "learning_rate": 7.468260173001316e-06, "loss": 1.524, "step": 13970 }, { "epoch": 0.41205956066637184, "grad_norm": 3.281625005518112, "learning_rate": 7.468176600568877e-06, "loss": 1.4695, "step": 13975 }, { "epoch": 0.41220698805838124, "grad_norm": 3.4169675842779608, "learning_rate": 7.4680929187248455e-06, "loss": 1.4696, "step": 13980 }, { "epoch": 0.4123544154503907, "grad_norm": 3.4753992651796475, "learning_rate": 7.468009127471684e-06, "loss": 1.5567, "step": 13985 }, { "epoch": 0.4125018428424001, "grad_norm": 3.8084407029100547, "learning_rate": 7.467925226811859e-06, "loss": 1.5954, "step": 13990 }, { "epoch": 0.41264927023440956, "grad_norm": 3.5041146621234662, "learning_rate": 7.467841216747839e-06, "loss": 1.4915, "step": 13995 }, { "epoch": 0.41279669762641896, "grad_norm": 3.126457262488052, "learning_rate": 7.4677570972820954e-06, "loss": 1.4941, "step": 14000 }, { "epoch": 0.41279669762641896, "eval_loss": 1.2161051034927368, "eval_runtime": 4.2315, "eval_samples_per_second": 93.584, "eval_steps_per_second": 3.072, "step": 14000 }, { "epoch": 0.4129441250184284, "grad_norm": 3.2285800243777265, "learning_rate": 7.4676728684171035e-06, "loss": 1.5302, "step": 14005 }, { "epoch": 0.4130915524104379, "grad_norm": 3.236524854357448, "learning_rate": 7.467588530155343e-06, "loss": 1.5196, "step": 14010 }, { "epoch": 0.4132389798024473, "grad_norm": 3.2938996442328823, "learning_rate": 7.467504082499296e-06, "loss": 1.4976, "step": 14015 }, { "epoch": 0.41338640719445674, "grad_norm": 3.1703008580464167, "learning_rate": 7.467419525451444e-06, "loss": 1.5425, "step": 14020 }, { "epoch": 0.41353383458646614, "grad_norm": 3.466252866508389, "learning_rate": 7.467334859014279e-06, "loss": 1.5487, "step": 14025 }, { "epoch": 0.4136812619784756, "grad_norm": 3.207058832756397, "learning_rate": 7.467250083190292e-06, "loss": 1.5708, "step": 14030 }, { "epoch": 0.41382868937048506, "grad_norm": 3.1337873755537613, "learning_rate": 7.467165197981975e-06, "loss": 1.4784, "step": 14035 }, { "epoch": 0.41397611676249446, "grad_norm": 3.4891645035352714, "learning_rate": 7.4670802033918285e-06, "loss": 1.5266, "step": 14040 }, { "epoch": 0.4141235441545039, "grad_norm": 3.3510568394440132, "learning_rate": 7.466995099422352e-06, "loss": 1.5328, "step": 14045 }, { "epoch": 0.4142709715465133, "grad_norm": 3.4456512375936588, "learning_rate": 7.46690988607605e-06, "loss": 1.5729, "step": 14050 }, { "epoch": 0.4144183989385228, "grad_norm": 3.351643978423389, "learning_rate": 7.466824563355431e-06, "loss": 1.5185, "step": 14055 }, { "epoch": 0.41456582633053224, "grad_norm": 3.2731227743665356, "learning_rate": 7.466739131263005e-06, "loss": 1.5086, "step": 14060 }, { "epoch": 0.41471325372254164, "grad_norm": 3.5074278520360966, "learning_rate": 7.466653589801286e-06, "loss": 1.5152, "step": 14065 }, { "epoch": 0.4148606811145511, "grad_norm": 3.2480324321204526, "learning_rate": 7.46656793897279e-06, "loss": 1.5235, "step": 14070 }, { "epoch": 0.4150081085065605, "grad_norm": 3.563713349801375, "learning_rate": 7.46648217878004e-06, "loss": 1.5936, "step": 14075 }, { "epoch": 0.41515553589856996, "grad_norm": 3.2193795001146928, "learning_rate": 7.4663963092255575e-06, "loss": 1.5421, "step": 14080 }, { "epoch": 0.41530296329057936, "grad_norm": 3.255328667883466, "learning_rate": 7.46631033031187e-06, "loss": 1.5724, "step": 14085 }, { "epoch": 0.4154503906825888, "grad_norm": 3.238644501359267, "learning_rate": 7.466224242041507e-06, "loss": 1.5366, "step": 14090 }, { "epoch": 0.4155978180745983, "grad_norm": 3.305343960726532, "learning_rate": 7.466138044417003e-06, "loss": 1.4951, "step": 14095 }, { "epoch": 0.4157452454666077, "grad_norm": 5.581109755611932, "learning_rate": 7.4660517374408935e-06, "loss": 1.5585, "step": 14100 }, { "epoch": 0.41589267285861714, "grad_norm": 3.2065845157986517, "learning_rate": 7.465965321115718e-06, "loss": 1.5473, "step": 14105 }, { "epoch": 0.41604010025062654, "grad_norm": 3.2740780556512283, "learning_rate": 7.465878795444021e-06, "loss": 1.5491, "step": 14110 }, { "epoch": 0.416187527642636, "grad_norm": 3.385871894827695, "learning_rate": 7.4657921604283455e-06, "loss": 1.5347, "step": 14115 }, { "epoch": 0.41633495503464546, "grad_norm": 3.2863062391872195, "learning_rate": 7.465705416071244e-06, "loss": 1.4982, "step": 14120 }, { "epoch": 0.41648238242665486, "grad_norm": 3.302845362836085, "learning_rate": 7.4656185623752664e-06, "loss": 1.5639, "step": 14125 }, { "epoch": 0.4166298098186643, "grad_norm": 3.2997386971522875, "learning_rate": 7.465531599342971e-06, "loss": 1.5064, "step": 14130 }, { "epoch": 0.4167772372106737, "grad_norm": 3.27258938161462, "learning_rate": 7.465444526976915e-06, "loss": 1.5349, "step": 14135 }, { "epoch": 0.4169246646026832, "grad_norm": 3.344329232273585, "learning_rate": 7.465357345279661e-06, "loss": 1.506, "step": 14140 }, { "epoch": 0.41707209199469264, "grad_norm": 3.8070477120200574, "learning_rate": 7.465270054253775e-06, "loss": 1.5429, "step": 14145 }, { "epoch": 0.41721951938670204, "grad_norm": 3.215342487485494, "learning_rate": 7.465182653901826e-06, "loss": 1.5636, "step": 14150 }, { "epoch": 0.4173669467787115, "grad_norm": 3.3226137838734813, "learning_rate": 7.465095144226384e-06, "loss": 1.4817, "step": 14155 }, { "epoch": 0.4175143741707209, "grad_norm": 3.190706483148749, "learning_rate": 7.465007525230026e-06, "loss": 1.5539, "step": 14160 }, { "epoch": 0.41766180156273036, "grad_norm": 3.369523063985476, "learning_rate": 7.464919796915329e-06, "loss": 1.5482, "step": 14165 }, { "epoch": 0.41780922895473976, "grad_norm": 3.5489357247579565, "learning_rate": 7.464831959284875e-06, "loss": 1.5473, "step": 14170 }, { "epoch": 0.4179566563467492, "grad_norm": 3.4483915040936655, "learning_rate": 7.464744012341248e-06, "loss": 1.5831, "step": 14175 }, { "epoch": 0.4181040837387587, "grad_norm": 3.6127472365451396, "learning_rate": 7.4646559560870375e-06, "loss": 1.5385, "step": 14180 }, { "epoch": 0.4182515111307681, "grad_norm": 3.3118505224229287, "learning_rate": 7.464567790524834e-06, "loss": 1.4679, "step": 14185 }, { "epoch": 0.41839893852277754, "grad_norm": 3.254191008598484, "learning_rate": 7.464479515657232e-06, "loss": 1.5332, "step": 14190 }, { "epoch": 0.41854636591478694, "grad_norm": 3.1844750866486162, "learning_rate": 7.464391131486828e-06, "loss": 1.5133, "step": 14195 }, { "epoch": 0.4186937933067964, "grad_norm": 3.410771777098903, "learning_rate": 7.4643026380162235e-06, "loss": 1.5494, "step": 14200 }, { "epoch": 0.41884122069880586, "grad_norm": 3.6006405053054555, "learning_rate": 7.464214035248023e-06, "loss": 1.5342, "step": 14205 }, { "epoch": 0.41898864809081526, "grad_norm": 3.475726815934641, "learning_rate": 7.464125323184832e-06, "loss": 1.5159, "step": 14210 }, { "epoch": 0.4191360754828247, "grad_norm": 3.171901747958695, "learning_rate": 7.464036501829264e-06, "loss": 1.519, "step": 14215 }, { "epoch": 0.4192835028748341, "grad_norm": 4.403100178408928, "learning_rate": 7.46394757118393e-06, "loss": 1.5231, "step": 14220 }, { "epoch": 0.4194309302668436, "grad_norm": 3.26413146031754, "learning_rate": 7.463858531251449e-06, "loss": 1.5228, "step": 14225 }, { "epoch": 0.41957835765885304, "grad_norm": 3.3672877161194115, "learning_rate": 7.463769382034438e-06, "loss": 1.5617, "step": 14230 }, { "epoch": 0.41972578505086244, "grad_norm": 3.2472891283140286, "learning_rate": 7.463680123535525e-06, "loss": 1.55, "step": 14235 }, { "epoch": 0.4198732124428719, "grad_norm": 3.9777073639391536, "learning_rate": 7.463590755757332e-06, "loss": 1.5702, "step": 14240 }, { "epoch": 0.4200206398348813, "grad_norm": 3.292171520882462, "learning_rate": 7.46350127870249e-06, "loss": 1.4873, "step": 14245 }, { "epoch": 0.42016806722689076, "grad_norm": 3.2423815197696855, "learning_rate": 7.463411692373633e-06, "loss": 1.5565, "step": 14250 }, { "epoch": 0.42031549461890017, "grad_norm": 3.2795840655737636, "learning_rate": 7.463321996773397e-06, "loss": 1.5596, "step": 14255 }, { "epoch": 0.4204629220109096, "grad_norm": 3.222331508263172, "learning_rate": 7.463232191904421e-06, "loss": 1.5, "step": 14260 }, { "epoch": 0.4206103494029191, "grad_norm": 3.2164262878747376, "learning_rate": 7.463142277769347e-06, "loss": 1.5206, "step": 14265 }, { "epoch": 0.4207577767949285, "grad_norm": 3.3624861827683152, "learning_rate": 7.463052254370823e-06, "loss": 1.5639, "step": 14270 }, { "epoch": 0.42090520418693794, "grad_norm": 3.2705903335065907, "learning_rate": 7.4629621217114955e-06, "loss": 1.483, "step": 14275 }, { "epoch": 0.42105263157894735, "grad_norm": 3.333129689865413, "learning_rate": 7.462871879794018e-06, "loss": 1.4814, "step": 14280 }, { "epoch": 0.4212000589709568, "grad_norm": 3.3754777984015085, "learning_rate": 7.462781528621046e-06, "loss": 1.5261, "step": 14285 }, { "epoch": 0.42134748636296626, "grad_norm": 3.2178980580401997, "learning_rate": 7.462691068195238e-06, "loss": 1.4993, "step": 14290 }, { "epoch": 0.42149491375497566, "grad_norm": 3.4280074576130244, "learning_rate": 7.462600498519255e-06, "loss": 1.5401, "step": 14295 }, { "epoch": 0.4216423411469851, "grad_norm": 3.322517295398224, "learning_rate": 7.462509819595764e-06, "loss": 1.5125, "step": 14300 }, { "epoch": 0.4217897685389945, "grad_norm": 3.284909951972764, "learning_rate": 7.4624190314274315e-06, "loss": 1.5557, "step": 14305 }, { "epoch": 0.421937195931004, "grad_norm": 3.276943004601162, "learning_rate": 7.462328134016931e-06, "loss": 1.4965, "step": 14310 }, { "epoch": 0.42208462332301344, "grad_norm": 3.3232626462616506, "learning_rate": 7.462237127366935e-06, "loss": 1.5029, "step": 14315 }, { "epoch": 0.42223205071502284, "grad_norm": 3.231243384731207, "learning_rate": 7.462146011480122e-06, "loss": 1.4457, "step": 14320 }, { "epoch": 0.4223794781070323, "grad_norm": 3.282283806518266, "learning_rate": 7.462054786359175e-06, "loss": 1.5274, "step": 14325 }, { "epoch": 0.4225269054990417, "grad_norm": 3.207184714287689, "learning_rate": 7.461963452006778e-06, "loss": 1.5348, "step": 14330 }, { "epoch": 0.42267433289105116, "grad_norm": 3.364473689320174, "learning_rate": 7.461872008425617e-06, "loss": 1.5846, "step": 14335 }, { "epoch": 0.42282176028306057, "grad_norm": 3.1969079927701016, "learning_rate": 7.461780455618383e-06, "loss": 1.5445, "step": 14340 }, { "epoch": 0.42296918767507, "grad_norm": 3.348146654076765, "learning_rate": 7.461688793587771e-06, "loss": 1.5334, "step": 14345 }, { "epoch": 0.4231166150670795, "grad_norm": 3.248293600844691, "learning_rate": 7.461597022336477e-06, "loss": 1.5589, "step": 14350 }, { "epoch": 0.4232640424590889, "grad_norm": 3.2498793104505586, "learning_rate": 7.4615051418672046e-06, "loss": 1.5428, "step": 14355 }, { "epoch": 0.42341146985109834, "grad_norm": 3.3397279730966445, "learning_rate": 7.461413152182654e-06, "loss": 1.5526, "step": 14360 }, { "epoch": 0.42355889724310775, "grad_norm": 3.167877120364172, "learning_rate": 7.461321053285534e-06, "loss": 1.5495, "step": 14365 }, { "epoch": 0.4237063246351172, "grad_norm": 3.2358859743607256, "learning_rate": 7.461228845178553e-06, "loss": 1.5113, "step": 14370 }, { "epoch": 0.42385375202712666, "grad_norm": 3.2466493914028214, "learning_rate": 7.461136527864426e-06, "loss": 1.5608, "step": 14375 }, { "epoch": 0.42400117941913607, "grad_norm": 3.418299760866348, "learning_rate": 7.46104410134587e-06, "loss": 1.5452, "step": 14380 }, { "epoch": 0.4241486068111455, "grad_norm": 3.246217727409137, "learning_rate": 7.460951565625603e-06, "loss": 1.5548, "step": 14385 }, { "epoch": 0.4242960342031549, "grad_norm": 3.237622415202046, "learning_rate": 7.4608589207063485e-06, "loss": 1.4639, "step": 14390 }, { "epoch": 0.4244434615951644, "grad_norm": 3.226446785823296, "learning_rate": 7.460766166590833e-06, "loss": 1.58, "step": 14395 }, { "epoch": 0.42459088898717384, "grad_norm": 3.049403906559593, "learning_rate": 7.460673303281786e-06, "loss": 1.5372, "step": 14400 }, { "epoch": 0.42473831637918324, "grad_norm": 2.963963731274923, "learning_rate": 7.460580330781939e-06, "loss": 1.4995, "step": 14405 }, { "epoch": 0.4248857437711927, "grad_norm": 3.428678010744318, "learning_rate": 7.46048724909403e-06, "loss": 1.4883, "step": 14410 }, { "epoch": 0.4250331711632021, "grad_norm": 3.38745686407169, "learning_rate": 7.460394058220796e-06, "loss": 1.5766, "step": 14415 }, { "epoch": 0.42518059855521156, "grad_norm": 3.2063710645159684, "learning_rate": 7.460300758164981e-06, "loss": 1.5131, "step": 14420 }, { "epoch": 0.42532802594722097, "grad_norm": 3.138367220862971, "learning_rate": 7.460207348929327e-06, "loss": 1.5441, "step": 14425 }, { "epoch": 0.4254754533392304, "grad_norm": 3.294106057964088, "learning_rate": 7.460113830516588e-06, "loss": 1.4892, "step": 14430 }, { "epoch": 0.4256228807312399, "grad_norm": 3.1435151622417514, "learning_rate": 7.460020202929512e-06, "loss": 1.4834, "step": 14435 }, { "epoch": 0.4257703081232493, "grad_norm": 3.5833150373390206, "learning_rate": 7.4599264661708554e-06, "loss": 1.5236, "step": 14440 }, { "epoch": 0.42591773551525874, "grad_norm": 3.1279180644855864, "learning_rate": 7.459832620243376e-06, "loss": 1.5458, "step": 14445 }, { "epoch": 0.42606516290726815, "grad_norm": 3.4033484503699847, "learning_rate": 7.459738665149834e-06, "loss": 1.5111, "step": 14450 }, { "epoch": 0.4262125902992776, "grad_norm": 3.1678308448759287, "learning_rate": 7.459644600892998e-06, "loss": 1.5182, "step": 14455 }, { "epoch": 0.42636001769128706, "grad_norm": 3.6038510759504767, "learning_rate": 7.459550427475633e-06, "loss": 1.5641, "step": 14460 }, { "epoch": 0.42650744508329647, "grad_norm": 3.3301009561232235, "learning_rate": 7.45945614490051e-06, "loss": 1.5715, "step": 14465 }, { "epoch": 0.4266548724753059, "grad_norm": 3.3772928489712464, "learning_rate": 7.459361753170406e-06, "loss": 1.5849, "step": 14470 }, { "epoch": 0.4268022998673153, "grad_norm": 3.478390091800879, "learning_rate": 7.459267252288095e-06, "loss": 1.5193, "step": 14475 }, { "epoch": 0.4269497272593248, "grad_norm": 3.4970911760464234, "learning_rate": 7.45917264225636e-06, "loss": 1.5134, "step": 14480 }, { "epoch": 0.42709715465133424, "grad_norm": 3.4375755040425804, "learning_rate": 7.4590779230779846e-06, "loss": 1.5378, "step": 14485 }, { "epoch": 0.42724458204334365, "grad_norm": 3.2592080881565066, "learning_rate": 7.458983094755756e-06, "loss": 1.5026, "step": 14490 }, { "epoch": 0.4273920094353531, "grad_norm": 3.2763459850859893, "learning_rate": 7.458888157292464e-06, "loss": 1.4604, "step": 14495 }, { "epoch": 0.4275394368273625, "grad_norm": 3.3787508678507874, "learning_rate": 7.458793110690905e-06, "loss": 1.5154, "step": 14500 }, { "epoch": 0.4275394368273625, "eval_loss": 1.2132644653320312, "eval_runtime": 4.1858, "eval_samples_per_second": 94.605, "eval_steps_per_second": 3.106, "step": 14500 }, { "epoch": 0.42768686421937196, "grad_norm": 3.3773019534932875, "learning_rate": 7.458697954953872e-06, "loss": 1.4916, "step": 14505 }, { "epoch": 0.42783429161138137, "grad_norm": 3.2402925615732907, "learning_rate": 7.4586026900841665e-06, "loss": 1.5604, "step": 14510 }, { "epoch": 0.4279817190033908, "grad_norm": 3.3414394488642345, "learning_rate": 7.458507316084593e-06, "loss": 1.5139, "step": 14515 }, { "epoch": 0.4281291463954003, "grad_norm": 3.4595658035448995, "learning_rate": 7.458411832957956e-06, "loss": 1.5254, "step": 14520 }, { "epoch": 0.4282765737874097, "grad_norm": 3.1324255491182704, "learning_rate": 7.458316240707067e-06, "loss": 1.5391, "step": 14525 }, { "epoch": 0.42842400117941914, "grad_norm": 3.432790349972317, "learning_rate": 7.458220539334738e-06, "loss": 1.5398, "step": 14530 }, { "epoch": 0.42857142857142855, "grad_norm": 3.2391905034392665, "learning_rate": 7.458124728843786e-06, "loss": 1.5048, "step": 14535 }, { "epoch": 0.428718855963438, "grad_norm": 3.539210278223281, "learning_rate": 7.458028809237029e-06, "loss": 1.4958, "step": 14540 }, { "epoch": 0.42886628335544746, "grad_norm": 3.4025441987773335, "learning_rate": 7.45793278051729e-06, "loss": 1.5879, "step": 14545 }, { "epoch": 0.42901371074745687, "grad_norm": 3.33973701885931, "learning_rate": 7.457836642687395e-06, "loss": 1.565, "step": 14550 }, { "epoch": 0.4291611381394663, "grad_norm": 3.2736132595941116, "learning_rate": 7.457740395750173e-06, "loss": 1.5494, "step": 14555 }, { "epoch": 0.4293085655314757, "grad_norm": 3.3922453756660693, "learning_rate": 7.4576440397084556e-06, "loss": 1.5692, "step": 14560 }, { "epoch": 0.4294559929234852, "grad_norm": 3.5293471292421583, "learning_rate": 7.457547574565079e-06, "loss": 1.5416, "step": 14565 }, { "epoch": 0.42960342031549464, "grad_norm": 3.2926245567610297, "learning_rate": 7.4574510003228825e-06, "loss": 1.492, "step": 14570 }, { "epoch": 0.42975084770750405, "grad_norm": 3.4266939738456, "learning_rate": 7.457354316984705e-06, "loss": 1.5252, "step": 14575 }, { "epoch": 0.4298982750995135, "grad_norm": 3.4147542081319653, "learning_rate": 7.457257524553395e-06, "loss": 1.5306, "step": 14580 }, { "epoch": 0.4300457024915229, "grad_norm": 3.465513182822443, "learning_rate": 7.457160623031798e-06, "loss": 1.4912, "step": 14585 }, { "epoch": 0.43019312988353237, "grad_norm": 3.2651139980663366, "learning_rate": 7.457063612422767e-06, "loss": 1.4894, "step": 14590 }, { "epoch": 0.43034055727554177, "grad_norm": 3.249474250258343, "learning_rate": 7.4569664927291555e-06, "loss": 1.4945, "step": 14595 }, { "epoch": 0.4304879846675512, "grad_norm": 3.400345416338405, "learning_rate": 7.456869263953822e-06, "loss": 1.4912, "step": 14600 }, { "epoch": 0.4306354120595607, "grad_norm": 3.2674215774409308, "learning_rate": 7.456771926099629e-06, "loss": 1.5071, "step": 14605 }, { "epoch": 0.4307828394515701, "grad_norm": 2.990169215331368, "learning_rate": 7.456674479169439e-06, "loss": 1.5377, "step": 14610 }, { "epoch": 0.43093026684357955, "grad_norm": 3.3202240613504936, "learning_rate": 7.4565769231661186e-06, "loss": 1.5252, "step": 14615 }, { "epoch": 0.43107769423558895, "grad_norm": 3.29630969086542, "learning_rate": 7.4564792580925405e-06, "loss": 1.5619, "step": 14620 }, { "epoch": 0.4312251216275984, "grad_norm": 3.5652720368798234, "learning_rate": 7.456381483951578e-06, "loss": 1.5744, "step": 14625 }, { "epoch": 0.43137254901960786, "grad_norm": 3.2500041365078878, "learning_rate": 7.456283600746109e-06, "loss": 1.5264, "step": 14630 }, { "epoch": 0.43151997641161727, "grad_norm": 3.2603757578409476, "learning_rate": 7.456185608479012e-06, "loss": 1.5144, "step": 14635 }, { "epoch": 0.4316674038036267, "grad_norm": 3.2536517102652978, "learning_rate": 7.456087507153171e-06, "loss": 1.5362, "step": 14640 }, { "epoch": 0.43181483119563613, "grad_norm": 3.273537277234294, "learning_rate": 7.455989296771473e-06, "loss": 1.5528, "step": 14645 }, { "epoch": 0.4319622585876456, "grad_norm": 3.092728226861189, "learning_rate": 7.45589097733681e-06, "loss": 1.5351, "step": 14650 }, { "epoch": 0.43210968597965504, "grad_norm": 3.2193016875768232, "learning_rate": 7.4557925488520714e-06, "loss": 1.5481, "step": 14655 }, { "epoch": 0.43225711337166445, "grad_norm": 3.3343634073033463, "learning_rate": 7.4556940113201564e-06, "loss": 1.5131, "step": 14660 }, { "epoch": 0.4324045407636739, "grad_norm": 3.216149290978993, "learning_rate": 7.455595364743963e-06, "loss": 1.4779, "step": 14665 }, { "epoch": 0.4325519681556833, "grad_norm": 3.237564685187472, "learning_rate": 7.455496609126396e-06, "loss": 1.5217, "step": 14670 }, { "epoch": 0.43269939554769277, "grad_norm": 3.3502267297295067, "learning_rate": 7.455397744470358e-06, "loss": 1.5632, "step": 14675 }, { "epoch": 0.43284682293970217, "grad_norm": 3.261801528860382, "learning_rate": 7.455298770778762e-06, "loss": 1.5051, "step": 14680 }, { "epoch": 0.4329942503317116, "grad_norm": 3.249743297096111, "learning_rate": 7.4551996880545175e-06, "loss": 1.5188, "step": 14685 }, { "epoch": 0.4331416777237211, "grad_norm": 3.2338605594612173, "learning_rate": 7.455100496300543e-06, "loss": 1.4903, "step": 14690 }, { "epoch": 0.4332891051157305, "grad_norm": 3.2857119335681766, "learning_rate": 7.455001195519753e-06, "loss": 1.5268, "step": 14695 }, { "epoch": 0.43343653250773995, "grad_norm": 3.491754843156003, "learning_rate": 7.454901785715074e-06, "loss": 1.4895, "step": 14700 }, { "epoch": 0.43358395989974935, "grad_norm": 3.3638727199607334, "learning_rate": 7.45480226688943e-06, "loss": 1.5335, "step": 14705 }, { "epoch": 0.4337313872917588, "grad_norm": 3.4992635281748155, "learning_rate": 7.4547026390457465e-06, "loss": 1.5665, "step": 14710 }, { "epoch": 0.43387881468376827, "grad_norm": 3.2321030310625405, "learning_rate": 7.45460290218696e-06, "loss": 1.4918, "step": 14715 }, { "epoch": 0.43402624207577767, "grad_norm": 3.3919242187057614, "learning_rate": 7.454503056316001e-06, "loss": 1.4896, "step": 14720 }, { "epoch": 0.4341736694677871, "grad_norm": 3.2189579639350367, "learning_rate": 7.4544031014358114e-06, "loss": 1.5356, "step": 14725 }, { "epoch": 0.43432109685979653, "grad_norm": 3.1498074258677367, "learning_rate": 7.454303037549329e-06, "loss": 1.5236, "step": 14730 }, { "epoch": 0.434468524251806, "grad_norm": 3.1919472689899746, "learning_rate": 7.4542028646595e-06, "loss": 1.5079, "step": 14735 }, { "epoch": 0.43461595164381545, "grad_norm": 3.31495340761154, "learning_rate": 7.454102582769272e-06, "loss": 1.5374, "step": 14740 }, { "epoch": 0.43476337903582485, "grad_norm": 3.214157322305166, "learning_rate": 7.454002191881596e-06, "loss": 1.5041, "step": 14745 }, { "epoch": 0.4349108064278343, "grad_norm": 3.244918366786722, "learning_rate": 7.453901691999426e-06, "loss": 1.4896, "step": 14750 }, { "epoch": 0.4350582338198437, "grad_norm": 3.196159087533993, "learning_rate": 7.4538010831257195e-06, "loss": 1.4914, "step": 14755 }, { "epoch": 0.43520566121185317, "grad_norm": 3.2274309891543544, "learning_rate": 7.453700365263436e-06, "loss": 1.4805, "step": 14760 }, { "epoch": 0.43535308860386257, "grad_norm": 3.418368751207219, "learning_rate": 7.4535995384155416e-06, "loss": 1.5379, "step": 14765 }, { "epoch": 0.43550051599587203, "grad_norm": 3.3983961313571838, "learning_rate": 7.453498602585e-06, "loss": 1.5042, "step": 14770 }, { "epoch": 0.4356479433878815, "grad_norm": 3.314358385850596, "learning_rate": 7.4533975577747844e-06, "loss": 1.5232, "step": 14775 }, { "epoch": 0.4357953707798909, "grad_norm": 3.4975359378434026, "learning_rate": 7.453296403987866e-06, "loss": 1.5295, "step": 14780 }, { "epoch": 0.43594279817190035, "grad_norm": 3.338897439038144, "learning_rate": 7.453195141227223e-06, "loss": 1.5478, "step": 14785 }, { "epoch": 0.43609022556390975, "grad_norm": 3.2633524694131384, "learning_rate": 7.453093769495833e-06, "loss": 1.5163, "step": 14790 }, { "epoch": 0.4362376529559192, "grad_norm": 3.4137552962175226, "learning_rate": 7.452992288796682e-06, "loss": 1.5185, "step": 14795 }, { "epoch": 0.43638508034792867, "grad_norm": 3.144315204193428, "learning_rate": 7.4528906991327534e-06, "loss": 1.5216, "step": 14800 }, { "epoch": 0.43653250773993807, "grad_norm": 3.353288865857677, "learning_rate": 7.452789000507038e-06, "loss": 1.5264, "step": 14805 }, { "epoch": 0.4366799351319475, "grad_norm": 3.1962595578205804, "learning_rate": 7.452687192922528e-06, "loss": 1.5016, "step": 14810 }, { "epoch": 0.43682736252395693, "grad_norm": 3.4118094030942117, "learning_rate": 7.45258527638222e-06, "loss": 1.5615, "step": 14815 }, { "epoch": 0.4369747899159664, "grad_norm": 3.409255479685228, "learning_rate": 7.452483250889112e-06, "loss": 1.5883, "step": 14820 }, { "epoch": 0.43712221730797585, "grad_norm": 3.239733591325414, "learning_rate": 7.4523811164462065e-06, "loss": 1.5055, "step": 14825 }, { "epoch": 0.43726964469998525, "grad_norm": 3.456481186355342, "learning_rate": 7.452278873056509e-06, "loss": 1.5283, "step": 14830 }, { "epoch": 0.4374170720919947, "grad_norm": 3.203994929814341, "learning_rate": 7.452176520723027e-06, "loss": 1.5362, "step": 14835 }, { "epoch": 0.4375644994840041, "grad_norm": 3.1157405942398873, "learning_rate": 7.452074059448776e-06, "loss": 1.5222, "step": 14840 }, { "epoch": 0.43771192687601357, "grad_norm": 3.182319016444764, "learning_rate": 7.451971489236766e-06, "loss": 1.5206, "step": 14845 }, { "epoch": 0.43785935426802297, "grad_norm": 3.239144084463073, "learning_rate": 7.451868810090018e-06, "loss": 1.5177, "step": 14850 }, { "epoch": 0.43800678166003243, "grad_norm": 3.203250441938573, "learning_rate": 7.4517660220115545e-06, "loss": 1.5438, "step": 14855 }, { "epoch": 0.4381542090520419, "grad_norm": 3.3351063358767146, "learning_rate": 7.451663125004398e-06, "loss": 1.4893, "step": 14860 }, { "epoch": 0.4383016364440513, "grad_norm": 3.182406873347497, "learning_rate": 7.4515601190715775e-06, "loss": 1.5482, "step": 14865 }, { "epoch": 0.43844906383606075, "grad_norm": 3.280530796539232, "learning_rate": 7.451457004216124e-06, "loss": 1.5305, "step": 14870 }, { "epoch": 0.43859649122807015, "grad_norm": 3.0315552265792354, "learning_rate": 7.45135378044107e-06, "loss": 1.5156, "step": 14875 }, { "epoch": 0.4387439186200796, "grad_norm": 3.2432830075858106, "learning_rate": 7.451250447749456e-06, "loss": 1.4874, "step": 14880 }, { "epoch": 0.43889134601208907, "grad_norm": 3.1259612838307067, "learning_rate": 7.451147006144322e-06, "loss": 1.5171, "step": 14885 }, { "epoch": 0.43903877340409847, "grad_norm": 3.553827618409663, "learning_rate": 7.4510434556287094e-06, "loss": 1.5199, "step": 14890 }, { "epoch": 0.43918620079610793, "grad_norm": 3.747792210245014, "learning_rate": 7.450939796205667e-06, "loss": 1.5209, "step": 14895 }, { "epoch": 0.43933362818811733, "grad_norm": 3.3026138047749116, "learning_rate": 7.450836027878245e-06, "loss": 1.5751, "step": 14900 }, { "epoch": 0.4394810555801268, "grad_norm": 3.361133529797527, "learning_rate": 7.450732150649498e-06, "loss": 1.471, "step": 14905 }, { "epoch": 0.43962848297213625, "grad_norm": 3.180504445357127, "learning_rate": 7.450628164522481e-06, "loss": 1.4948, "step": 14910 }, { "epoch": 0.43977591036414565, "grad_norm": 3.044996596223047, "learning_rate": 7.450524069500255e-06, "loss": 1.5046, "step": 14915 }, { "epoch": 0.4399233377561551, "grad_norm": 3.065984662935774, "learning_rate": 7.450419865585882e-06, "loss": 1.4551, "step": 14920 }, { "epoch": 0.4400707651481645, "grad_norm": 3.420964611651224, "learning_rate": 7.45031555278243e-06, "loss": 1.5665, "step": 14925 }, { "epoch": 0.44021819254017397, "grad_norm": 3.3831586048842444, "learning_rate": 7.450211131092966e-06, "loss": 1.513, "step": 14930 }, { "epoch": 0.44036561993218337, "grad_norm": 3.137467882150807, "learning_rate": 7.450106600520566e-06, "loss": 1.546, "step": 14935 }, { "epoch": 0.44051304732419283, "grad_norm": 3.150066822841559, "learning_rate": 7.4500019610683035e-06, "loss": 1.5283, "step": 14940 }, { "epoch": 0.4406604747162023, "grad_norm": 3.2011522144481184, "learning_rate": 7.449897212739258e-06, "loss": 1.5347, "step": 14945 }, { "epoch": 0.4408079021082117, "grad_norm": 3.2259283867418707, "learning_rate": 7.449792355536512e-06, "loss": 1.5016, "step": 14950 }, { "epoch": 0.44095532950022115, "grad_norm": 3.3487194391618567, "learning_rate": 7.4496873894631524e-06, "loss": 1.5353, "step": 14955 }, { "epoch": 0.44110275689223055, "grad_norm": 3.2278895496008944, "learning_rate": 7.449582314522266e-06, "loss": 1.4935, "step": 14960 }, { "epoch": 0.44125018428424, "grad_norm": 3.3899169952489463, "learning_rate": 7.449477130716945e-06, "loss": 1.5158, "step": 14965 }, { "epoch": 0.44139761167624947, "grad_norm": 3.312560123227704, "learning_rate": 7.4493718380502866e-06, "loss": 1.5481, "step": 14970 }, { "epoch": 0.44154503906825887, "grad_norm": 3.2441711287977317, "learning_rate": 7.449266436525386e-06, "loss": 1.5322, "step": 14975 }, { "epoch": 0.44169246646026833, "grad_norm": 3.2513826083242416, "learning_rate": 7.449160926145347e-06, "loss": 1.5188, "step": 14980 }, { "epoch": 0.44183989385227773, "grad_norm": 3.303628650725777, "learning_rate": 7.449055306913274e-06, "loss": 1.5296, "step": 14985 }, { "epoch": 0.4419873212442872, "grad_norm": 3.4171177669410002, "learning_rate": 7.448949578832275e-06, "loss": 1.5474, "step": 14990 }, { "epoch": 0.44213474863629665, "grad_norm": 3.12866781566196, "learning_rate": 7.44884374190546e-06, "loss": 1.4395, "step": 14995 }, { "epoch": 0.44228217602830605, "grad_norm": 3.083022599838842, "learning_rate": 7.448737796135945e-06, "loss": 1.5121, "step": 15000 }, { "epoch": 0.44228217602830605, "eval_loss": 1.20900297164917, "eval_runtime": 4.3063, "eval_samples_per_second": 91.959, "eval_steps_per_second": 3.019, "step": 15000 }, { "epoch": 0.4424296034203155, "grad_norm": 3.1837145240622577, "learning_rate": 7.448631741526847e-06, "loss": 1.5049, "step": 15005 }, { "epoch": 0.4425770308123249, "grad_norm": 3.3477778976186343, "learning_rate": 7.448525578081286e-06, "loss": 1.485, "step": 15010 }, { "epoch": 0.44272445820433437, "grad_norm": 3.2799808998627977, "learning_rate": 7.4484193058023864e-06, "loss": 1.5392, "step": 15015 }, { "epoch": 0.44287188559634383, "grad_norm": 3.3533251919690987, "learning_rate": 7.4483129246932755e-06, "loss": 1.516, "step": 15020 }, { "epoch": 0.44301931298835323, "grad_norm": 3.178794207062517, "learning_rate": 7.448206434757084e-06, "loss": 1.5878, "step": 15025 }, { "epoch": 0.4431667403803627, "grad_norm": 3.5301889563906506, "learning_rate": 7.448099835996945e-06, "loss": 1.536, "step": 15030 }, { "epoch": 0.4433141677723721, "grad_norm": 3.184834901823788, "learning_rate": 7.447993128415997e-06, "loss": 1.5746, "step": 15035 }, { "epoch": 0.44346159516438155, "grad_norm": 3.2808119964930316, "learning_rate": 7.447886312017377e-06, "loss": 1.5128, "step": 15040 }, { "epoch": 0.44360902255639095, "grad_norm": 3.26966711200968, "learning_rate": 7.44777938680423e-06, "loss": 1.5542, "step": 15045 }, { "epoch": 0.4437564499484004, "grad_norm": 3.3828557218344035, "learning_rate": 7.447672352779703e-06, "loss": 1.5233, "step": 15050 }, { "epoch": 0.44390387734040987, "grad_norm": 3.1747779306454227, "learning_rate": 7.447565209946943e-06, "loss": 1.4871, "step": 15055 }, { "epoch": 0.44405130473241927, "grad_norm": 3.0325165002393764, "learning_rate": 7.447457958309106e-06, "loss": 1.4725, "step": 15060 }, { "epoch": 0.44419873212442873, "grad_norm": 3.3418427544781073, "learning_rate": 7.447350597869346e-06, "loss": 1.5527, "step": 15065 }, { "epoch": 0.44434615951643813, "grad_norm": 3.3824030039158575, "learning_rate": 7.447243128630822e-06, "loss": 1.5742, "step": 15070 }, { "epoch": 0.4444935869084476, "grad_norm": 3.293437058614101, "learning_rate": 7.447135550596699e-06, "loss": 1.5122, "step": 15075 }, { "epoch": 0.44464101430045705, "grad_norm": 3.365892721865735, "learning_rate": 7.447027863770139e-06, "loss": 1.5326, "step": 15080 }, { "epoch": 0.44478844169246645, "grad_norm": 3.1232698793277254, "learning_rate": 7.4469200681543135e-06, "loss": 1.5442, "step": 15085 }, { "epoch": 0.4449358690844759, "grad_norm": 3.8241462121532166, "learning_rate": 7.446812163752394e-06, "loss": 1.5005, "step": 15090 }, { "epoch": 0.4450832964764853, "grad_norm": 3.1080802983019726, "learning_rate": 7.446704150567554e-06, "loss": 1.4592, "step": 15095 }, { "epoch": 0.44523072386849477, "grad_norm": 3.247271070977478, "learning_rate": 7.4465960286029735e-06, "loss": 1.4947, "step": 15100 }, { "epoch": 0.44537815126050423, "grad_norm": 3.2879420787316302, "learning_rate": 7.446487797861834e-06, "loss": 1.5242, "step": 15105 }, { "epoch": 0.44552557865251363, "grad_norm": 3.3739247720060987, "learning_rate": 7.44637945834732e-06, "loss": 1.5307, "step": 15110 }, { "epoch": 0.4456730060445231, "grad_norm": 3.232422285317319, "learning_rate": 7.44627101006262e-06, "loss": 1.4697, "step": 15115 }, { "epoch": 0.4458204334365325, "grad_norm": 3.1812358411227115, "learning_rate": 7.446162453010924e-06, "loss": 1.5488, "step": 15120 }, { "epoch": 0.44596786082854195, "grad_norm": 3.1384351575518323, "learning_rate": 7.446053787195429e-06, "loss": 1.5093, "step": 15125 }, { "epoch": 0.44611528822055135, "grad_norm": 3.204245582598812, "learning_rate": 7.445945012619329e-06, "loss": 1.5503, "step": 15130 }, { "epoch": 0.4462627156125608, "grad_norm": 3.3249945935393015, "learning_rate": 7.445836129285828e-06, "loss": 1.4913, "step": 15135 }, { "epoch": 0.44641014300457027, "grad_norm": 3.404370321736934, "learning_rate": 7.445727137198128e-06, "loss": 1.5467, "step": 15140 }, { "epoch": 0.4465575703965797, "grad_norm": 3.0199273027297497, "learning_rate": 7.445618036359436e-06, "loss": 1.5579, "step": 15145 }, { "epoch": 0.44670499778858913, "grad_norm": 3.205512783650425, "learning_rate": 7.445508826772965e-06, "loss": 1.5519, "step": 15150 }, { "epoch": 0.44685242518059853, "grad_norm": 3.146979274474127, "learning_rate": 7.445399508441926e-06, "loss": 1.527, "step": 15155 }, { "epoch": 0.446999852572608, "grad_norm": 3.1163936829306893, "learning_rate": 7.445290081369536e-06, "loss": 1.4839, "step": 15160 }, { "epoch": 0.44714727996461745, "grad_norm": 3.272559032622815, "learning_rate": 7.4451805455590175e-06, "loss": 1.543, "step": 15165 }, { "epoch": 0.44729470735662685, "grad_norm": 3.2112827158469845, "learning_rate": 7.445070901013592e-06, "loss": 1.5821, "step": 15170 }, { "epoch": 0.4474421347486363, "grad_norm": 3.0956716293067843, "learning_rate": 7.444961147736485e-06, "loss": 1.5323, "step": 15175 }, { "epoch": 0.4475895621406457, "grad_norm": 3.2238046470072867, "learning_rate": 7.444851285730927e-06, "loss": 1.4995, "step": 15180 }, { "epoch": 0.44773698953265517, "grad_norm": 3.1190928102571003, "learning_rate": 7.44474131500015e-06, "loss": 1.5153, "step": 15185 }, { "epoch": 0.44788441692466463, "grad_norm": 3.2282786132959864, "learning_rate": 7.444631235547392e-06, "loss": 1.5096, "step": 15190 }, { "epoch": 0.44803184431667403, "grad_norm": 3.3370330119379545, "learning_rate": 7.4445210473758915e-06, "loss": 1.5183, "step": 15195 }, { "epoch": 0.4481792717086835, "grad_norm": 3.2419477724550445, "learning_rate": 7.44441075048889e-06, "loss": 1.5582, "step": 15200 }, { "epoch": 0.4483266991006929, "grad_norm": 3.130615446101494, "learning_rate": 7.444300344889633e-06, "loss": 1.4877, "step": 15205 }, { "epoch": 0.44847412649270235, "grad_norm": 3.204161534213817, "learning_rate": 7.444189830581371e-06, "loss": 1.5929, "step": 15210 }, { "epoch": 0.44862155388471175, "grad_norm": 3.095490052499681, "learning_rate": 7.444079207567354e-06, "loss": 1.5403, "step": 15215 }, { "epoch": 0.4487689812767212, "grad_norm": 3.0350052578796376, "learning_rate": 7.443968475850839e-06, "loss": 1.5215, "step": 15220 }, { "epoch": 0.44891640866873067, "grad_norm": 3.9884431902755826, "learning_rate": 7.443857635435083e-06, "loss": 1.4874, "step": 15225 }, { "epoch": 0.4490638360607401, "grad_norm": 3.196205880036092, "learning_rate": 7.443746686323348e-06, "loss": 1.537, "step": 15230 }, { "epoch": 0.44921126345274953, "grad_norm": 3.170671789634363, "learning_rate": 7.4436356285188986e-06, "loss": 1.556, "step": 15235 }, { "epoch": 0.44935869084475893, "grad_norm": 3.2545226326777654, "learning_rate": 7.4435244620250035e-06, "loss": 1.5312, "step": 15240 }, { "epoch": 0.4495061182367684, "grad_norm": 3.380644862386046, "learning_rate": 7.443413186844933e-06, "loss": 1.5242, "step": 15245 }, { "epoch": 0.44965354562877785, "grad_norm": 3.2887003871236127, "learning_rate": 7.443301802981963e-06, "loss": 1.4962, "step": 15250 }, { "epoch": 0.44980097302078725, "grad_norm": 3.377513478398912, "learning_rate": 7.44319031043937e-06, "loss": 1.5833, "step": 15255 }, { "epoch": 0.4499484004127967, "grad_norm": 3.340360539769145, "learning_rate": 7.443078709220434e-06, "loss": 1.5406, "step": 15260 }, { "epoch": 0.4500958278048061, "grad_norm": 3.2004101500338034, "learning_rate": 7.442966999328441e-06, "loss": 1.5145, "step": 15265 }, { "epoch": 0.4502432551968156, "grad_norm": 3.2442929671410696, "learning_rate": 7.4428551807666774e-06, "loss": 1.4792, "step": 15270 }, { "epoch": 0.45039068258882503, "grad_norm": 3.3508200253507336, "learning_rate": 7.442743253538432e-06, "loss": 1.5159, "step": 15275 }, { "epoch": 0.45053810998083443, "grad_norm": 3.19475881422139, "learning_rate": 7.442631217647002e-06, "loss": 1.5496, "step": 15280 }, { "epoch": 0.4506855373728439, "grad_norm": 3.3495428233698012, "learning_rate": 7.4425190730956795e-06, "loss": 1.5303, "step": 15285 }, { "epoch": 0.4508329647648533, "grad_norm": 3.3646221979453568, "learning_rate": 7.442406819887767e-06, "loss": 1.5215, "step": 15290 }, { "epoch": 0.45098039215686275, "grad_norm": 3.3039618196294662, "learning_rate": 7.442294458026569e-06, "loss": 1.4928, "step": 15295 }, { "epoch": 0.45112781954887216, "grad_norm": 3.1857383129845447, "learning_rate": 7.442181987515389e-06, "loss": 1.5423, "step": 15300 }, { "epoch": 0.4512752469408816, "grad_norm": 3.3644090625531065, "learning_rate": 7.442069408357538e-06, "loss": 1.5335, "step": 15305 }, { "epoch": 0.45142267433289107, "grad_norm": 3.051300006826778, "learning_rate": 7.441956720556329e-06, "loss": 1.487, "step": 15310 }, { "epoch": 0.4515701017249005, "grad_norm": 3.344850167890309, "learning_rate": 7.441843924115079e-06, "loss": 1.5053, "step": 15315 }, { "epoch": 0.45171752911690993, "grad_norm": 3.12532943837307, "learning_rate": 7.441731019037105e-06, "loss": 1.5373, "step": 15320 }, { "epoch": 0.45186495650891934, "grad_norm": 3.385026793914324, "learning_rate": 7.44161800532573e-06, "loss": 1.4833, "step": 15325 }, { "epoch": 0.4520123839009288, "grad_norm": 3.3012391603457387, "learning_rate": 7.441504882984281e-06, "loss": 1.5537, "step": 15330 }, { "epoch": 0.45215981129293825, "grad_norm": 3.242702699101251, "learning_rate": 7.441391652016084e-06, "loss": 1.5349, "step": 15335 }, { "epoch": 0.45230723868494765, "grad_norm": 3.230028514060556, "learning_rate": 7.441278312424473e-06, "loss": 1.5725, "step": 15340 }, { "epoch": 0.4524546660769571, "grad_norm": 3.274155190639437, "learning_rate": 7.441164864212783e-06, "loss": 1.5303, "step": 15345 }, { "epoch": 0.4526020934689665, "grad_norm": 3.4048239184784195, "learning_rate": 7.441051307384351e-06, "loss": 1.5485, "step": 15350 }, { "epoch": 0.452749520860976, "grad_norm": 3.282001422643027, "learning_rate": 7.4409376419425205e-06, "loss": 1.4802, "step": 15355 }, { "epoch": 0.45289694825298543, "grad_norm": 3.2192383605205475, "learning_rate": 7.440823867890635e-06, "loss": 1.5998, "step": 15360 }, { "epoch": 0.45304437564499483, "grad_norm": 3.2423794611444765, "learning_rate": 7.440709985232043e-06, "loss": 1.5224, "step": 15365 }, { "epoch": 0.4531918030370043, "grad_norm": 3.444904598783545, "learning_rate": 7.440595993970095e-06, "loss": 1.5264, "step": 15370 }, { "epoch": 0.4533392304290137, "grad_norm": 3.1256117808886406, "learning_rate": 7.440481894108146e-06, "loss": 1.5276, "step": 15375 }, { "epoch": 0.45348665782102315, "grad_norm": 3.2655355390958167, "learning_rate": 7.440367685649553e-06, "loss": 1.5102, "step": 15380 }, { "epoch": 0.45363408521303256, "grad_norm": 3.155863648991408, "learning_rate": 7.440253368597676e-06, "loss": 1.5622, "step": 15385 }, { "epoch": 0.453781512605042, "grad_norm": 3.178306249887673, "learning_rate": 7.440138942955881e-06, "loss": 1.516, "step": 15390 }, { "epoch": 0.4539289399970515, "grad_norm": 3.16309939140926, "learning_rate": 7.440024408727533e-06, "loss": 1.5677, "step": 15395 }, { "epoch": 0.4540763673890609, "grad_norm": 3.455951835690794, "learning_rate": 7.439909765916004e-06, "loss": 1.5034, "step": 15400 }, { "epoch": 0.45422379478107033, "grad_norm": 3.226380554712842, "learning_rate": 7.439795014524666e-06, "loss": 1.5398, "step": 15405 }, { "epoch": 0.45437122217307974, "grad_norm": 3.6094662895663707, "learning_rate": 7.439680154556898e-06, "loss": 1.5337, "step": 15410 }, { "epoch": 0.4545186495650892, "grad_norm": 3.440723739336048, "learning_rate": 7.439565186016077e-06, "loss": 1.4907, "step": 15415 }, { "epoch": 0.45466607695709865, "grad_norm": 3.1975838274659414, "learning_rate": 7.439450108905587e-06, "loss": 1.5792, "step": 15420 }, { "epoch": 0.45481350434910806, "grad_norm": 3.097193281250267, "learning_rate": 7.439334923228816e-06, "loss": 1.5241, "step": 15425 }, { "epoch": 0.4549609317411175, "grad_norm": 3.197905183361385, "learning_rate": 7.439219628989151e-06, "loss": 1.5319, "step": 15430 }, { "epoch": 0.4551083591331269, "grad_norm": 3.616680027100249, "learning_rate": 7.439104226189986e-06, "loss": 1.5714, "step": 15435 }, { "epoch": 0.4552557865251364, "grad_norm": 3.2018291743785348, "learning_rate": 7.438988714834716e-06, "loss": 1.5805, "step": 15440 }, { "epoch": 0.45540321391714583, "grad_norm": 3.336364733038659, "learning_rate": 7.438873094926742e-06, "loss": 1.5424, "step": 15445 }, { "epoch": 0.45555064130915524, "grad_norm": 3.2016187611766433, "learning_rate": 7.438757366469463e-06, "loss": 1.5412, "step": 15450 }, { "epoch": 0.4556980687011647, "grad_norm": 3.3573077506691202, "learning_rate": 7.438641529466289e-06, "loss": 1.5456, "step": 15455 }, { "epoch": 0.4558454960931741, "grad_norm": 3.4202292873831235, "learning_rate": 7.438525583920624e-06, "loss": 1.5584, "step": 15460 }, { "epoch": 0.45599292348518355, "grad_norm": 3.2356514084656043, "learning_rate": 7.438409529835882e-06, "loss": 1.5018, "step": 15465 }, { "epoch": 0.45614035087719296, "grad_norm": 3.147897781188512, "learning_rate": 7.438293367215479e-06, "loss": 1.504, "step": 15470 }, { "epoch": 0.4562877782692024, "grad_norm": 3.1383218254930356, "learning_rate": 7.438177096062831e-06, "loss": 1.5263, "step": 15475 }, { "epoch": 0.4564352056612119, "grad_norm": 3.434407505786735, "learning_rate": 7.4380607163813615e-06, "loss": 1.4868, "step": 15480 }, { "epoch": 0.4565826330532213, "grad_norm": 3.61424768288654, "learning_rate": 7.4379442281744925e-06, "loss": 1.5157, "step": 15485 }, { "epoch": 0.45673006044523073, "grad_norm": 3.0479573192699663, "learning_rate": 7.437827631445655e-06, "loss": 1.5703, "step": 15490 }, { "epoch": 0.45687748783724014, "grad_norm": 3.755339718409437, "learning_rate": 7.437710926198278e-06, "loss": 1.5122, "step": 15495 }, { "epoch": 0.4570249152292496, "grad_norm": 3.2731322150270175, "learning_rate": 7.437594112435795e-06, "loss": 1.4698, "step": 15500 }, { "epoch": 0.4570249152292496, "eval_loss": 1.2059788703918457, "eval_runtime": 4.1608, "eval_samples_per_second": 95.174, "eval_steps_per_second": 3.124, "step": 15500 }, { "epoch": 0.45717234262125905, "grad_norm": 3.2135476036616653, "learning_rate": 7.437477190161645e-06, "loss": 1.5432, "step": 15505 }, { "epoch": 0.45731977001326846, "grad_norm": 3.1539137434496824, "learning_rate": 7.437360159379269e-06, "loss": 1.5648, "step": 15510 }, { "epoch": 0.4574671974052779, "grad_norm": 3.2625756628242044, "learning_rate": 7.437243020092108e-06, "loss": 1.5767, "step": 15515 }, { "epoch": 0.4576146247972873, "grad_norm": 3.1663653527726714, "learning_rate": 7.437125772303612e-06, "loss": 1.5232, "step": 15520 }, { "epoch": 0.4577620521892968, "grad_norm": 3.3623246476374233, "learning_rate": 7.4370084160172305e-06, "loss": 1.5331, "step": 15525 }, { "epoch": 0.45790947958130623, "grad_norm": 3.045099888343715, "learning_rate": 7.436890951236415e-06, "loss": 1.4956, "step": 15530 }, { "epoch": 0.45805690697331564, "grad_norm": 3.2654971912723973, "learning_rate": 7.436773377964625e-06, "loss": 1.5586, "step": 15535 }, { "epoch": 0.4582043343653251, "grad_norm": 3.6899630414909605, "learning_rate": 7.436655696205316e-06, "loss": 1.4926, "step": 15540 }, { "epoch": 0.4583517617573345, "grad_norm": 3.2657546493579686, "learning_rate": 7.436537905961955e-06, "loss": 1.5686, "step": 15545 }, { "epoch": 0.45849918914934396, "grad_norm": 3.183963578381144, "learning_rate": 7.436420007238006e-06, "loss": 1.4722, "step": 15550 }, { "epoch": 0.45864661654135336, "grad_norm": 3.288176218015888, "learning_rate": 7.43630200003694e-06, "loss": 1.5014, "step": 15555 }, { "epoch": 0.4587940439333628, "grad_norm": 3.4666611957543916, "learning_rate": 7.4361838843622265e-06, "loss": 1.5487, "step": 15560 }, { "epoch": 0.4589414713253723, "grad_norm": 3.155991765334461, "learning_rate": 7.436065660217344e-06, "loss": 1.5143, "step": 15565 }, { "epoch": 0.4590888987173817, "grad_norm": 3.1202603196892538, "learning_rate": 7.435947327605771e-06, "loss": 1.4831, "step": 15570 }, { "epoch": 0.45923632610939114, "grad_norm": 3.239753710617184, "learning_rate": 7.435828886530988e-06, "loss": 1.4724, "step": 15575 }, { "epoch": 0.45938375350140054, "grad_norm": 3.2170392922110773, "learning_rate": 7.435710336996481e-06, "loss": 1.5409, "step": 15580 }, { "epoch": 0.45953118089341, "grad_norm": 3.262281125213647, "learning_rate": 7.435591679005739e-06, "loss": 1.5329, "step": 15585 }, { "epoch": 0.45967860828541945, "grad_norm": 3.1893787884881752, "learning_rate": 7.435472912562255e-06, "loss": 1.5419, "step": 15590 }, { "epoch": 0.45982603567742886, "grad_norm": 3.3343333071482193, "learning_rate": 7.43535403766952e-06, "loss": 1.4759, "step": 15595 }, { "epoch": 0.4599734630694383, "grad_norm": 3.412831707900455, "learning_rate": 7.435235054331035e-06, "loss": 1.4907, "step": 15600 }, { "epoch": 0.4601208904614477, "grad_norm": 3.063635148807011, "learning_rate": 7.4351159625503e-06, "loss": 1.4622, "step": 15605 }, { "epoch": 0.4602683178534572, "grad_norm": 3.2290237779714404, "learning_rate": 7.43499676233082e-06, "loss": 1.5098, "step": 15610 }, { "epoch": 0.46041574524546663, "grad_norm": 3.4462063760551453, "learning_rate": 7.4348774536761026e-06, "loss": 1.556, "step": 15615 }, { "epoch": 0.46056317263747604, "grad_norm": 3.1871368433780733, "learning_rate": 7.434758036589659e-06, "loss": 1.5347, "step": 15620 }, { "epoch": 0.4607106000294855, "grad_norm": 3.1541485732554686, "learning_rate": 7.434638511075002e-06, "loss": 1.4733, "step": 15625 }, { "epoch": 0.4608580274214949, "grad_norm": 3.253229278013943, "learning_rate": 7.434518877135648e-06, "loss": 1.4918, "step": 15630 }, { "epoch": 0.46100545481350436, "grad_norm": 3.3628801925732774, "learning_rate": 7.4343991347751215e-06, "loss": 1.5648, "step": 15635 }, { "epoch": 0.46115288220551376, "grad_norm": 3.160420374901885, "learning_rate": 7.434279283996942e-06, "loss": 1.5546, "step": 15640 }, { "epoch": 0.4613003095975232, "grad_norm": 3.2278891823782683, "learning_rate": 7.434159324804637e-06, "loss": 1.5032, "step": 15645 }, { "epoch": 0.4614477369895327, "grad_norm": 3.1782297030003424, "learning_rate": 7.4340392572017386e-06, "loss": 1.4788, "step": 15650 }, { "epoch": 0.4615951643815421, "grad_norm": 3.226788510822274, "learning_rate": 7.433919081191777e-06, "loss": 1.5419, "step": 15655 }, { "epoch": 0.46174259177355154, "grad_norm": 3.212147055189453, "learning_rate": 7.4337987967782905e-06, "loss": 1.5516, "step": 15660 }, { "epoch": 0.46189001916556094, "grad_norm": 3.273894920752195, "learning_rate": 7.433678403964817e-06, "loss": 1.4402, "step": 15665 }, { "epoch": 0.4620374465575704, "grad_norm": 3.204630780311977, "learning_rate": 7.4335579027549014e-06, "loss": 1.5063, "step": 15670 }, { "epoch": 0.46218487394957986, "grad_norm": 3.35454777298573, "learning_rate": 7.433437293152088e-06, "loss": 1.5234, "step": 15675 }, { "epoch": 0.46233230134158926, "grad_norm": 3.128529413087895, "learning_rate": 7.433316575159926e-06, "loss": 1.5409, "step": 15680 }, { "epoch": 0.4624797287335987, "grad_norm": 3.13801073768554, "learning_rate": 7.433195748781967e-06, "loss": 1.5147, "step": 15685 }, { "epoch": 0.4626271561256081, "grad_norm": 3.286613843966487, "learning_rate": 7.433074814021768e-06, "loss": 1.5385, "step": 15690 }, { "epoch": 0.4627745835176176, "grad_norm": 3.3043213930884154, "learning_rate": 7.4329537708828875e-06, "loss": 1.5085, "step": 15695 }, { "epoch": 0.46292201090962704, "grad_norm": 3.26441721868365, "learning_rate": 7.432832619368886e-06, "loss": 1.4757, "step": 15700 }, { "epoch": 0.46306943830163644, "grad_norm": 3.132501602032356, "learning_rate": 7.43271135948333e-06, "loss": 1.5272, "step": 15705 }, { "epoch": 0.4632168656936459, "grad_norm": 3.191656960837069, "learning_rate": 7.432589991229787e-06, "loss": 1.4893, "step": 15710 }, { "epoch": 0.4633642930856553, "grad_norm": 3.153702851726028, "learning_rate": 7.4324685146118275e-06, "loss": 1.5291, "step": 15715 }, { "epoch": 0.46351172047766476, "grad_norm": 3.151911766916517, "learning_rate": 7.432346929633028e-06, "loss": 1.5711, "step": 15720 }, { "epoch": 0.46365914786967416, "grad_norm": 3.2113575399092067, "learning_rate": 7.432225236296965e-06, "loss": 1.5315, "step": 15725 }, { "epoch": 0.4638065752616836, "grad_norm": 3.190792071473082, "learning_rate": 7.43210343460722e-06, "loss": 1.5138, "step": 15730 }, { "epoch": 0.4639540026536931, "grad_norm": 3.331026347857657, "learning_rate": 7.431981524567377e-06, "loss": 1.5148, "step": 15735 }, { "epoch": 0.4641014300457025, "grad_norm": 3.2283818734633765, "learning_rate": 7.431859506181022e-06, "loss": 1.6162, "step": 15740 }, { "epoch": 0.46424885743771194, "grad_norm": 3.286014674861282, "learning_rate": 7.431737379451748e-06, "loss": 1.514, "step": 15745 }, { "epoch": 0.46439628482972134, "grad_norm": 3.152308595182871, "learning_rate": 7.431615144383148e-06, "loss": 1.5676, "step": 15750 }, { "epoch": 0.4645437122217308, "grad_norm": 3.256223498542477, "learning_rate": 7.431492800978817e-06, "loss": 1.484, "step": 15755 }, { "epoch": 0.46469113961374026, "grad_norm": 3.2915569698874085, "learning_rate": 7.431370349242357e-06, "loss": 1.4314, "step": 15760 }, { "epoch": 0.46483856700574966, "grad_norm": 3.159884423775461, "learning_rate": 7.431247789177371e-06, "loss": 1.5164, "step": 15765 }, { "epoch": 0.4649859943977591, "grad_norm": 3.2624781091955395, "learning_rate": 7.431125120787466e-06, "loss": 1.527, "step": 15770 }, { "epoch": 0.4651334217897685, "grad_norm": 3.283779440522752, "learning_rate": 7.43100234407625e-06, "loss": 1.4708, "step": 15775 }, { "epoch": 0.465280849181778, "grad_norm": 3.087345737107474, "learning_rate": 7.430879459047337e-06, "loss": 1.5184, "step": 15780 }, { "epoch": 0.46542827657378744, "grad_norm": 3.3333193094081635, "learning_rate": 7.430756465704342e-06, "loss": 1.566, "step": 15785 }, { "epoch": 0.46557570396579684, "grad_norm": 3.3116115268005974, "learning_rate": 7.4306333640508855e-06, "loss": 1.5001, "step": 15790 }, { "epoch": 0.4657231313578063, "grad_norm": 3.187846515619828, "learning_rate": 7.43051015409059e-06, "loss": 1.4509, "step": 15795 }, { "epoch": 0.4658705587498157, "grad_norm": 3.131044067364097, "learning_rate": 7.43038683582708e-06, "loss": 1.5198, "step": 15800 }, { "epoch": 0.46601798614182516, "grad_norm": 3.212157153270396, "learning_rate": 7.430263409263985e-06, "loss": 1.5266, "step": 15805 }, { "epoch": 0.46616541353383456, "grad_norm": 3.0887556207864835, "learning_rate": 7.430139874404937e-06, "loss": 1.4766, "step": 15810 }, { "epoch": 0.466312840925844, "grad_norm": 3.4307049264019205, "learning_rate": 7.43001623125357e-06, "loss": 1.5579, "step": 15815 }, { "epoch": 0.4664602683178535, "grad_norm": 3.156610104190756, "learning_rate": 7.429892479813524e-06, "loss": 1.5129, "step": 15820 }, { "epoch": 0.4666076957098629, "grad_norm": 3.1797185019135283, "learning_rate": 7.429768620088439e-06, "loss": 1.5175, "step": 15825 }, { "epoch": 0.46675512310187234, "grad_norm": 3.337311061274157, "learning_rate": 7.429644652081962e-06, "loss": 1.5711, "step": 15830 }, { "epoch": 0.46690255049388174, "grad_norm": 3.1591034061105994, "learning_rate": 7.4295205757977386e-06, "loss": 1.534, "step": 15835 }, { "epoch": 0.4670499778858912, "grad_norm": 3.1856164357135905, "learning_rate": 7.42939639123942e-06, "loss": 1.583, "step": 15840 }, { "epoch": 0.46719740527790066, "grad_norm": 3.239478760854248, "learning_rate": 7.4292720984106614e-06, "loss": 1.4915, "step": 15845 }, { "epoch": 0.46734483266991006, "grad_norm": 3.4644809812775597, "learning_rate": 7.429147697315121e-06, "loss": 1.5498, "step": 15850 }, { "epoch": 0.4674922600619195, "grad_norm": 3.247253239970954, "learning_rate": 7.429023187956458e-06, "loss": 1.5221, "step": 15855 }, { "epoch": 0.4676396874539289, "grad_norm": 3.1554671664348146, "learning_rate": 7.428898570338337e-06, "loss": 1.5349, "step": 15860 }, { "epoch": 0.4677871148459384, "grad_norm": 3.2751893143723105, "learning_rate": 7.428773844464423e-06, "loss": 1.5738, "step": 15865 }, { "epoch": 0.46793454223794784, "grad_norm": 3.1182305445763454, "learning_rate": 7.42864901033839e-06, "loss": 1.4133, "step": 15870 }, { "epoch": 0.46808196962995724, "grad_norm": 3.3824912920854837, "learning_rate": 7.4285240679639085e-06, "loss": 1.5178, "step": 15875 }, { "epoch": 0.4682293970219667, "grad_norm": 3.222267294196949, "learning_rate": 7.428399017344655e-06, "loss": 1.5058, "step": 15880 }, { "epoch": 0.4683768244139761, "grad_norm": 3.2931446111550313, "learning_rate": 7.428273858484311e-06, "loss": 1.5223, "step": 15885 }, { "epoch": 0.46852425180598556, "grad_norm": 3.107975180927764, "learning_rate": 7.4281485913865594e-06, "loss": 1.5272, "step": 15890 }, { "epoch": 0.46867167919799496, "grad_norm": 3.204180283018544, "learning_rate": 7.428023216055084e-06, "loss": 1.4552, "step": 15895 }, { "epoch": 0.4688191065900044, "grad_norm": 3.305757251664424, "learning_rate": 7.4278977324935766e-06, "loss": 1.5667, "step": 15900 }, { "epoch": 0.4689665339820139, "grad_norm": 4.0857153085907205, "learning_rate": 7.427772140705728e-06, "loss": 1.5062, "step": 15905 }, { "epoch": 0.4691139613740233, "grad_norm": 3.2884151735919467, "learning_rate": 7.427646440695235e-06, "loss": 1.5169, "step": 15910 }, { "epoch": 0.46926138876603274, "grad_norm": 3.115032162108458, "learning_rate": 7.427520632465796e-06, "loss": 1.5046, "step": 15915 }, { "epoch": 0.46940881615804214, "grad_norm": 3.308860222076997, "learning_rate": 7.427394716021113e-06, "loss": 1.539, "step": 15920 }, { "epoch": 0.4695562435500516, "grad_norm": 3.16713232876071, "learning_rate": 7.427268691364891e-06, "loss": 1.5315, "step": 15925 }, { "epoch": 0.46970367094206106, "grad_norm": 3.212882959268721, "learning_rate": 7.427142558500839e-06, "loss": 1.5066, "step": 15930 }, { "epoch": 0.46985109833407046, "grad_norm": 3.042311246937436, "learning_rate": 7.427016317432669e-06, "loss": 1.524, "step": 15935 }, { "epoch": 0.4699985257260799, "grad_norm": 3.17369176391155, "learning_rate": 7.426889968164094e-06, "loss": 1.5063, "step": 15940 }, { "epoch": 0.4701459531180893, "grad_norm": 3.09514267786215, "learning_rate": 7.426763510698835e-06, "loss": 1.5127, "step": 15945 }, { "epoch": 0.4702933805100988, "grad_norm": 3.241677930308239, "learning_rate": 7.426636945040609e-06, "loss": 1.5139, "step": 15950 }, { "epoch": 0.47044080790210824, "grad_norm": 3.3768237629629874, "learning_rate": 7.426510271193144e-06, "loss": 1.4973, "step": 15955 }, { "epoch": 0.47058823529411764, "grad_norm": 3.153842448969168, "learning_rate": 7.426383489160166e-06, "loss": 1.5179, "step": 15960 }, { "epoch": 0.4707356626861271, "grad_norm": 3.3702239236569764, "learning_rate": 7.4262565989454055e-06, "loss": 1.5001, "step": 15965 }, { "epoch": 0.4708830900781365, "grad_norm": 3.471618116572535, "learning_rate": 7.426129600552596e-06, "loss": 1.4944, "step": 15970 }, { "epoch": 0.47103051747014596, "grad_norm": 3.195319726505384, "learning_rate": 7.4260024939854764e-06, "loss": 1.5244, "step": 15975 }, { "epoch": 0.47117794486215536, "grad_norm": 3.2519379891334843, "learning_rate": 7.425875279247786e-06, "loss": 1.507, "step": 15980 }, { "epoch": 0.4713253722541648, "grad_norm": 3.1761458066478165, "learning_rate": 7.4257479563432675e-06, "loss": 1.5193, "step": 15985 }, { "epoch": 0.4714727996461743, "grad_norm": 3.2086999487414287, "learning_rate": 7.425620525275668e-06, "loss": 1.5002, "step": 15990 }, { "epoch": 0.4716202270381837, "grad_norm": 3.335165727998494, "learning_rate": 7.425492986048738e-06, "loss": 1.5172, "step": 15995 }, { "epoch": 0.47176765443019314, "grad_norm": 3.5236303309527566, "learning_rate": 7.42536533866623e-06, "loss": 1.5629, "step": 16000 }, { "epoch": 0.47176765443019314, "eval_loss": 1.2029482126235962, "eval_runtime": 4.2448, "eval_samples_per_second": 93.29, "eval_steps_per_second": 3.063, "step": 16000 }, { "epoch": 0.47191508182220254, "grad_norm": 3.2488439584606263, "learning_rate": 7.4252375831319e-06, "loss": 1.4602, "step": 16005 }, { "epoch": 0.472062509214212, "grad_norm": 3.2310456935101777, "learning_rate": 7.4251097194495065e-06, "loss": 1.5738, "step": 16010 }, { "epoch": 0.47220993660622146, "grad_norm": 4.589670318738517, "learning_rate": 7.424981747622814e-06, "loss": 1.5718, "step": 16015 }, { "epoch": 0.47235736399823086, "grad_norm": 3.157496020534531, "learning_rate": 7.424853667655587e-06, "loss": 1.5544, "step": 16020 }, { "epoch": 0.4725047913902403, "grad_norm": 3.2174669966755527, "learning_rate": 7.424725479551595e-06, "loss": 1.51, "step": 16025 }, { "epoch": 0.4726522187822497, "grad_norm": 3.3927236697490084, "learning_rate": 7.424597183314608e-06, "loss": 1.4692, "step": 16030 }, { "epoch": 0.4727996461742592, "grad_norm": 3.269686469682016, "learning_rate": 7.424468778948405e-06, "loss": 1.4918, "step": 16035 }, { "epoch": 0.47294707356626864, "grad_norm": 3.1442152146389293, "learning_rate": 7.424340266456761e-06, "loss": 1.4935, "step": 16040 }, { "epoch": 0.47309450095827804, "grad_norm": 3.295546208062583, "learning_rate": 7.424211645843459e-06, "loss": 1.5546, "step": 16045 }, { "epoch": 0.4732419283502875, "grad_norm": 3.170614773621203, "learning_rate": 7.424082917112285e-06, "loss": 1.5229, "step": 16050 }, { "epoch": 0.4733893557422969, "grad_norm": 3.3255281185486636, "learning_rate": 7.4239540802670244e-06, "loss": 1.5288, "step": 16055 }, { "epoch": 0.47353678313430636, "grad_norm": 3.3787318046614208, "learning_rate": 7.423825135311471e-06, "loss": 1.492, "step": 16060 }, { "epoch": 0.47368421052631576, "grad_norm": 3.4986195382033913, "learning_rate": 7.423696082249417e-06, "loss": 1.5132, "step": 16065 }, { "epoch": 0.4738316379183252, "grad_norm": 3.1848209272061343, "learning_rate": 7.423566921084661e-06, "loss": 1.5408, "step": 16070 }, { "epoch": 0.4739790653103347, "grad_norm": 3.0992185067002853, "learning_rate": 7.423437651821004e-06, "loss": 1.5599, "step": 16075 }, { "epoch": 0.4741264927023441, "grad_norm": 3.2974602015582497, "learning_rate": 7.4233082744622495e-06, "loss": 1.5249, "step": 16080 }, { "epoch": 0.47427392009435354, "grad_norm": 3.247706135643169, "learning_rate": 7.423178789012204e-06, "loss": 1.5068, "step": 16085 }, { "epoch": 0.47442134748636294, "grad_norm": 3.3418813622839747, "learning_rate": 7.423049195474679e-06, "loss": 1.4844, "step": 16090 }, { "epoch": 0.4745687748783724, "grad_norm": 3.186048998087191, "learning_rate": 7.4229194938534865e-06, "loss": 1.5102, "step": 16095 }, { "epoch": 0.47471620227038186, "grad_norm": 3.079057205635635, "learning_rate": 7.4227896841524445e-06, "loss": 1.4636, "step": 16100 }, { "epoch": 0.47486362966239126, "grad_norm": 3.2127664003550644, "learning_rate": 7.422659766375371e-06, "loss": 1.5361, "step": 16105 }, { "epoch": 0.4750110570544007, "grad_norm": 3.199460404264481, "learning_rate": 7.422529740526092e-06, "loss": 1.4748, "step": 16110 }, { "epoch": 0.4751584844464101, "grad_norm": 3.201866417448465, "learning_rate": 7.4223996066084305e-06, "loss": 1.4983, "step": 16115 }, { "epoch": 0.4753059118384196, "grad_norm": 3.109841558841944, "learning_rate": 7.422269364626217e-06, "loss": 1.4853, "step": 16120 }, { "epoch": 0.47545333923042904, "grad_norm": 3.013668101516045, "learning_rate": 7.4221390145832854e-06, "loss": 1.4871, "step": 16125 }, { "epoch": 0.47560076662243844, "grad_norm": 3.2793491904464043, "learning_rate": 7.422008556483469e-06, "loss": 1.5482, "step": 16130 }, { "epoch": 0.4757481940144479, "grad_norm": 3.3495834379459417, "learning_rate": 7.421877990330608e-06, "loss": 1.5538, "step": 16135 }, { "epoch": 0.4758956214064573, "grad_norm": 3.221350397105594, "learning_rate": 7.421747316128544e-06, "loss": 1.5407, "step": 16140 }, { "epoch": 0.47604304879846676, "grad_norm": 3.1396036538067955, "learning_rate": 7.421616533881123e-06, "loss": 1.522, "step": 16145 }, { "epoch": 0.47619047619047616, "grad_norm": 3.347125896503423, "learning_rate": 7.421485643592193e-06, "loss": 1.5566, "step": 16150 }, { "epoch": 0.4763379035824856, "grad_norm": 3.347534433465796, "learning_rate": 7.421354645265605e-06, "loss": 1.504, "step": 16155 }, { "epoch": 0.4764853309744951, "grad_norm": 3.0796201633187335, "learning_rate": 7.4212235389052155e-06, "loss": 1.49, "step": 16160 }, { "epoch": 0.4766327583665045, "grad_norm": 3.330936271105055, "learning_rate": 7.4210923245148805e-06, "loss": 1.5377, "step": 16165 }, { "epoch": 0.47678018575851394, "grad_norm": 3.2705088327750307, "learning_rate": 7.420961002098462e-06, "loss": 1.5846, "step": 16170 }, { "epoch": 0.47692761315052334, "grad_norm": 3.1303095241474765, "learning_rate": 7.420829571659823e-06, "loss": 1.5128, "step": 16175 }, { "epoch": 0.4770750405425328, "grad_norm": 3.1717741976792646, "learning_rate": 7.420698033202834e-06, "loss": 1.5399, "step": 16180 }, { "epoch": 0.47722246793454226, "grad_norm": 3.2725347615719715, "learning_rate": 7.4205663867313625e-06, "loss": 1.5035, "step": 16185 }, { "epoch": 0.47736989532655166, "grad_norm": 3.223942837610512, "learning_rate": 7.420434632249285e-06, "loss": 1.5113, "step": 16190 }, { "epoch": 0.4775173227185611, "grad_norm": 3.4229079247436185, "learning_rate": 7.420302769760477e-06, "loss": 1.556, "step": 16195 }, { "epoch": 0.4776647501105705, "grad_norm": 3.2877635399906353, "learning_rate": 7.420170799268819e-06, "loss": 1.549, "step": 16200 }, { "epoch": 0.47781217750258, "grad_norm": 3.673868207621624, "learning_rate": 7.420038720778194e-06, "loss": 1.5599, "step": 16205 }, { "epoch": 0.47795960489458944, "grad_norm": 3.08726321911727, "learning_rate": 7.419906534292489e-06, "loss": 1.4668, "step": 16210 }, { "epoch": 0.47810703228659884, "grad_norm": 3.1934208818998395, "learning_rate": 7.419774239815593e-06, "loss": 1.5188, "step": 16215 }, { "epoch": 0.4782544596786083, "grad_norm": 3.2613539779822305, "learning_rate": 7.419641837351399e-06, "loss": 1.5346, "step": 16220 }, { "epoch": 0.4784018870706177, "grad_norm": 3.1220579677137668, "learning_rate": 7.419509326903806e-06, "loss": 1.5236, "step": 16225 }, { "epoch": 0.47854931446262716, "grad_norm": 3.1538224494388616, "learning_rate": 7.41937670847671e-06, "loss": 1.5137, "step": 16230 }, { "epoch": 0.47869674185463656, "grad_norm": 3.495786904005646, "learning_rate": 7.4192439820740135e-06, "loss": 1.5352, "step": 16235 }, { "epoch": 0.478844169246646, "grad_norm": 3.0911545541706187, "learning_rate": 7.4191111476996235e-06, "loss": 1.5089, "step": 16240 }, { "epoch": 0.4789915966386555, "grad_norm": 3.308621772792662, "learning_rate": 7.418978205357449e-06, "loss": 1.5405, "step": 16245 }, { "epoch": 0.4791390240306649, "grad_norm": 3.3863067691823923, "learning_rate": 7.4188451550514e-06, "loss": 1.52, "step": 16250 }, { "epoch": 0.47928645142267434, "grad_norm": 3.1081395395568543, "learning_rate": 7.418711996785393e-06, "loss": 1.4757, "step": 16255 }, { "epoch": 0.47943387881468374, "grad_norm": 3.124346180097064, "learning_rate": 7.418578730563347e-06, "loss": 1.5567, "step": 16260 }, { "epoch": 0.4795813062066932, "grad_norm": 15.980062390612574, "learning_rate": 7.418445356389183e-06, "loss": 1.4893, "step": 16265 }, { "epoch": 0.47972873359870266, "grad_norm": 3.1569252811855213, "learning_rate": 7.418311874266824e-06, "loss": 1.5253, "step": 16270 }, { "epoch": 0.47987616099071206, "grad_norm": 3.097299672639114, "learning_rate": 7.4181782842002004e-06, "loss": 1.5025, "step": 16275 }, { "epoch": 0.4800235883827215, "grad_norm": 3.116303874108732, "learning_rate": 7.418044586193243e-06, "loss": 1.4951, "step": 16280 }, { "epoch": 0.4801710157747309, "grad_norm": 3.189577757037806, "learning_rate": 7.4179107802498835e-06, "loss": 1.5466, "step": 16285 }, { "epoch": 0.4803184431667404, "grad_norm": 3.1427320513154626, "learning_rate": 7.4177768663740625e-06, "loss": 1.5391, "step": 16290 }, { "epoch": 0.48046587055874984, "grad_norm": 3.2153727170995805, "learning_rate": 7.417642844569718e-06, "loss": 1.4941, "step": 16295 }, { "epoch": 0.48061329795075924, "grad_norm": 3.4089203707175413, "learning_rate": 7.417508714840795e-06, "loss": 1.4956, "step": 16300 }, { "epoch": 0.4807607253427687, "grad_norm": 3.159956235727727, "learning_rate": 7.4173744771912405e-06, "loss": 1.5271, "step": 16305 }, { "epoch": 0.4809081527347781, "grad_norm": 3.0781614854097095, "learning_rate": 7.4172401316250035e-06, "loss": 1.4947, "step": 16310 }, { "epoch": 0.48105558012678756, "grad_norm": 3.0756105617084493, "learning_rate": 7.417105678146039e-06, "loss": 1.512, "step": 16315 }, { "epoch": 0.48120300751879697, "grad_norm": 2.941105013612601, "learning_rate": 7.416971116758303e-06, "loss": 1.4503, "step": 16320 }, { "epoch": 0.4813504349108064, "grad_norm": 3.2934612529121123, "learning_rate": 7.416836447465753e-06, "loss": 1.5377, "step": 16325 }, { "epoch": 0.4814978623028159, "grad_norm": 3.3909398374408726, "learning_rate": 7.416701670272354e-06, "loss": 1.5146, "step": 16330 }, { "epoch": 0.4816452896948253, "grad_norm": 3.231101455601761, "learning_rate": 7.416566785182073e-06, "loss": 1.5367, "step": 16335 }, { "epoch": 0.48179271708683474, "grad_norm": 3.1097349325522408, "learning_rate": 7.416431792198876e-06, "loss": 1.5502, "step": 16340 }, { "epoch": 0.48194014447884415, "grad_norm": 3.309350837545056, "learning_rate": 7.416296691326736e-06, "loss": 1.52, "step": 16345 }, { "epoch": 0.4820875718708536, "grad_norm": 3.1353743999132653, "learning_rate": 7.4161614825696306e-06, "loss": 1.5002, "step": 16350 }, { "epoch": 0.48223499926286306, "grad_norm": 3.479562888904591, "learning_rate": 7.416026165931538e-06, "loss": 1.5097, "step": 16355 }, { "epoch": 0.48238242665487246, "grad_norm": 3.06847436515983, "learning_rate": 7.4158907414164384e-06, "loss": 1.5216, "step": 16360 }, { "epoch": 0.4825298540468819, "grad_norm": 4.603820348460621, "learning_rate": 7.415755209028318e-06, "loss": 1.5463, "step": 16365 }, { "epoch": 0.4826772814388913, "grad_norm": 3.385380316173211, "learning_rate": 7.415619568771164e-06, "loss": 1.5343, "step": 16370 }, { "epoch": 0.4828247088309008, "grad_norm": 3.123525485755781, "learning_rate": 7.415483820648969e-06, "loss": 1.5739, "step": 16375 }, { "epoch": 0.48297213622291024, "grad_norm": 3.0009703750677525, "learning_rate": 7.415347964665727e-06, "loss": 1.4886, "step": 16380 }, { "epoch": 0.48311956361491964, "grad_norm": 3.224099050887224, "learning_rate": 7.415212000825436e-06, "loss": 1.5267, "step": 16385 }, { "epoch": 0.4832669910069291, "grad_norm": 3.1493210855639666, "learning_rate": 7.415075929132096e-06, "loss": 1.6005, "step": 16390 }, { "epoch": 0.4834144183989385, "grad_norm": 3.170531737284913, "learning_rate": 7.414939749589713e-06, "loss": 1.5334, "step": 16395 }, { "epoch": 0.48356184579094796, "grad_norm": 3.3655206875427335, "learning_rate": 7.414803462202292e-06, "loss": 1.5475, "step": 16400 }, { "epoch": 0.48370927318295737, "grad_norm": 3.116793517114441, "learning_rate": 7.414667066973845e-06, "loss": 1.5432, "step": 16405 }, { "epoch": 0.4838567005749668, "grad_norm": 3.232224278990464, "learning_rate": 7.4145305639083835e-06, "loss": 1.5076, "step": 16410 }, { "epoch": 0.4840041279669763, "grad_norm": 3.310171465772543, "learning_rate": 7.414393953009927e-06, "loss": 1.5749, "step": 16415 }, { "epoch": 0.4841515553589857, "grad_norm": 3.140103600859583, "learning_rate": 7.414257234282494e-06, "loss": 1.5076, "step": 16420 }, { "epoch": 0.48429898275099514, "grad_norm": 3.2716237809392514, "learning_rate": 7.4141204077301076e-06, "loss": 1.503, "step": 16425 }, { "epoch": 0.48444641014300455, "grad_norm": 3.1313144409585556, "learning_rate": 7.413983473356794e-06, "loss": 1.5517, "step": 16430 }, { "epoch": 0.484593837535014, "grad_norm": 3.3890867153510627, "learning_rate": 7.413846431166583e-06, "loss": 1.5174, "step": 16435 }, { "epoch": 0.48474126492702346, "grad_norm": 3.0785522583976777, "learning_rate": 7.413709281163507e-06, "loss": 1.4932, "step": 16440 }, { "epoch": 0.48488869231903287, "grad_norm": 3.1850358678779522, "learning_rate": 7.413572023351603e-06, "loss": 1.4753, "step": 16445 }, { "epoch": 0.4850361197110423, "grad_norm": 3.090138263647863, "learning_rate": 7.413434657734907e-06, "loss": 1.5308, "step": 16450 }, { "epoch": 0.4851835471030517, "grad_norm": 3.2438336283178857, "learning_rate": 7.413297184317463e-06, "loss": 1.5178, "step": 16455 }, { "epoch": 0.4853309744950612, "grad_norm": 3.1795937043667264, "learning_rate": 7.413159603103318e-06, "loss": 1.5436, "step": 16460 }, { "epoch": 0.48547840188707064, "grad_norm": 3.2426330005018844, "learning_rate": 7.413021914096517e-06, "loss": 1.4954, "step": 16465 }, { "epoch": 0.48562582927908005, "grad_norm": 3.0358554792374193, "learning_rate": 7.412884117301115e-06, "loss": 1.447, "step": 16470 }, { "epoch": 0.4857732566710895, "grad_norm": 3.2716687189591465, "learning_rate": 7.412746212721164e-06, "loss": 1.5378, "step": 16475 }, { "epoch": 0.4859206840630989, "grad_norm": 3.2671370055440483, "learning_rate": 7.412608200360724e-06, "loss": 1.554, "step": 16480 }, { "epoch": 0.48606811145510836, "grad_norm": 3.2154638112588296, "learning_rate": 7.412470080223856e-06, "loss": 1.5837, "step": 16485 }, { "epoch": 0.48621553884711777, "grad_norm": 3.1443060196207333, "learning_rate": 7.412331852314623e-06, "loss": 1.5687, "step": 16490 }, { "epoch": 0.4863629662391272, "grad_norm": 3.281734569477738, "learning_rate": 7.412193516637093e-06, "loss": 1.5275, "step": 16495 }, { "epoch": 0.4865103936311367, "grad_norm": 3.086633005814444, "learning_rate": 7.412055073195337e-06, "loss": 1.5336, "step": 16500 }, { "epoch": 0.4865103936311367, "eval_loss": 1.2004046440124512, "eval_runtime": 4.1926, "eval_samples_per_second": 94.452, "eval_steps_per_second": 3.101, "step": 16500 }, { "epoch": 0.4866578210231461, "grad_norm": 3.06115938079265, "learning_rate": 7.41191652199343e-06, "loss": 1.5913, "step": 16505 }, { "epoch": 0.48680524841515554, "grad_norm": 3.2274415636103715, "learning_rate": 7.411777863035447e-06, "loss": 1.5269, "step": 16510 }, { "epoch": 0.48695267580716495, "grad_norm": 3.220947592104303, "learning_rate": 7.41163909632547e-06, "loss": 1.5008, "step": 16515 }, { "epoch": 0.4871001031991744, "grad_norm": 3.0293171898762012, "learning_rate": 7.41150022186758e-06, "loss": 1.503, "step": 16520 }, { "epoch": 0.48724753059118386, "grad_norm": 3.2091481499622416, "learning_rate": 7.411361239665866e-06, "loss": 1.4917, "step": 16525 }, { "epoch": 0.48739495798319327, "grad_norm": 3.1612184695363106, "learning_rate": 7.411222149724416e-06, "loss": 1.4991, "step": 16530 }, { "epoch": 0.4875423853752027, "grad_norm": 3.173638046075278, "learning_rate": 7.411082952047325e-06, "loss": 1.503, "step": 16535 }, { "epoch": 0.4876898127672121, "grad_norm": 3.1083688838736983, "learning_rate": 7.4109436466386855e-06, "loss": 1.5034, "step": 16540 }, { "epoch": 0.4878372401592216, "grad_norm": 3.1752346896122323, "learning_rate": 7.4108042335025995e-06, "loss": 1.5318, "step": 16545 }, { "epoch": 0.48798466755123104, "grad_norm": 3.012847190381002, "learning_rate": 7.410664712643169e-06, "loss": 1.4847, "step": 16550 }, { "epoch": 0.48813209494324045, "grad_norm": 2.9617183933768025, "learning_rate": 7.4105250840644986e-06, "loss": 1.4356, "step": 16555 }, { "epoch": 0.4882795223352499, "grad_norm": 3.1451968978281855, "learning_rate": 7.4103853477707e-06, "loss": 1.5061, "step": 16560 }, { "epoch": 0.4884269497272593, "grad_norm": 3.209534021050648, "learning_rate": 7.41024550376588e-06, "loss": 1.5199, "step": 16565 }, { "epoch": 0.48857437711926877, "grad_norm": 3.033631850629463, "learning_rate": 7.410105552054159e-06, "loss": 1.5236, "step": 16570 }, { "epoch": 0.48872180451127817, "grad_norm": 3.134968894433361, "learning_rate": 7.409965492639651e-06, "loss": 1.4936, "step": 16575 }, { "epoch": 0.4888692319032876, "grad_norm": 3.179465864438554, "learning_rate": 7.40982532552648e-06, "loss": 1.55, "step": 16580 }, { "epoch": 0.4890166592952971, "grad_norm": 3.247494135841116, "learning_rate": 7.409685050718769e-06, "loss": 1.5263, "step": 16585 }, { "epoch": 0.4891640866873065, "grad_norm": 3.3730045336979346, "learning_rate": 7.409544668220647e-06, "loss": 1.5227, "step": 16590 }, { "epoch": 0.48931151407931595, "grad_norm": 3.2033987700418955, "learning_rate": 7.409404178036244e-06, "loss": 1.5144, "step": 16595 }, { "epoch": 0.48945894147132535, "grad_norm": 3.23157509830734, "learning_rate": 7.409263580169694e-06, "loss": 1.5135, "step": 16600 }, { "epoch": 0.4896063688633348, "grad_norm": 3.1640515897982024, "learning_rate": 7.409122874625135e-06, "loss": 1.4941, "step": 16605 }, { "epoch": 0.48975379625534426, "grad_norm": 3.2524573103708923, "learning_rate": 7.4089820614067085e-06, "loss": 1.5054, "step": 16610 }, { "epoch": 0.48990122364735367, "grad_norm": 3.553766696753031, "learning_rate": 7.408841140518556e-06, "loss": 1.5476, "step": 16615 }, { "epoch": 0.4900486510393631, "grad_norm": 3.2213147525552146, "learning_rate": 7.408700111964824e-06, "loss": 1.5553, "step": 16620 }, { "epoch": 0.49019607843137253, "grad_norm": 2.950124442374718, "learning_rate": 7.4085589757496644e-06, "loss": 1.4848, "step": 16625 }, { "epoch": 0.490343505823382, "grad_norm": 3.2758197902326596, "learning_rate": 7.40841773187723e-06, "loss": 1.5097, "step": 16630 }, { "epoch": 0.49049093321539144, "grad_norm": 3.2588739631313035, "learning_rate": 7.408276380351675e-06, "loss": 1.4818, "step": 16635 }, { "epoch": 0.49063836060740085, "grad_norm": 3.2098181412776325, "learning_rate": 7.4081349211771605e-06, "loss": 1.5188, "step": 16640 }, { "epoch": 0.4907857879994103, "grad_norm": 3.206911897675833, "learning_rate": 7.407993354357849e-06, "loss": 1.5449, "step": 16645 }, { "epoch": 0.4909332153914197, "grad_norm": 3.3159603077201023, "learning_rate": 7.407851679897905e-06, "loss": 1.4869, "step": 16650 }, { "epoch": 0.49108064278342917, "grad_norm": 3.0960433798536258, "learning_rate": 7.4077098978015e-06, "loss": 1.4636, "step": 16655 }, { "epoch": 0.49122807017543857, "grad_norm": 3.091637895001946, "learning_rate": 7.407568008072803e-06, "loss": 1.5145, "step": 16660 }, { "epoch": 0.491375497567448, "grad_norm": 3.2533118184017487, "learning_rate": 7.407426010715992e-06, "loss": 1.522, "step": 16665 }, { "epoch": 0.4915229249594575, "grad_norm": 3.143607510006163, "learning_rate": 7.407283905735245e-06, "loss": 1.5277, "step": 16670 }, { "epoch": 0.4916703523514669, "grad_norm": 3.0012217315182697, "learning_rate": 7.407141693134743e-06, "loss": 1.5045, "step": 16675 }, { "epoch": 0.49181777974347635, "grad_norm": 3.2558023546858803, "learning_rate": 7.4069993729186685e-06, "loss": 1.4929, "step": 16680 }, { "epoch": 0.49196520713548575, "grad_norm": 3.2119058821818345, "learning_rate": 7.406856945091213e-06, "loss": 1.5285, "step": 16685 }, { "epoch": 0.4921126345274952, "grad_norm": 3.2378941570980198, "learning_rate": 7.406714409656566e-06, "loss": 1.5203, "step": 16690 }, { "epoch": 0.49226006191950467, "grad_norm": 3.221402368654827, "learning_rate": 7.406571766618922e-06, "loss": 1.5142, "step": 16695 }, { "epoch": 0.49240748931151407, "grad_norm": 3.333934478266541, "learning_rate": 7.4064290159824784e-06, "loss": 1.5098, "step": 16700 }, { "epoch": 0.4925549167035235, "grad_norm": 3.2026495107280666, "learning_rate": 7.406286157751435e-06, "loss": 1.525, "step": 16705 }, { "epoch": 0.49270234409553293, "grad_norm": 3.161166100627473, "learning_rate": 7.406143191929997e-06, "loss": 1.5546, "step": 16710 }, { "epoch": 0.4928497714875424, "grad_norm": 3.141245683766517, "learning_rate": 7.406000118522372e-06, "loss": 1.5199, "step": 16715 }, { "epoch": 0.49299719887955185, "grad_norm": 3.2261676672500625, "learning_rate": 7.405856937532768e-06, "loss": 1.4958, "step": 16720 }, { "epoch": 0.49314462627156125, "grad_norm": 3.225651974828029, "learning_rate": 7.405713648965398e-06, "loss": 1.5073, "step": 16725 }, { "epoch": 0.4932920536635707, "grad_norm": 3.223040336022563, "learning_rate": 7.40557025282448e-06, "loss": 1.5159, "step": 16730 }, { "epoch": 0.4934394810555801, "grad_norm": 3.130884462909842, "learning_rate": 7.405426749114233e-06, "loss": 1.5198, "step": 16735 }, { "epoch": 0.49358690844758957, "grad_norm": 3.137222810766194, "learning_rate": 7.4052831378388805e-06, "loss": 1.48, "step": 16740 }, { "epoch": 0.49373433583959897, "grad_norm": 3.19256670725332, "learning_rate": 7.405139419002647e-06, "loss": 1.5252, "step": 16745 }, { "epoch": 0.49388176323160843, "grad_norm": 2.869698612603854, "learning_rate": 7.404995592609763e-06, "loss": 1.4154, "step": 16750 }, { "epoch": 0.4940291906236179, "grad_norm": 3.3137555294702365, "learning_rate": 7.404851658664461e-06, "loss": 1.4852, "step": 16755 }, { "epoch": 0.4941766180156273, "grad_norm": 2.9663795294503994, "learning_rate": 7.404707617170973e-06, "loss": 1.5207, "step": 16760 }, { "epoch": 0.49432404540763675, "grad_norm": 3.10727462373693, "learning_rate": 7.404563468133542e-06, "loss": 1.4724, "step": 16765 }, { "epoch": 0.49447147279964615, "grad_norm": 3.176063237136297, "learning_rate": 7.404419211556408e-06, "loss": 1.5527, "step": 16770 }, { "epoch": 0.4946189001916556, "grad_norm": 3.137906811059651, "learning_rate": 7.404274847443815e-06, "loss": 1.5249, "step": 16775 }, { "epoch": 0.49476632758366507, "grad_norm": 3.06714321073592, "learning_rate": 7.404130375800011e-06, "loss": 1.5253, "step": 16780 }, { "epoch": 0.49491375497567447, "grad_norm": 3.242242420599197, "learning_rate": 7.403985796629249e-06, "loss": 1.5215, "step": 16785 }, { "epoch": 0.4950611823676839, "grad_norm": 3.112520231327315, "learning_rate": 7.4038411099357835e-06, "loss": 1.5265, "step": 16790 }, { "epoch": 0.49520860975969333, "grad_norm": 3.119116710299764, "learning_rate": 7.40369631572387e-06, "loss": 1.5537, "step": 16795 }, { "epoch": 0.4953560371517028, "grad_norm": 3.4347030642584646, "learning_rate": 7.403551413997771e-06, "loss": 1.479, "step": 16800 }, { "epoch": 0.49550346454371225, "grad_norm": 3.1979284801841668, "learning_rate": 7.4034064047617495e-06, "loss": 1.5174, "step": 16805 }, { "epoch": 0.49565089193572165, "grad_norm": 3.1941740635187776, "learning_rate": 7.403261288020073e-06, "loss": 1.5264, "step": 16810 }, { "epoch": 0.4957983193277311, "grad_norm": 3.0913871912071578, "learning_rate": 7.403116063777011e-06, "loss": 1.5777, "step": 16815 }, { "epoch": 0.4959457467197405, "grad_norm": 3.3358314778804266, "learning_rate": 7.402970732036838e-06, "loss": 1.5394, "step": 16820 }, { "epoch": 0.49609317411174997, "grad_norm": 3.024627080675958, "learning_rate": 7.4028252928038295e-06, "loss": 1.5109, "step": 16825 }, { "epoch": 0.49624060150375937, "grad_norm": 2.969556679481406, "learning_rate": 7.402679746082265e-06, "loss": 1.4743, "step": 16830 }, { "epoch": 0.49638802889576883, "grad_norm": 3.1311745908362325, "learning_rate": 7.402534091876429e-06, "loss": 1.4622, "step": 16835 }, { "epoch": 0.4965354562877783, "grad_norm": 3.0841099760855677, "learning_rate": 7.402388330190606e-06, "loss": 1.5154, "step": 16840 }, { "epoch": 0.4966828836797877, "grad_norm": 2.991773457454515, "learning_rate": 7.402242461029087e-06, "loss": 1.4921, "step": 16845 }, { "epoch": 0.49683031107179715, "grad_norm": 3.031584287010061, "learning_rate": 7.402096484396162e-06, "loss": 1.5338, "step": 16850 }, { "epoch": 0.49697773846380655, "grad_norm": 3.125541425827449, "learning_rate": 7.401950400296128e-06, "loss": 1.5438, "step": 16855 }, { "epoch": 0.497125165855816, "grad_norm": 3.1080846894764753, "learning_rate": 7.401804208733283e-06, "loss": 1.4997, "step": 16860 }, { "epoch": 0.49727259324782547, "grad_norm": 3.195895491649582, "learning_rate": 7.401657909711929e-06, "loss": 1.5111, "step": 16865 }, { "epoch": 0.49742002063983487, "grad_norm": 3.2976787071685814, "learning_rate": 7.401511503236373e-06, "loss": 1.5654, "step": 16870 }, { "epoch": 0.49756744803184433, "grad_norm": 2.939610818429407, "learning_rate": 7.4013649893109195e-06, "loss": 1.5782, "step": 16875 }, { "epoch": 0.49771487542385373, "grad_norm": 3.2154269515764877, "learning_rate": 7.401218367939883e-06, "loss": 1.5354, "step": 16880 }, { "epoch": 0.4978623028158632, "grad_norm": 3.091624134601798, "learning_rate": 7.401071639127576e-06, "loss": 1.4774, "step": 16885 }, { "epoch": 0.49800973020787265, "grad_norm": 3.1866070447286847, "learning_rate": 7.400924802878318e-06, "loss": 1.4812, "step": 16890 }, { "epoch": 0.49815715759988205, "grad_norm": 3.0799223425453337, "learning_rate": 7.4007778591964276e-06, "loss": 1.4903, "step": 16895 }, { "epoch": 0.4983045849918915, "grad_norm": 3.1381267579622434, "learning_rate": 7.40063080808623e-06, "loss": 1.5188, "step": 16900 }, { "epoch": 0.4984520123839009, "grad_norm": 2.9573663989011143, "learning_rate": 7.400483649552052e-06, "loss": 1.483, "step": 16905 }, { "epoch": 0.49859943977591037, "grad_norm": 3.160639709058574, "learning_rate": 7.400336383598226e-06, "loss": 1.5215, "step": 16910 }, { "epoch": 0.49874686716791977, "grad_norm": 3.1608908704830663, "learning_rate": 7.400189010229082e-06, "loss": 1.5136, "step": 16915 }, { "epoch": 0.49889429455992923, "grad_norm": 3.1343928686921756, "learning_rate": 7.400041529448959e-06, "loss": 1.5097, "step": 16920 }, { "epoch": 0.4990417219519387, "grad_norm": 3.1312271977870663, "learning_rate": 7.399893941262196e-06, "loss": 1.4879, "step": 16925 }, { "epoch": 0.4991891493439481, "grad_norm": 3.0539572971627456, "learning_rate": 7.399746245673135e-06, "loss": 1.522, "step": 16930 }, { "epoch": 0.49933657673595755, "grad_norm": 3.2167383451344893, "learning_rate": 7.399598442686124e-06, "loss": 1.5363, "step": 16935 }, { "epoch": 0.49948400412796695, "grad_norm": 3.1900578019743118, "learning_rate": 7.399450532305513e-06, "loss": 1.5345, "step": 16940 }, { "epoch": 0.4996314315199764, "grad_norm": 3.1920133269003537, "learning_rate": 7.39930251453565e-06, "loss": 1.5555, "step": 16945 }, { "epoch": 0.49977885891198587, "grad_norm": 3.1687966487224677, "learning_rate": 7.399154389380895e-06, "loss": 1.4699, "step": 16950 }, { "epoch": 0.49992628630399527, "grad_norm": 3.114611701493643, "learning_rate": 7.3990061568456045e-06, "loss": 1.4956, "step": 16955 }, { "epoch": 0.5000737136960047, "grad_norm": 2.9735779232915855, "learning_rate": 7.398857816934142e-06, "loss": 1.4713, "step": 16960 }, { "epoch": 0.5002211410880142, "grad_norm": 3.098814726690643, "learning_rate": 7.3987093696508715e-06, "loss": 1.4978, "step": 16965 }, { "epoch": 0.5003685684800235, "grad_norm": 3.0817795823993297, "learning_rate": 7.398560815000161e-06, "loss": 1.4653, "step": 16970 }, { "epoch": 0.500515995872033, "grad_norm": 3.054407458691641, "learning_rate": 7.398412152986383e-06, "loss": 1.5217, "step": 16975 }, { "epoch": 0.5006634232640425, "grad_norm": 3.0600210325086423, "learning_rate": 7.39826338361391e-06, "loss": 1.5061, "step": 16980 }, { "epoch": 0.5008108506560519, "grad_norm": 3.146464973587569, "learning_rate": 7.398114506887122e-06, "loss": 1.5162, "step": 16985 }, { "epoch": 0.5009582780480614, "grad_norm": 3.0890287682859605, "learning_rate": 7.397965522810399e-06, "loss": 1.5808, "step": 16990 }, { "epoch": 0.5011057054400707, "grad_norm": 3.2671206520607647, "learning_rate": 7.3978164313881254e-06, "loss": 1.5396, "step": 16995 }, { "epoch": 0.5012531328320802, "grad_norm": 3.274363590620518, "learning_rate": 7.397667232624686e-06, "loss": 1.5355, "step": 17000 }, { "epoch": 0.5012531328320802, "eval_loss": 1.1980583667755127, "eval_runtime": 4.2661, "eval_samples_per_second": 92.824, "eval_steps_per_second": 3.047, "step": 17000 }, { "epoch": 0.5014005602240896, "grad_norm": 3.1779317787526034, "learning_rate": 7.397517926524475e-06, "loss": 1.5715, "step": 17005 }, { "epoch": 0.5015479876160991, "grad_norm": 3.1213761175988117, "learning_rate": 7.397368513091884e-06, "loss": 1.5528, "step": 17010 }, { "epoch": 0.5016954150081085, "grad_norm": 3.1228563126182225, "learning_rate": 7.397218992331309e-06, "loss": 1.4952, "step": 17015 }, { "epoch": 0.5018428424001179, "grad_norm": 3.1002242825590165, "learning_rate": 7.39706936424715e-06, "loss": 1.5485, "step": 17020 }, { "epoch": 0.5019902697921274, "grad_norm": 3.084677182274556, "learning_rate": 7.396919628843812e-06, "loss": 1.5356, "step": 17025 }, { "epoch": 0.5021376971841368, "grad_norm": 3.2630129593658297, "learning_rate": 7.3967697861256975e-06, "loss": 1.537, "step": 17030 }, { "epoch": 0.5022851245761463, "grad_norm": 3.1506717835755373, "learning_rate": 7.396619836097219e-06, "loss": 1.4644, "step": 17035 }, { "epoch": 0.5024325519681557, "grad_norm": 3.1729869754058786, "learning_rate": 7.3964697787627876e-06, "loss": 1.5381, "step": 17040 }, { "epoch": 0.5025799793601651, "grad_norm": 3.009105727060474, "learning_rate": 7.396319614126819e-06, "loss": 1.5517, "step": 17045 }, { "epoch": 0.5027274067521745, "grad_norm": 3.193393942815562, "learning_rate": 7.396169342193733e-06, "loss": 1.5064, "step": 17050 }, { "epoch": 0.502874834144184, "grad_norm": 3.0829504178568428, "learning_rate": 7.396018962967949e-06, "loss": 1.5218, "step": 17055 }, { "epoch": 0.5030222615361934, "grad_norm": 3.238523527752397, "learning_rate": 7.395868476453894e-06, "loss": 1.5303, "step": 17060 }, { "epoch": 0.5031696889282029, "grad_norm": 3.2395483755118875, "learning_rate": 7.395717882655997e-06, "loss": 1.5024, "step": 17065 }, { "epoch": 0.5033171163202123, "grad_norm": 3.125347107620909, "learning_rate": 7.395567181578689e-06, "loss": 1.4758, "step": 17070 }, { "epoch": 0.5034645437122217, "grad_norm": 3.083248396944322, "learning_rate": 7.395416373226402e-06, "loss": 1.5604, "step": 17075 }, { "epoch": 0.5036119711042312, "grad_norm": 3.029088077455282, "learning_rate": 7.395265457603577e-06, "loss": 1.5218, "step": 17080 }, { "epoch": 0.5037593984962406, "grad_norm": 3.467024400712042, "learning_rate": 7.395114434714652e-06, "loss": 1.4929, "step": 17085 }, { "epoch": 0.5039068258882501, "grad_norm": 3.045437193108359, "learning_rate": 7.394963304564073e-06, "loss": 1.5369, "step": 17090 }, { "epoch": 0.5040542532802594, "grad_norm": 3.083350382733776, "learning_rate": 7.3948120671562875e-06, "loss": 1.5526, "step": 17095 }, { "epoch": 0.5042016806722689, "grad_norm": 3.1647646915863237, "learning_rate": 7.394660722495744e-06, "loss": 1.5424, "step": 17100 }, { "epoch": 0.5043491080642784, "grad_norm": 3.160167554636868, "learning_rate": 7.394509270586897e-06, "loss": 1.5229, "step": 17105 }, { "epoch": 0.5044965354562878, "grad_norm": 3.0268879844916294, "learning_rate": 7.394357711434204e-06, "loss": 1.5486, "step": 17110 }, { "epoch": 0.5046439628482973, "grad_norm": 3.13796099152974, "learning_rate": 7.394206045042123e-06, "loss": 1.5041, "step": 17115 }, { "epoch": 0.5047913902403066, "grad_norm": 3.1780136238761685, "learning_rate": 7.394054271415119e-06, "loss": 1.5395, "step": 17120 }, { "epoch": 0.5049388176323161, "grad_norm": 3.07131209776413, "learning_rate": 7.393902390557656e-06, "loss": 1.5274, "step": 17125 }, { "epoch": 0.5050862450243255, "grad_norm": 3.161251314023054, "learning_rate": 7.393750402474204e-06, "loss": 1.5293, "step": 17130 }, { "epoch": 0.505233672416335, "grad_norm": 3.155455144710618, "learning_rate": 7.393598307169237e-06, "loss": 1.5202, "step": 17135 }, { "epoch": 0.5053810998083443, "grad_norm": 3.3376926617132296, "learning_rate": 7.393446104647228e-06, "loss": 1.5537, "step": 17140 }, { "epoch": 0.5055285272003538, "grad_norm": 3.167634748246442, "learning_rate": 7.393293794912658e-06, "loss": 1.5053, "step": 17145 }, { "epoch": 0.5056759545923633, "grad_norm": 3.2130142994617925, "learning_rate": 7.393141377970006e-06, "loss": 1.5035, "step": 17150 }, { "epoch": 0.5058233819843727, "grad_norm": 3.1273051149370077, "learning_rate": 7.392988853823762e-06, "loss": 1.5233, "step": 17155 }, { "epoch": 0.5059708093763822, "grad_norm": 3.1092013606976994, "learning_rate": 7.392836222478409e-06, "loss": 1.4253, "step": 17160 }, { "epoch": 0.5061182367683915, "grad_norm": 2.9902604683431715, "learning_rate": 7.39268348393844e-06, "loss": 1.4991, "step": 17165 }, { "epoch": 0.506265664160401, "grad_norm": 3.2287237958565784, "learning_rate": 7.392530638208351e-06, "loss": 1.4723, "step": 17170 }, { "epoch": 0.5064130915524104, "grad_norm": 3.0928831958991294, "learning_rate": 7.392377685292639e-06, "loss": 1.5266, "step": 17175 }, { "epoch": 0.5065605189444199, "grad_norm": 3.1566472386361575, "learning_rate": 7.392224625195804e-06, "loss": 1.4787, "step": 17180 }, { "epoch": 0.5067079463364293, "grad_norm": 3.1238835348670855, "learning_rate": 7.3920714579223495e-06, "loss": 1.4989, "step": 17185 }, { "epoch": 0.5068553737284387, "grad_norm": 3.1033788006061447, "learning_rate": 7.391918183476785e-06, "loss": 1.4807, "step": 17190 }, { "epoch": 0.5070028011204482, "grad_norm": 3.1721527820386415, "learning_rate": 7.3917648018636185e-06, "loss": 1.4877, "step": 17195 }, { "epoch": 0.5071502285124576, "grad_norm": 2.9767162429192044, "learning_rate": 7.391611313087365e-06, "loss": 1.5762, "step": 17200 }, { "epoch": 0.5072976559044671, "grad_norm": 3.948880425467743, "learning_rate": 7.391457717152539e-06, "loss": 1.4901, "step": 17205 }, { "epoch": 0.5074450832964765, "grad_norm": 3.1013510549466163, "learning_rate": 7.391304014063662e-06, "loss": 1.5157, "step": 17210 }, { "epoch": 0.5075925106884859, "grad_norm": 3.250966371387294, "learning_rate": 7.391150203825257e-06, "loss": 1.5319, "step": 17215 }, { "epoch": 0.5077399380804953, "grad_norm": 3.252381729939547, "learning_rate": 7.390996286441849e-06, "loss": 1.5417, "step": 17220 }, { "epoch": 0.5078873654725048, "grad_norm": 3.0587219361981117, "learning_rate": 7.39084226191797e-06, "loss": 1.4759, "step": 17225 }, { "epoch": 0.5080347928645143, "grad_norm": 3.173936799031734, "learning_rate": 7.390688130258147e-06, "loss": 1.5259, "step": 17230 }, { "epoch": 0.5081822202565237, "grad_norm": 3.148191925606439, "learning_rate": 7.390533891466919e-06, "loss": 1.5392, "step": 17235 }, { "epoch": 0.5083296476485331, "grad_norm": 3.554191538120281, "learning_rate": 7.390379545548825e-06, "loss": 1.5127, "step": 17240 }, { "epoch": 0.5084770750405425, "grad_norm": 3.022907023225473, "learning_rate": 7.390225092508406e-06, "loss": 1.514, "step": 17245 }, { "epoch": 0.508624502432552, "grad_norm": 3.2014647213038443, "learning_rate": 7.390070532350206e-06, "loss": 1.5165, "step": 17250 }, { "epoch": 0.5087719298245614, "grad_norm": 3.2596127003350404, "learning_rate": 7.389915865078775e-06, "loss": 1.4548, "step": 17255 }, { "epoch": 0.5089193572165709, "grad_norm": 3.293941308929698, "learning_rate": 7.389761090698663e-06, "loss": 1.511, "step": 17260 }, { "epoch": 0.5090667846085802, "grad_norm": 3.1848487551396016, "learning_rate": 7.389606209214426e-06, "loss": 1.4682, "step": 17265 }, { "epoch": 0.5092142120005897, "grad_norm": 3.3068781073284934, "learning_rate": 7.389451220630618e-06, "loss": 1.536, "step": 17270 }, { "epoch": 0.5093616393925992, "grad_norm": 3.171420721036981, "learning_rate": 7.389296124951804e-06, "loss": 1.5325, "step": 17275 }, { "epoch": 0.5095090667846086, "grad_norm": 3.078547560160486, "learning_rate": 7.389140922182544e-06, "loss": 1.4907, "step": 17280 }, { "epoch": 0.5096564941766181, "grad_norm": 3.1075652027395693, "learning_rate": 7.3889856123274085e-06, "loss": 1.4923, "step": 17285 }, { "epoch": 0.5098039215686274, "grad_norm": 2.9707661473112354, "learning_rate": 7.388830195390966e-06, "loss": 1.5026, "step": 17290 }, { "epoch": 0.5099513489606369, "grad_norm": 3.1305499539453265, "learning_rate": 7.388674671377789e-06, "loss": 1.4734, "step": 17295 }, { "epoch": 0.5100987763526463, "grad_norm": 3.2738108078690327, "learning_rate": 7.388519040292455e-06, "loss": 1.5285, "step": 17300 }, { "epoch": 0.5102462037446558, "grad_norm": 3.1425435704225446, "learning_rate": 7.388363302139544e-06, "loss": 1.5014, "step": 17305 }, { "epoch": 0.5103936311366651, "grad_norm": 3.30822088304023, "learning_rate": 7.3882074569236375e-06, "loss": 1.4641, "step": 17310 }, { "epoch": 0.5105410585286746, "grad_norm": 3.102706087215324, "learning_rate": 7.388051504649323e-06, "loss": 1.4899, "step": 17315 }, { "epoch": 0.510688485920684, "grad_norm": 3.0500530444859817, "learning_rate": 7.387895445321188e-06, "loss": 1.5189, "step": 17320 }, { "epoch": 0.5108359133126935, "grad_norm": 3.069165959773882, "learning_rate": 7.387739278943827e-06, "loss": 1.5161, "step": 17325 }, { "epoch": 0.510983340704703, "grad_norm": 3.436288410965421, "learning_rate": 7.387583005521832e-06, "loss": 1.4889, "step": 17330 }, { "epoch": 0.5111307680967123, "grad_norm": 3.160300012956403, "learning_rate": 7.387426625059804e-06, "loss": 1.5107, "step": 17335 }, { "epoch": 0.5112781954887218, "grad_norm": 3.0962008390623823, "learning_rate": 7.387270137562344e-06, "loss": 1.5273, "step": 17340 }, { "epoch": 0.5114256228807312, "grad_norm": 3.2303896938030907, "learning_rate": 7.387113543034057e-06, "loss": 1.5181, "step": 17345 }, { "epoch": 0.5115730502727407, "grad_norm": 3.0937998227247503, "learning_rate": 7.386956841479552e-06, "loss": 1.5172, "step": 17350 }, { "epoch": 0.5117204776647502, "grad_norm": 3.1643745415063615, "learning_rate": 7.386800032903437e-06, "loss": 1.5642, "step": 17355 }, { "epoch": 0.5118679050567595, "grad_norm": 5.128217442282079, "learning_rate": 7.3866431173103285e-06, "loss": 1.5724, "step": 17360 }, { "epoch": 0.512015332448769, "grad_norm": 3.0397411718961784, "learning_rate": 7.386486094704846e-06, "loss": 1.5232, "step": 17365 }, { "epoch": 0.5121627598407784, "grad_norm": 3.201280042575041, "learning_rate": 7.386328965091605e-06, "loss": 1.464, "step": 17370 }, { "epoch": 0.5123101872327879, "grad_norm": 3.0775200880216844, "learning_rate": 7.3861717284752325e-06, "loss": 1.4794, "step": 17375 }, { "epoch": 0.5124576146247973, "grad_norm": 3.1187980416922056, "learning_rate": 7.386014384860356e-06, "loss": 1.5442, "step": 17380 }, { "epoch": 0.5126050420168067, "grad_norm": 3.1925963151501584, "learning_rate": 7.385856934251603e-06, "loss": 1.5131, "step": 17385 }, { "epoch": 0.5127524694088161, "grad_norm": 3.1979176314833784, "learning_rate": 7.385699376653609e-06, "loss": 1.5133, "step": 17390 }, { "epoch": 0.5128998968008256, "grad_norm": 3.171623949343793, "learning_rate": 7.385541712071009e-06, "loss": 1.4983, "step": 17395 }, { "epoch": 0.513047324192835, "grad_norm": 3.260674738545682, "learning_rate": 7.385383940508442e-06, "loss": 1.5796, "step": 17400 }, { "epoch": 0.5131947515848445, "grad_norm": 3.1514177859325447, "learning_rate": 7.385226061970553e-06, "loss": 1.5349, "step": 17405 }, { "epoch": 0.5133421789768539, "grad_norm": 3.087209441219568, "learning_rate": 7.385068076461986e-06, "loss": 1.5344, "step": 17410 }, { "epoch": 0.5134896063688633, "grad_norm": 3.2435238545004688, "learning_rate": 7.3849099839873885e-06, "loss": 1.5278, "step": 17415 }, { "epoch": 0.5136370337608728, "grad_norm": 3.2018959070257194, "learning_rate": 7.384751784551415e-06, "loss": 1.5114, "step": 17420 }, { "epoch": 0.5137844611528822, "grad_norm": 3.2595084447181004, "learning_rate": 7.38459347815872e-06, "loss": 1.4548, "step": 17425 }, { "epoch": 0.5139318885448917, "grad_norm": 2.986060304923525, "learning_rate": 7.384435064813961e-06, "loss": 1.5406, "step": 17430 }, { "epoch": 0.514079315936901, "grad_norm": 3.349168196137793, "learning_rate": 7.3842765445218005e-06, "loss": 1.5539, "step": 17435 }, { "epoch": 0.5142267433289105, "grad_norm": 3.133397362715177, "learning_rate": 7.384117917286902e-06, "loss": 1.5079, "step": 17440 }, { "epoch": 0.51437417072092, "grad_norm": 3.1021859715429225, "learning_rate": 7.383959183113935e-06, "loss": 1.5354, "step": 17445 }, { "epoch": 0.5145215981129294, "grad_norm": 3.048503230399949, "learning_rate": 7.383800342007569e-06, "loss": 1.4751, "step": 17450 }, { "epoch": 0.5146690255049389, "grad_norm": 3.1681652347399356, "learning_rate": 7.383641393972478e-06, "loss": 1.5133, "step": 17455 }, { "epoch": 0.5148164528969482, "grad_norm": 3.1849019910189944, "learning_rate": 7.383482339013342e-06, "loss": 1.5355, "step": 17460 }, { "epoch": 0.5149638802889577, "grad_norm": 3.0363043317485525, "learning_rate": 7.383323177134837e-06, "loss": 1.5564, "step": 17465 }, { "epoch": 0.5151113076809671, "grad_norm": 3.140657498692886, "learning_rate": 7.383163908341649e-06, "loss": 1.4673, "step": 17470 }, { "epoch": 0.5152587350729766, "grad_norm": 3.174075621470999, "learning_rate": 7.383004532638465e-06, "loss": 1.513, "step": 17475 }, { "epoch": 0.5154061624649859, "grad_norm": 2.9851906318254096, "learning_rate": 7.382845050029975e-06, "loss": 1.5042, "step": 17480 }, { "epoch": 0.5155535898569954, "grad_norm": 3.136210120270164, "learning_rate": 7.38268546052087e-06, "loss": 1.5449, "step": 17485 }, { "epoch": 0.5157010172490049, "grad_norm": 3.0199671014491583, "learning_rate": 7.382525764115847e-06, "loss": 1.5086, "step": 17490 }, { "epoch": 0.5158484446410143, "grad_norm": 3.008143973317486, "learning_rate": 7.382365960819606e-06, "loss": 1.4881, "step": 17495 }, { "epoch": 0.5159958720330238, "grad_norm": 3.078404484582791, "learning_rate": 7.382206050636848e-06, "loss": 1.4291, "step": 17500 }, { "epoch": 0.5159958720330238, "eval_loss": 1.1945264339447021, "eval_runtime": 4.1796, "eval_samples_per_second": 94.746, "eval_steps_per_second": 3.11, "step": 17500 }, { "epoch": 0.5161432994250331, "grad_norm": 3.1897091480045243, "learning_rate": 7.38204603357228e-06, "loss": 1.5506, "step": 17505 }, { "epoch": 0.5162907268170426, "grad_norm": 3.0324460440825067, "learning_rate": 7.38188590963061e-06, "loss": 1.4853, "step": 17510 }, { "epoch": 0.516438154209052, "grad_norm": 3.217991924922232, "learning_rate": 7.381725678816551e-06, "loss": 1.5215, "step": 17515 }, { "epoch": 0.5165855816010615, "grad_norm": 3.210639950210781, "learning_rate": 7.3815653411348164e-06, "loss": 1.5105, "step": 17520 }, { "epoch": 0.516733008993071, "grad_norm": 3.159318200962883, "learning_rate": 7.381404896590125e-06, "loss": 1.5371, "step": 17525 }, { "epoch": 0.5168804363850803, "grad_norm": 3.066913739482479, "learning_rate": 7.381244345187196e-06, "loss": 1.5056, "step": 17530 }, { "epoch": 0.5170278637770898, "grad_norm": 3.2094736761751204, "learning_rate": 7.381083686930757e-06, "loss": 1.5362, "step": 17535 }, { "epoch": 0.5171752911690992, "grad_norm": 3.1859012527208166, "learning_rate": 7.380922921825535e-06, "loss": 1.5389, "step": 17540 }, { "epoch": 0.5173227185611087, "grad_norm": 2.9460238347800907, "learning_rate": 7.3807620498762585e-06, "loss": 1.536, "step": 17545 }, { "epoch": 0.5174701459531181, "grad_norm": 3.053222131142207, "learning_rate": 7.380601071087664e-06, "loss": 1.4891, "step": 17550 }, { "epoch": 0.5176175733451275, "grad_norm": 3.360843838429319, "learning_rate": 7.380439985464487e-06, "loss": 1.5294, "step": 17555 }, { "epoch": 0.5177650007371369, "grad_norm": 3.1415195865111643, "learning_rate": 7.380278793011467e-06, "loss": 1.5276, "step": 17560 }, { "epoch": 0.5179124281291464, "grad_norm": 3.1459310166089356, "learning_rate": 7.380117493733349e-06, "loss": 1.4964, "step": 17565 }, { "epoch": 0.5180598555211559, "grad_norm": 3.1870223850200894, "learning_rate": 7.379956087634879e-06, "loss": 1.5441, "step": 17570 }, { "epoch": 0.5182072829131653, "grad_norm": 2.9822923982434144, "learning_rate": 7.379794574720806e-06, "loss": 1.5062, "step": 17575 }, { "epoch": 0.5183547103051747, "grad_norm": 3.104910766463214, "learning_rate": 7.3796329549958816e-06, "loss": 1.482, "step": 17580 }, { "epoch": 0.5185021376971841, "grad_norm": 3.1320027387647893, "learning_rate": 7.379471228464864e-06, "loss": 1.5309, "step": 17585 }, { "epoch": 0.5186495650891936, "grad_norm": 3.1281870202887463, "learning_rate": 7.379309395132511e-06, "loss": 1.5201, "step": 17590 }, { "epoch": 0.518796992481203, "grad_norm": 3.2409823827752224, "learning_rate": 7.379147455003584e-06, "loss": 1.5039, "step": 17595 }, { "epoch": 0.5189444198732125, "grad_norm": 3.1338675009014336, "learning_rate": 7.37898540808285e-06, "loss": 1.5372, "step": 17600 }, { "epoch": 0.5190918472652218, "grad_norm": 3.248143440637519, "learning_rate": 7.378823254375076e-06, "loss": 1.5586, "step": 17605 }, { "epoch": 0.5192392746572313, "grad_norm": 2.9797537756949213, "learning_rate": 7.378660993885035e-06, "loss": 1.4864, "step": 17610 }, { "epoch": 0.5193867020492408, "grad_norm": 3.148278804213638, "learning_rate": 7.378498626617499e-06, "loss": 1.5079, "step": 17615 }, { "epoch": 0.5195341294412502, "grad_norm": 3.0172347847688825, "learning_rate": 7.378336152577249e-06, "loss": 1.4962, "step": 17620 }, { "epoch": 0.5196815568332597, "grad_norm": 3.1176661636994987, "learning_rate": 7.378173571769064e-06, "loss": 1.4921, "step": 17625 }, { "epoch": 0.519828984225269, "grad_norm": 3.1732194625215957, "learning_rate": 7.37801088419773e-06, "loss": 1.5195, "step": 17630 }, { "epoch": 0.5199764116172785, "grad_norm": 3.0602325243710014, "learning_rate": 7.377848089868032e-06, "loss": 1.5382, "step": 17635 }, { "epoch": 0.5201238390092879, "grad_norm": 3.058486753409065, "learning_rate": 7.3776851887847605e-06, "loss": 1.4955, "step": 17640 }, { "epoch": 0.5202712664012974, "grad_norm": 3.0227570918076787, "learning_rate": 7.377522180952711e-06, "loss": 1.4545, "step": 17645 }, { "epoch": 0.5204186937933067, "grad_norm": 3.0814776291484636, "learning_rate": 7.377359066376679e-06, "loss": 1.5047, "step": 17650 }, { "epoch": 0.5205661211853162, "grad_norm": 3.0849630592102244, "learning_rate": 7.3771958450614644e-06, "loss": 1.5496, "step": 17655 }, { "epoch": 0.5207135485773257, "grad_norm": 3.089386440692441, "learning_rate": 7.37703251701187e-06, "loss": 1.5456, "step": 17660 }, { "epoch": 0.5208609759693351, "grad_norm": 3.073799134103323, "learning_rate": 7.376869082232702e-06, "loss": 1.5311, "step": 17665 }, { "epoch": 0.5210084033613446, "grad_norm": 2.8617144371302246, "learning_rate": 7.376705540728771e-06, "loss": 1.4577, "step": 17670 }, { "epoch": 0.5211558307533539, "grad_norm": 3.129225654767523, "learning_rate": 7.376541892504887e-06, "loss": 1.4592, "step": 17675 }, { "epoch": 0.5213032581453634, "grad_norm": 3.2217841696203737, "learning_rate": 7.376378137565867e-06, "loss": 1.498, "step": 17680 }, { "epoch": 0.5214506855373728, "grad_norm": 3.07404139347228, "learning_rate": 7.3762142759165295e-06, "loss": 1.5027, "step": 17685 }, { "epoch": 0.5215981129293823, "grad_norm": 3.0844519399843024, "learning_rate": 7.376050307561696e-06, "loss": 1.4826, "step": 17690 }, { "epoch": 0.5217455403213918, "grad_norm": 3.4644983393021267, "learning_rate": 7.375886232506191e-06, "loss": 1.5292, "step": 17695 }, { "epoch": 0.5218929677134011, "grad_norm": 3.2891308671999453, "learning_rate": 7.375722050754846e-06, "loss": 1.5472, "step": 17700 }, { "epoch": 0.5220403951054106, "grad_norm": 3.291881257761151, "learning_rate": 7.3755577623124874e-06, "loss": 1.5575, "step": 17705 }, { "epoch": 0.52218782249742, "grad_norm": 3.2703967004083774, "learning_rate": 7.375393367183952e-06, "loss": 1.5079, "step": 17710 }, { "epoch": 0.5223352498894295, "grad_norm": 3.088312231938456, "learning_rate": 7.375228865374076e-06, "loss": 1.5405, "step": 17715 }, { "epoch": 0.5224826772814389, "grad_norm": 3.098600980955855, "learning_rate": 7.375064256887703e-06, "loss": 1.5169, "step": 17720 }, { "epoch": 0.5226301046734483, "grad_norm": 3.2406382967425027, "learning_rate": 7.374899541729673e-06, "loss": 1.5029, "step": 17725 }, { "epoch": 0.5227775320654577, "grad_norm": 3.055528721315165, "learning_rate": 7.374734719904836e-06, "loss": 1.5132, "step": 17730 }, { "epoch": 0.5229249594574672, "grad_norm": 3.080022787799531, "learning_rate": 7.37456979141804e-06, "loss": 1.5249, "step": 17735 }, { "epoch": 0.5230723868494767, "grad_norm": 3.1383850738306416, "learning_rate": 7.37440475627414e-06, "loss": 1.496, "step": 17740 }, { "epoch": 0.5232198142414861, "grad_norm": 3.0797541963500357, "learning_rate": 7.374239614477991e-06, "loss": 1.5316, "step": 17745 }, { "epoch": 0.5233672416334955, "grad_norm": 3.1773236140071943, "learning_rate": 7.374074366034453e-06, "loss": 1.5203, "step": 17750 }, { "epoch": 0.5235146690255049, "grad_norm": 3.2265614160400973, "learning_rate": 7.373909010948388e-06, "loss": 1.5181, "step": 17755 }, { "epoch": 0.5236620964175144, "grad_norm": 3.1918898204217108, "learning_rate": 7.373743549224663e-06, "loss": 1.5387, "step": 17760 }, { "epoch": 0.5238095238095238, "grad_norm": 3.1119400950200236, "learning_rate": 7.373577980868146e-06, "loss": 1.5246, "step": 17765 }, { "epoch": 0.5239569512015333, "grad_norm": 3.0538738507647745, "learning_rate": 7.373412305883709e-06, "loss": 1.5089, "step": 17770 }, { "epoch": 0.5241043785935426, "grad_norm": 2.9778455944614106, "learning_rate": 7.373246524276228e-06, "loss": 1.5219, "step": 17775 }, { "epoch": 0.5242518059855521, "grad_norm": 3.206020143224685, "learning_rate": 7.37308063605058e-06, "loss": 1.4522, "step": 17780 }, { "epoch": 0.5243992333775616, "grad_norm": 3.204197574574192, "learning_rate": 7.3729146412116475e-06, "loss": 1.4957, "step": 17785 }, { "epoch": 0.524546660769571, "grad_norm": 3.1306423731648585, "learning_rate": 7.372748539764314e-06, "loss": 1.5228, "step": 17790 }, { "epoch": 0.5246940881615805, "grad_norm": 3.2446420116387316, "learning_rate": 7.372582331713469e-06, "loss": 1.5408, "step": 17795 }, { "epoch": 0.5248415155535898, "grad_norm": 3.0491694494742387, "learning_rate": 7.372416017064002e-06, "loss": 1.5541, "step": 17800 }, { "epoch": 0.5249889429455993, "grad_norm": 3.0351389632549948, "learning_rate": 7.372249595820808e-06, "loss": 1.5175, "step": 17805 }, { "epoch": 0.5251363703376087, "grad_norm": 3.1090028970797885, "learning_rate": 7.372083067988782e-06, "loss": 1.5622, "step": 17810 }, { "epoch": 0.5252837977296182, "grad_norm": 2.9390892712310075, "learning_rate": 7.371916433572826e-06, "loss": 1.5195, "step": 17815 }, { "epoch": 0.5254312251216275, "grad_norm": 3.149424074193049, "learning_rate": 7.371749692577844e-06, "loss": 1.5308, "step": 17820 }, { "epoch": 0.525578652513637, "grad_norm": 3.0478343569699424, "learning_rate": 7.371582845008742e-06, "loss": 1.5317, "step": 17825 }, { "epoch": 0.5257260799056465, "grad_norm": 3.114265815546021, "learning_rate": 7.371415890870428e-06, "loss": 1.5335, "step": 17830 }, { "epoch": 0.5258735072976559, "grad_norm": 2.9982505094890355, "learning_rate": 7.371248830167817e-06, "loss": 1.4995, "step": 17835 }, { "epoch": 0.5260209346896654, "grad_norm": 3.1095649417070543, "learning_rate": 7.371081662905824e-06, "loss": 1.5033, "step": 17840 }, { "epoch": 0.5261683620816747, "grad_norm": 3.2414050921082946, "learning_rate": 7.370914389089368e-06, "loss": 1.5136, "step": 17845 }, { "epoch": 0.5263157894736842, "grad_norm": 3.0842090774921567, "learning_rate": 7.370747008723372e-06, "loss": 1.5355, "step": 17850 }, { "epoch": 0.5264632168656936, "grad_norm": 2.9951586360327926, "learning_rate": 7.370579521812761e-06, "loss": 1.5252, "step": 17855 }, { "epoch": 0.5266106442577031, "grad_norm": 2.9000025507716902, "learning_rate": 7.370411928362462e-06, "loss": 1.5004, "step": 17860 }, { "epoch": 0.5267580716497126, "grad_norm": 3.1905128297375085, "learning_rate": 7.37024422837741e-06, "loss": 1.4947, "step": 17865 }, { "epoch": 0.5269054990417219, "grad_norm": 3.034456044182269, "learning_rate": 7.370076421862536e-06, "loss": 1.5066, "step": 17870 }, { "epoch": 0.5270529264337314, "grad_norm": 3.0021876533312186, "learning_rate": 7.369908508822779e-06, "loss": 1.4894, "step": 17875 }, { "epoch": 0.5272003538257408, "grad_norm": 3.317295992392331, "learning_rate": 7.369740489263082e-06, "loss": 1.4979, "step": 17880 }, { "epoch": 0.5273477812177503, "grad_norm": 3.1607549499290584, "learning_rate": 7.369572363188387e-06, "loss": 1.4903, "step": 17885 }, { "epoch": 0.5274952086097597, "grad_norm": 3.1703930036067365, "learning_rate": 7.369404130603643e-06, "loss": 1.5326, "step": 17890 }, { "epoch": 0.5276426360017691, "grad_norm": 3.3509958846953096, "learning_rate": 7.369235791513799e-06, "loss": 1.5305, "step": 17895 }, { "epoch": 0.5277900633937785, "grad_norm": 3.14921017253208, "learning_rate": 7.369067345923809e-06, "loss": 1.5089, "step": 17900 }, { "epoch": 0.527937490785788, "grad_norm": 3.2096553701129222, "learning_rate": 7.36889879383863e-06, "loss": 1.5304, "step": 17905 }, { "epoch": 0.5280849181777975, "grad_norm": 3.2202185121065696, "learning_rate": 7.368730135263222e-06, "loss": 1.5305, "step": 17910 }, { "epoch": 0.5282323455698069, "grad_norm": 3.2566690442944193, "learning_rate": 7.368561370202548e-06, "loss": 1.5423, "step": 17915 }, { "epoch": 0.5283797729618163, "grad_norm": 3.1135259028272917, "learning_rate": 7.368392498661572e-06, "loss": 1.5213, "step": 17920 }, { "epoch": 0.5285272003538257, "grad_norm": 2.992706509829458, "learning_rate": 7.368223520645266e-06, "loss": 1.5292, "step": 17925 }, { "epoch": 0.5286746277458352, "grad_norm": 3.176578076634251, "learning_rate": 7.368054436158601e-06, "loss": 1.4968, "step": 17930 }, { "epoch": 0.5288220551378446, "grad_norm": 4.965523959081391, "learning_rate": 7.367885245206554e-06, "loss": 1.4806, "step": 17935 }, { "epoch": 0.5289694825298541, "grad_norm": 3.5091835758673495, "learning_rate": 7.367715947794101e-06, "loss": 1.506, "step": 17940 }, { "epoch": 0.5291169099218634, "grad_norm": 3.01525594186265, "learning_rate": 7.3675465439262264e-06, "loss": 1.478, "step": 17945 }, { "epoch": 0.5292643373138729, "grad_norm": 3.0716996159970797, "learning_rate": 7.367377033607913e-06, "loss": 1.5286, "step": 17950 }, { "epoch": 0.5294117647058824, "grad_norm": 3.2205118294352255, "learning_rate": 7.36720741684415e-06, "loss": 1.5023, "step": 17955 }, { "epoch": 0.5295591920978918, "grad_norm": 2.979826381819194, "learning_rate": 7.3670376936399285e-06, "loss": 1.525, "step": 17960 }, { "epoch": 0.5297066194899013, "grad_norm": 3.1716762314496316, "learning_rate": 7.366867864000242e-06, "loss": 1.5163, "step": 17965 }, { "epoch": 0.5298540468819106, "grad_norm": 3.119265213659283, "learning_rate": 7.366697927930089e-06, "loss": 1.5373, "step": 17970 }, { "epoch": 0.5300014742739201, "grad_norm": 3.120525965277424, "learning_rate": 7.366527885434469e-06, "loss": 1.4864, "step": 17975 }, { "epoch": 0.5301489016659295, "grad_norm": 2.89868458727986, "learning_rate": 7.366357736518388e-06, "loss": 1.5288, "step": 17980 }, { "epoch": 0.530296329057939, "grad_norm": 3.110872328086311, "learning_rate": 7.36618748118685e-06, "loss": 1.4677, "step": 17985 }, { "epoch": 0.5304437564499485, "grad_norm": 3.11136942686634, "learning_rate": 7.366017119444867e-06, "loss": 1.5358, "step": 17990 }, { "epoch": 0.5305911838419578, "grad_norm": 3.194325547496019, "learning_rate": 7.365846651297451e-06, "loss": 1.5652, "step": 17995 }, { "epoch": 0.5307386112339673, "grad_norm": 3.0460165236768977, "learning_rate": 7.365676076749617e-06, "loss": 1.5137, "step": 18000 }, { "epoch": 0.5307386112339673, "eval_loss": 1.1933282613754272, "eval_runtime": 4.3276, "eval_samples_per_second": 91.505, "eval_steps_per_second": 3.004, "step": 18000 }, { "epoch": 0.5308860386259767, "grad_norm": 2.991979114603994, "learning_rate": 7.365505395806387e-06, "loss": 1.5042, "step": 18005 }, { "epoch": 0.5310334660179862, "grad_norm": 3.1095968615287637, "learning_rate": 7.365334608472782e-06, "loss": 1.5781, "step": 18010 }, { "epoch": 0.5311808934099955, "grad_norm": 3.088814036275178, "learning_rate": 7.3651637147538285e-06, "loss": 1.4776, "step": 18015 }, { "epoch": 0.531328320802005, "grad_norm": 3.10922764145195, "learning_rate": 7.3649927146545534e-06, "loss": 1.4958, "step": 18020 }, { "epoch": 0.5314757481940144, "grad_norm": 3.1031622496768545, "learning_rate": 7.364821608179991e-06, "loss": 1.507, "step": 18025 }, { "epoch": 0.5316231755860239, "grad_norm": 3.059530569764907, "learning_rate": 7.364650395335174e-06, "loss": 1.4979, "step": 18030 }, { "epoch": 0.5317706029780334, "grad_norm": 3.121093517484379, "learning_rate": 7.364479076125143e-06, "loss": 1.4891, "step": 18035 }, { "epoch": 0.5319180303700427, "grad_norm": 3.0879075937905456, "learning_rate": 7.3643076505549366e-06, "loss": 1.5205, "step": 18040 }, { "epoch": 0.5320654577620522, "grad_norm": 3.042130143168522, "learning_rate": 7.364136118629601e-06, "loss": 1.4953, "step": 18045 }, { "epoch": 0.5322128851540616, "grad_norm": 2.9916645345486397, "learning_rate": 7.363964480354183e-06, "loss": 1.4952, "step": 18050 }, { "epoch": 0.5323603125460711, "grad_norm": 3.2287726577634683, "learning_rate": 7.363792735733733e-06, "loss": 1.5415, "step": 18055 }, { "epoch": 0.5325077399380805, "grad_norm": 3.1099153897842386, "learning_rate": 7.3636208847733056e-06, "loss": 1.5378, "step": 18060 }, { "epoch": 0.5326551673300899, "grad_norm": 3.2895265238457387, "learning_rate": 7.363448927477957e-06, "loss": 1.5091, "step": 18065 }, { "epoch": 0.5328025947220993, "grad_norm": 3.0262970419895865, "learning_rate": 7.3632768638527495e-06, "loss": 1.5074, "step": 18070 }, { "epoch": 0.5329500221141088, "grad_norm": 3.1425990644788713, "learning_rate": 7.363104693902742e-06, "loss": 1.5413, "step": 18075 }, { "epoch": 0.5330974495061183, "grad_norm": 3.1052597926052257, "learning_rate": 7.362932417633004e-06, "loss": 1.5345, "step": 18080 }, { "epoch": 0.5332448768981277, "grad_norm": 3.249425024471753, "learning_rate": 7.3627600350486045e-06, "loss": 1.5021, "step": 18085 }, { "epoch": 0.5333923042901371, "grad_norm": 3.1225285433504895, "learning_rate": 7.362587546154615e-06, "loss": 1.5075, "step": 18090 }, { "epoch": 0.5335397316821465, "grad_norm": 3.12444030245319, "learning_rate": 7.362414950956112e-06, "loss": 1.5525, "step": 18095 }, { "epoch": 0.533687159074156, "grad_norm": 3.1243305681344435, "learning_rate": 7.3622422494581735e-06, "loss": 1.53, "step": 18100 }, { "epoch": 0.5338345864661654, "grad_norm": 3.1026503762574547, "learning_rate": 7.362069441665884e-06, "loss": 1.5041, "step": 18105 }, { "epoch": 0.5339820138581749, "grad_norm": 3.1974808861405974, "learning_rate": 7.361896527584324e-06, "loss": 1.5182, "step": 18110 }, { "epoch": 0.5341294412501842, "grad_norm": 3.0474690665281914, "learning_rate": 7.361723507218587e-06, "loss": 1.5351, "step": 18115 }, { "epoch": 0.5342768686421937, "grad_norm": 3.111999805700497, "learning_rate": 7.36155038057376e-06, "loss": 1.5171, "step": 18120 }, { "epoch": 0.5344242960342032, "grad_norm": 3.157843866236543, "learning_rate": 7.361377147654939e-06, "loss": 1.4984, "step": 18125 }, { "epoch": 0.5345717234262126, "grad_norm": 3.283400151138952, "learning_rate": 7.361203808467223e-06, "loss": 1.5006, "step": 18130 }, { "epoch": 0.5347191508182221, "grad_norm": 3.1532704418336124, "learning_rate": 7.36103036301571e-06, "loss": 1.5226, "step": 18135 }, { "epoch": 0.5348665782102314, "grad_norm": 2.947190721056687, "learning_rate": 7.3608568113055056e-06, "loss": 1.5219, "step": 18140 }, { "epoch": 0.5350140056022409, "grad_norm": 2.9670500606539902, "learning_rate": 7.360683153341717e-06, "loss": 1.5373, "step": 18145 }, { "epoch": 0.5351614329942503, "grad_norm": 3.224212619321178, "learning_rate": 7.360509389129453e-06, "loss": 1.5467, "step": 18150 }, { "epoch": 0.5353088603862598, "grad_norm": 3.0328669737595173, "learning_rate": 7.360335518673827e-06, "loss": 1.522, "step": 18155 }, { "epoch": 0.5354562877782693, "grad_norm": 3.122709183559854, "learning_rate": 7.3601615419799566e-06, "loss": 1.4435, "step": 18160 }, { "epoch": 0.5356037151702786, "grad_norm": 3.135768117741401, "learning_rate": 7.35998745905296e-06, "loss": 1.505, "step": 18165 }, { "epoch": 0.5357511425622881, "grad_norm": 3.129842491754154, "learning_rate": 7.359813269897961e-06, "loss": 1.5488, "step": 18170 }, { "epoch": 0.5358985699542975, "grad_norm": 3.014302387370576, "learning_rate": 7.359638974520084e-06, "loss": 1.5033, "step": 18175 }, { "epoch": 0.536045997346307, "grad_norm": 3.2040071086978927, "learning_rate": 7.359464572924458e-06, "loss": 1.5218, "step": 18180 }, { "epoch": 0.5361934247383163, "grad_norm": 2.9262803373896036, "learning_rate": 7.359290065116215e-06, "loss": 1.5145, "step": 18185 }, { "epoch": 0.5363408521303258, "grad_norm": 3.46135295189097, "learning_rate": 7.359115451100492e-06, "loss": 1.5322, "step": 18190 }, { "epoch": 0.5364882795223352, "grad_norm": 3.08931013354838, "learning_rate": 7.3589407308824255e-06, "loss": 1.5003, "step": 18195 }, { "epoch": 0.5366357069143447, "grad_norm": 3.825251606239988, "learning_rate": 7.358765904467155e-06, "loss": 1.5289, "step": 18200 }, { "epoch": 0.5367831343063542, "grad_norm": 3.0065860841321803, "learning_rate": 7.35859097185983e-06, "loss": 1.5451, "step": 18205 }, { "epoch": 0.5369305616983635, "grad_norm": 3.2454973435079126, "learning_rate": 7.358415933065593e-06, "loss": 1.4653, "step": 18210 }, { "epoch": 0.537077989090373, "grad_norm": 3.3228407516219796, "learning_rate": 7.358240788089597e-06, "loss": 1.4654, "step": 18215 }, { "epoch": 0.5372254164823824, "grad_norm": 3.150034215259933, "learning_rate": 7.3580655369369966e-06, "loss": 1.4921, "step": 18220 }, { "epoch": 0.5373728438743919, "grad_norm": 3.238088438920577, "learning_rate": 7.3578901796129464e-06, "loss": 1.5236, "step": 18225 }, { "epoch": 0.5375202712664013, "grad_norm": 3.2334183434794133, "learning_rate": 7.357714716122608e-06, "loss": 1.5439, "step": 18230 }, { "epoch": 0.5376676986584107, "grad_norm": 3.1408726895609673, "learning_rate": 7.357539146471146e-06, "loss": 1.4948, "step": 18235 }, { "epoch": 0.5378151260504201, "grad_norm": 3.2347124196810007, "learning_rate": 7.357363470663725e-06, "loss": 1.4999, "step": 18240 }, { "epoch": 0.5379625534424296, "grad_norm": 3.1937548237234576, "learning_rate": 7.357187688705513e-06, "loss": 1.5392, "step": 18245 }, { "epoch": 0.5381099808344391, "grad_norm": 3.077001521330917, "learning_rate": 7.357011800601685e-06, "loss": 1.4671, "step": 18250 }, { "epoch": 0.5382574082264485, "grad_norm": 3.039024709674453, "learning_rate": 7.356835806357417e-06, "loss": 1.5253, "step": 18255 }, { "epoch": 0.5384048356184579, "grad_norm": 3.0240362091561734, "learning_rate": 7.356659705977886e-06, "loss": 1.4645, "step": 18260 }, { "epoch": 0.5385522630104673, "grad_norm": 3.1855671356040185, "learning_rate": 7.356483499468275e-06, "loss": 1.4747, "step": 18265 }, { "epoch": 0.5386996904024768, "grad_norm": 3.050314440853793, "learning_rate": 7.356307186833769e-06, "loss": 1.5203, "step": 18270 }, { "epoch": 0.5388471177944862, "grad_norm": 3.029613762065487, "learning_rate": 7.356130768079556e-06, "loss": 1.4917, "step": 18275 }, { "epoch": 0.5389945451864957, "grad_norm": 3.2426939477477905, "learning_rate": 7.355954243210827e-06, "loss": 1.5155, "step": 18280 }, { "epoch": 0.539141972578505, "grad_norm": 3.070775492677494, "learning_rate": 7.355777612232776e-06, "loss": 1.5061, "step": 18285 }, { "epoch": 0.5392893999705145, "grad_norm": 3.0691751126058717, "learning_rate": 7.355600875150603e-06, "loss": 1.5093, "step": 18290 }, { "epoch": 0.539436827362524, "grad_norm": 3.078580218554898, "learning_rate": 7.355424031969507e-06, "loss": 1.4923, "step": 18295 }, { "epoch": 0.5395842547545334, "grad_norm": 3.1218278133494657, "learning_rate": 7.3552470826946905e-06, "loss": 1.5351, "step": 18300 }, { "epoch": 0.5397316821465429, "grad_norm": 3.1122898484010184, "learning_rate": 7.3550700273313634e-06, "loss": 1.4801, "step": 18305 }, { "epoch": 0.5398791095385522, "grad_norm": 3.1567036005987363, "learning_rate": 7.354892865884734e-06, "loss": 1.4912, "step": 18310 }, { "epoch": 0.5400265369305617, "grad_norm": 3.1456290385146146, "learning_rate": 7.3547155983600145e-06, "loss": 1.5217, "step": 18315 }, { "epoch": 0.5401739643225711, "grad_norm": 3.095433373437663, "learning_rate": 7.354538224762423e-06, "loss": 1.4683, "step": 18320 }, { "epoch": 0.5403213917145806, "grad_norm": 2.9998474561562722, "learning_rate": 7.354360745097178e-06, "loss": 1.505, "step": 18325 }, { "epoch": 0.5404688191065901, "grad_norm": 2.9886444428732437, "learning_rate": 7.354183159369502e-06, "loss": 1.49, "step": 18330 }, { "epoch": 0.5406162464985994, "grad_norm": 3.2623935744123376, "learning_rate": 7.354005467584623e-06, "loss": 1.5598, "step": 18335 }, { "epoch": 0.5407636738906089, "grad_norm": 3.070734231521582, "learning_rate": 7.353827669747767e-06, "loss": 1.5397, "step": 18340 }, { "epoch": 0.5409111012826183, "grad_norm": 3.0352043078452415, "learning_rate": 7.353649765864165e-06, "loss": 1.5244, "step": 18345 }, { "epoch": 0.5410585286746278, "grad_norm": 3.0782515215511315, "learning_rate": 7.353471755939055e-06, "loss": 1.517, "step": 18350 }, { "epoch": 0.5412059560666371, "grad_norm": 3.0884264662951835, "learning_rate": 7.353293639977673e-06, "loss": 1.5292, "step": 18355 }, { "epoch": 0.5413533834586466, "grad_norm": 3.023463669527615, "learning_rate": 7.353115417985262e-06, "loss": 1.513, "step": 18360 }, { "epoch": 0.541500810850656, "grad_norm": 3.1380145432844646, "learning_rate": 7.352937089967065e-06, "loss": 1.5616, "step": 18365 }, { "epoch": 0.5416482382426655, "grad_norm": 3.1676244887471046, "learning_rate": 7.352758655928331e-06, "loss": 1.5751, "step": 18370 }, { "epoch": 0.541795665634675, "grad_norm": 3.115094909955631, "learning_rate": 7.352580115874309e-06, "loss": 1.5304, "step": 18375 }, { "epoch": 0.5419430930266843, "grad_norm": 3.163315865148134, "learning_rate": 7.3524014698102525e-06, "loss": 1.5139, "step": 18380 }, { "epoch": 0.5420905204186938, "grad_norm": 3.226467169270424, "learning_rate": 7.3522227177414205e-06, "loss": 1.5229, "step": 18385 }, { "epoch": 0.5422379478107032, "grad_norm": 3.1320476949931075, "learning_rate": 7.352043859673072e-06, "loss": 1.5649, "step": 18390 }, { "epoch": 0.5423853752027127, "grad_norm": 2.9586219469659643, "learning_rate": 7.351864895610468e-06, "loss": 1.4041, "step": 18395 }, { "epoch": 0.5425328025947221, "grad_norm": 3.125946849512396, "learning_rate": 7.351685825558878e-06, "loss": 1.497, "step": 18400 }, { "epoch": 0.5426802299867315, "grad_norm": 3.083372383271062, "learning_rate": 7.351506649523569e-06, "loss": 1.4973, "step": 18405 }, { "epoch": 0.542827657378741, "grad_norm": 3.1447305094503903, "learning_rate": 7.351327367509816e-06, "loss": 1.4902, "step": 18410 }, { "epoch": 0.5429750847707504, "grad_norm": 3.1833211548559515, "learning_rate": 7.3511479795228915e-06, "loss": 1.5457, "step": 18415 }, { "epoch": 0.5431225121627599, "grad_norm": 3.0977715669319545, "learning_rate": 7.350968485568076e-06, "loss": 1.4883, "step": 18420 }, { "epoch": 0.5432699395547693, "grad_norm": 3.0908892885026993, "learning_rate": 7.35078888565065e-06, "loss": 1.5463, "step": 18425 }, { "epoch": 0.5434173669467787, "grad_norm": 3.1156108274842564, "learning_rate": 7.3506091797759e-06, "loss": 1.5196, "step": 18430 }, { "epoch": 0.5435647943387881, "grad_norm": 2.986375015954005, "learning_rate": 7.350429367949113e-06, "loss": 1.4723, "step": 18435 }, { "epoch": 0.5437122217307976, "grad_norm": 3.0926874273731433, "learning_rate": 7.350249450175582e-06, "loss": 1.4707, "step": 18440 }, { "epoch": 0.543859649122807, "grad_norm": 2.797199885839684, "learning_rate": 7.350069426460599e-06, "loss": 1.4728, "step": 18445 }, { "epoch": 0.5440070765148165, "grad_norm": 2.9754727409633976, "learning_rate": 7.349889296809462e-06, "loss": 1.4748, "step": 18450 }, { "epoch": 0.5441545039068258, "grad_norm": 3.3280618991744353, "learning_rate": 7.349709061227473e-06, "loss": 1.5234, "step": 18455 }, { "epoch": 0.5443019312988353, "grad_norm": 2.9640504143750412, "learning_rate": 7.349528719719934e-06, "loss": 1.4848, "step": 18460 }, { "epoch": 0.5444493586908448, "grad_norm": 2.998689326755909, "learning_rate": 7.349348272292151e-06, "loss": 1.5253, "step": 18465 }, { "epoch": 0.5445967860828542, "grad_norm": 3.2524725609797644, "learning_rate": 7.3491677189494366e-06, "loss": 1.5235, "step": 18470 }, { "epoch": 0.5447442134748637, "grad_norm": 3.0609001995847542, "learning_rate": 7.3489870596971005e-06, "loss": 1.4849, "step": 18475 }, { "epoch": 0.544891640866873, "grad_norm": 3.12951335753661, "learning_rate": 7.348806294540463e-06, "loss": 1.5342, "step": 18480 }, { "epoch": 0.5450390682588825, "grad_norm": 3.074606106083639, "learning_rate": 7.348625423484838e-06, "loss": 1.525, "step": 18485 }, { "epoch": 0.545186495650892, "grad_norm": 3.1264445992282575, "learning_rate": 7.348444446535553e-06, "loss": 1.4723, "step": 18490 }, { "epoch": 0.5453339230429014, "grad_norm": 2.96153735020254, "learning_rate": 7.34826336369793e-06, "loss": 1.4487, "step": 18495 }, { "epoch": 0.5454813504349109, "grad_norm": 2.9099077208857795, "learning_rate": 7.3480821749773e-06, "loss": 1.5303, "step": 18500 }, { "epoch": 0.5454813504349109, "eval_loss": 1.1906399726867676, "eval_runtime": 4.178, "eval_samples_per_second": 94.782, "eval_steps_per_second": 3.112, "step": 18500 }, { "epoch": 0.5456287778269202, "grad_norm": 3.0105530215967673, "learning_rate": 7.3479008803789916e-06, "loss": 1.5211, "step": 18505 }, { "epoch": 0.5457762052189297, "grad_norm": 3.2649733786902195, "learning_rate": 7.347719479908342e-06, "loss": 1.533, "step": 18510 }, { "epoch": 0.5459236326109391, "grad_norm": 3.024874078309203, "learning_rate": 7.347537973570689e-06, "loss": 1.4559, "step": 18515 }, { "epoch": 0.5460710600029486, "grad_norm": 3.0718820726354146, "learning_rate": 7.347356361371373e-06, "loss": 1.5279, "step": 18520 }, { "epoch": 0.5462184873949579, "grad_norm": 3.000564810459031, "learning_rate": 7.3471746433157385e-06, "loss": 1.4977, "step": 18525 }, { "epoch": 0.5463659147869674, "grad_norm": 3.0279155215281386, "learning_rate": 7.346992819409133e-06, "loss": 1.5651, "step": 18530 }, { "epoch": 0.5465133421789768, "grad_norm": 2.9390687894813095, "learning_rate": 7.346810889656905e-06, "loss": 1.5407, "step": 18535 }, { "epoch": 0.5466607695709863, "grad_norm": 3.1688380482159344, "learning_rate": 7.346628854064411e-06, "loss": 1.5402, "step": 18540 }, { "epoch": 0.5468081969629958, "grad_norm": 3.116551317624438, "learning_rate": 7.346446712637005e-06, "loss": 1.5385, "step": 18545 }, { "epoch": 0.5469556243550051, "grad_norm": 3.116820063252823, "learning_rate": 7.346264465380047e-06, "loss": 1.5043, "step": 18550 }, { "epoch": 0.5471030517470146, "grad_norm": 3.0531165431614635, "learning_rate": 7.346082112298903e-06, "loss": 1.4771, "step": 18555 }, { "epoch": 0.547250479139024, "grad_norm": 3.267714549121449, "learning_rate": 7.345899653398935e-06, "loss": 1.5216, "step": 18560 }, { "epoch": 0.5473979065310335, "grad_norm": 3.135196060926279, "learning_rate": 7.345717088685514e-06, "loss": 1.5203, "step": 18565 }, { "epoch": 0.5475453339230429, "grad_norm": 3.1339197508854366, "learning_rate": 7.345534418164012e-06, "loss": 1.5236, "step": 18570 }, { "epoch": 0.5476927613150523, "grad_norm": 3.288937149146865, "learning_rate": 7.345351641839804e-06, "loss": 1.5556, "step": 18575 }, { "epoch": 0.5478401887070617, "grad_norm": 3.0761290816470095, "learning_rate": 7.345168759718268e-06, "loss": 1.4637, "step": 18580 }, { "epoch": 0.5479876160990712, "grad_norm": 2.9488503466590954, "learning_rate": 7.3449857718047865e-06, "loss": 1.5027, "step": 18585 }, { "epoch": 0.5481350434910807, "grad_norm": 2.930996522455437, "learning_rate": 7.3448026781047426e-06, "loss": 1.4683, "step": 18590 }, { "epoch": 0.5482824708830901, "grad_norm": 2.9356771190305984, "learning_rate": 7.344619478623525e-06, "loss": 1.4464, "step": 18595 }, { "epoch": 0.5484298982750995, "grad_norm": 3.0081466870382347, "learning_rate": 7.344436173366527e-06, "loss": 1.503, "step": 18600 }, { "epoch": 0.5485773256671089, "grad_norm": 3.094431890781267, "learning_rate": 7.344252762339138e-06, "loss": 1.5058, "step": 18605 }, { "epoch": 0.5487247530591184, "grad_norm": 3.0725823179586156, "learning_rate": 7.344069245546758e-06, "loss": 1.5416, "step": 18610 }, { "epoch": 0.5488721804511278, "grad_norm": 3.0418860459839254, "learning_rate": 7.343885622994786e-06, "loss": 1.4508, "step": 18615 }, { "epoch": 0.5490196078431373, "grad_norm": 2.8463975257487815, "learning_rate": 7.343701894688627e-06, "loss": 1.4823, "step": 18620 }, { "epoch": 0.5491670352351466, "grad_norm": 3.0165666586893405, "learning_rate": 7.343518060633685e-06, "loss": 1.4969, "step": 18625 }, { "epoch": 0.5493144626271561, "grad_norm": 2.999167423451099, "learning_rate": 7.343334120835371e-06, "loss": 1.479, "step": 18630 }, { "epoch": 0.5494618900191656, "grad_norm": 2.872323288624474, "learning_rate": 7.343150075299097e-06, "loss": 1.4799, "step": 18635 }, { "epoch": 0.549609317411175, "grad_norm": 2.949861202249689, "learning_rate": 7.342965924030279e-06, "loss": 1.4872, "step": 18640 }, { "epoch": 0.5497567448031845, "grad_norm": 3.183133994205378, "learning_rate": 7.342781667034336e-06, "loss": 1.4865, "step": 18645 }, { "epoch": 0.5499041721951938, "grad_norm": 2.9998787148348494, "learning_rate": 7.3425973043166906e-06, "loss": 1.5068, "step": 18650 }, { "epoch": 0.5500515995872033, "grad_norm": 3.0462543188834, "learning_rate": 7.342412835882767e-06, "loss": 1.5107, "step": 18655 }, { "epoch": 0.5501990269792127, "grad_norm": 3.4979733233848394, "learning_rate": 7.342228261737994e-06, "loss": 1.4873, "step": 18660 }, { "epoch": 0.5503464543712222, "grad_norm": 3.113204534765945, "learning_rate": 7.342043581887801e-06, "loss": 1.5281, "step": 18665 }, { "epoch": 0.5504938817632317, "grad_norm": 3.1277042433266478, "learning_rate": 7.341858796337625e-06, "loss": 1.5441, "step": 18670 }, { "epoch": 0.550641309155241, "grad_norm": 3.0425256145448345, "learning_rate": 7.341673905092903e-06, "loss": 1.4676, "step": 18675 }, { "epoch": 0.5507887365472505, "grad_norm": 3.0377825732018477, "learning_rate": 7.3414889081590736e-06, "loss": 1.5037, "step": 18680 }, { "epoch": 0.5509361639392599, "grad_norm": 3.1242830617156905, "learning_rate": 7.341303805541583e-06, "loss": 1.5324, "step": 18685 }, { "epoch": 0.5510835913312694, "grad_norm": 2.9835300814656582, "learning_rate": 7.341118597245877e-06, "loss": 1.4438, "step": 18690 }, { "epoch": 0.5512310187232787, "grad_norm": 2.9758540811873875, "learning_rate": 7.340933283277406e-06, "loss": 1.528, "step": 18695 }, { "epoch": 0.5513784461152882, "grad_norm": 2.9503284945723918, "learning_rate": 7.340747863641623e-06, "loss": 1.5027, "step": 18700 }, { "epoch": 0.5515258735072976, "grad_norm": 3.266394558782369, "learning_rate": 7.340562338343983e-06, "loss": 1.5724, "step": 18705 }, { "epoch": 0.5516733008993071, "grad_norm": 3.076073269900004, "learning_rate": 7.340376707389946e-06, "loss": 1.4913, "step": 18710 }, { "epoch": 0.5518207282913166, "grad_norm": 3.053406170658959, "learning_rate": 7.340190970784975e-06, "loss": 1.5287, "step": 18715 }, { "epoch": 0.5519681556833259, "grad_norm": 3.14772709333194, "learning_rate": 7.3400051285345345e-06, "loss": 1.5035, "step": 18720 }, { "epoch": 0.5521155830753354, "grad_norm": 3.020112831099842, "learning_rate": 7.339819180644094e-06, "loss": 1.4965, "step": 18725 }, { "epoch": 0.5522630104673448, "grad_norm": 3.136442489792933, "learning_rate": 7.339633127119126e-06, "loss": 1.5135, "step": 18730 }, { "epoch": 0.5524104378593543, "grad_norm": 3.146741974295531, "learning_rate": 7.339446967965104e-06, "loss": 1.4794, "step": 18735 }, { "epoch": 0.5525578652513637, "grad_norm": 3.2342942449739374, "learning_rate": 7.339260703187506e-06, "loss": 1.5139, "step": 18740 }, { "epoch": 0.5527052926433731, "grad_norm": 3.211090964773382, "learning_rate": 7.339074332791813e-06, "loss": 1.4889, "step": 18745 }, { "epoch": 0.5528527200353825, "grad_norm": 3.114885459904766, "learning_rate": 7.33888785678351e-06, "loss": 1.5251, "step": 18750 }, { "epoch": 0.553000147427392, "grad_norm": 3.2694284183531432, "learning_rate": 7.338701275168083e-06, "loss": 1.4622, "step": 18755 }, { "epoch": 0.5531475748194015, "grad_norm": 3.001783317996548, "learning_rate": 7.338514587951024e-06, "loss": 1.4813, "step": 18760 }, { "epoch": 0.5532950022114109, "grad_norm": 2.9714786559459445, "learning_rate": 7.338327795137825e-06, "loss": 1.4828, "step": 18765 }, { "epoch": 0.5534424296034203, "grad_norm": 2.9246064842008894, "learning_rate": 7.338140896733983e-06, "loss": 1.4913, "step": 18770 }, { "epoch": 0.5535898569954297, "grad_norm": 3.0165036381609767, "learning_rate": 7.3379538927449976e-06, "loss": 1.4951, "step": 18775 }, { "epoch": 0.5537372843874392, "grad_norm": 3.405465397697131, "learning_rate": 7.337766783176372e-06, "loss": 1.5398, "step": 18780 }, { "epoch": 0.5538847117794486, "grad_norm": 2.9805493630292133, "learning_rate": 7.337579568033613e-06, "loss": 1.4918, "step": 18785 }, { "epoch": 0.5540321391714581, "grad_norm": 3.1298463931718956, "learning_rate": 7.337392247322228e-06, "loss": 1.5511, "step": 18790 }, { "epoch": 0.5541795665634675, "grad_norm": 3.030398324790543, "learning_rate": 7.3372048210477306e-06, "loss": 1.4757, "step": 18795 }, { "epoch": 0.5543269939554769, "grad_norm": 3.1110253448727154, "learning_rate": 7.337017289215634e-06, "loss": 1.5409, "step": 18800 }, { "epoch": 0.5544744213474864, "grad_norm": 3.0195125294790808, "learning_rate": 7.336829651831458e-06, "loss": 1.4995, "step": 18805 }, { "epoch": 0.5546218487394958, "grad_norm": 3.1909304374264775, "learning_rate": 7.336641908900724e-06, "loss": 1.5682, "step": 18810 }, { "epoch": 0.5547692761315053, "grad_norm": 2.998928126692843, "learning_rate": 7.336454060428956e-06, "loss": 1.4964, "step": 18815 }, { "epoch": 0.5549167035235146, "grad_norm": 3.37326758314841, "learning_rate": 7.336266106421682e-06, "loss": 1.5309, "step": 18820 }, { "epoch": 0.5550641309155241, "grad_norm": 3.19816979549206, "learning_rate": 7.336078046884434e-06, "loss": 1.4905, "step": 18825 }, { "epoch": 0.5552115583075335, "grad_norm": 3.1233962789721943, "learning_rate": 7.335889881822743e-06, "loss": 1.5319, "step": 18830 }, { "epoch": 0.555358985699543, "grad_norm": 2.9978857936277024, "learning_rate": 7.335701611242148e-06, "loss": 1.5461, "step": 18835 }, { "epoch": 0.5555064130915525, "grad_norm": 3.0364784193370267, "learning_rate": 7.335513235148189e-06, "loss": 1.4872, "step": 18840 }, { "epoch": 0.5556538404835618, "grad_norm": 3.0076444874792005, "learning_rate": 7.335324753546408e-06, "loss": 1.5264, "step": 18845 }, { "epoch": 0.5558012678755713, "grad_norm": 3.0478446354540054, "learning_rate": 7.335136166442353e-06, "loss": 1.4619, "step": 18850 }, { "epoch": 0.5559486952675807, "grad_norm": 3.033329011499697, "learning_rate": 7.3349474738415726e-06, "loss": 1.5073, "step": 18855 }, { "epoch": 0.5560961226595902, "grad_norm": 3.2413584214338487, "learning_rate": 7.334758675749619e-06, "loss": 1.484, "step": 18860 }, { "epoch": 0.5562435500515995, "grad_norm": 3.069210809911421, "learning_rate": 7.334569772172047e-06, "loss": 1.466, "step": 18865 }, { "epoch": 0.556390977443609, "grad_norm": 3.5782185019082546, "learning_rate": 7.334380763114417e-06, "loss": 1.4608, "step": 18870 }, { "epoch": 0.5565384048356184, "grad_norm": 3.1968373985315006, "learning_rate": 7.3341916485822905e-06, "loss": 1.5652, "step": 18875 }, { "epoch": 0.5566858322276279, "grad_norm": 3.0207447655891735, "learning_rate": 7.334002428581231e-06, "loss": 1.499, "step": 18880 }, { "epoch": 0.5568332596196374, "grad_norm": 3.0100852162985494, "learning_rate": 7.333813103116809e-06, "loss": 1.5053, "step": 18885 }, { "epoch": 0.5569806870116467, "grad_norm": 2.9746005427399345, "learning_rate": 7.333623672194594e-06, "loss": 1.4619, "step": 18890 }, { "epoch": 0.5571281144036562, "grad_norm": 3.2005688836019544, "learning_rate": 7.333434135820159e-06, "loss": 1.5105, "step": 18895 }, { "epoch": 0.5572755417956656, "grad_norm": 2.9921644784378536, "learning_rate": 7.333244493999084e-06, "loss": 1.4746, "step": 18900 }, { "epoch": 0.5574229691876751, "grad_norm": 2.997292107569085, "learning_rate": 7.3330547467369476e-06, "loss": 1.5396, "step": 18905 }, { "epoch": 0.5575703965796845, "grad_norm": 3.5998186306825377, "learning_rate": 7.332864894039333e-06, "loss": 1.5017, "step": 18910 }, { "epoch": 0.5577178239716939, "grad_norm": 2.972469458740989, "learning_rate": 7.332674935911829e-06, "loss": 1.5314, "step": 18915 }, { "epoch": 0.5578652513637034, "grad_norm": 3.171187758422756, "learning_rate": 7.332484872360024e-06, "loss": 1.5035, "step": 18920 }, { "epoch": 0.5580126787557128, "grad_norm": 3.0195661425831535, "learning_rate": 7.332294703389511e-06, "loss": 1.5044, "step": 18925 }, { "epoch": 0.5581601061477223, "grad_norm": 3.0547253790096502, "learning_rate": 7.332104429005885e-06, "loss": 1.5155, "step": 18930 }, { "epoch": 0.5583075335397317, "grad_norm": 3.0463705972402555, "learning_rate": 7.331914049214745e-06, "loss": 1.4941, "step": 18935 }, { "epoch": 0.5584549609317411, "grad_norm": 3.2499427648481873, "learning_rate": 7.331723564021696e-06, "loss": 1.475, "step": 18940 }, { "epoch": 0.5586023883237505, "grad_norm": 3.064079935436254, "learning_rate": 7.33153297343234e-06, "loss": 1.4872, "step": 18945 }, { "epoch": 0.55874981571576, "grad_norm": 3.0626634738582355, "learning_rate": 7.331342277452286e-06, "loss": 1.5521, "step": 18950 }, { "epoch": 0.5588972431077694, "grad_norm": 3.1424780585061334, "learning_rate": 7.331151476087147e-06, "loss": 1.516, "step": 18955 }, { "epoch": 0.5590446704997789, "grad_norm": 3.3544491136199337, "learning_rate": 7.330960569342537e-06, "loss": 1.5298, "step": 18960 }, { "epoch": 0.5591920978917883, "grad_norm": 3.005694702381593, "learning_rate": 7.330769557224072e-06, "loss": 1.5257, "step": 18965 }, { "epoch": 0.5593395252837977, "grad_norm": 3.1642892513622978, "learning_rate": 7.330578439737376e-06, "loss": 1.5179, "step": 18970 }, { "epoch": 0.5594869526758072, "grad_norm": 2.921111728132792, "learning_rate": 7.3303872168880695e-06, "loss": 1.488, "step": 18975 }, { "epoch": 0.5596343800678166, "grad_norm": 3.198814760316585, "learning_rate": 7.33019588868178e-06, "loss": 1.5082, "step": 18980 }, { "epoch": 0.5597818074598261, "grad_norm": 3.1078116532196436, "learning_rate": 7.330004455124139e-06, "loss": 1.4837, "step": 18985 }, { "epoch": 0.5599292348518354, "grad_norm": 3.1053636525096797, "learning_rate": 7.3298129162207795e-06, "loss": 1.5502, "step": 18990 }, { "epoch": 0.5600766622438449, "grad_norm": 3.1563211860422817, "learning_rate": 7.329621271977337e-06, "loss": 1.5357, "step": 18995 }, { "epoch": 0.5602240896358543, "grad_norm": 3.094677956457123, "learning_rate": 7.32942952239945e-06, "loss": 1.5045, "step": 19000 }, { "epoch": 0.5602240896358543, "eval_loss": 1.1881108283996582, "eval_runtime": 4.3134, "eval_samples_per_second": 91.808, "eval_steps_per_second": 3.014, "step": 19000 }, { "epoch": 0.5603715170278638, "grad_norm": 3.190903730025907, "learning_rate": 7.3292376674927635e-06, "loss": 1.5398, "step": 19005 }, { "epoch": 0.5605189444198733, "grad_norm": 3.0690403967935955, "learning_rate": 7.329045707262921e-06, "loss": 1.5388, "step": 19010 }, { "epoch": 0.5606663718118826, "grad_norm": 3.0970183920195056, "learning_rate": 7.328853641715572e-06, "loss": 1.5005, "step": 19015 }, { "epoch": 0.5608137992038921, "grad_norm": 3.023559024028209, "learning_rate": 7.328661470856367e-06, "loss": 1.5201, "step": 19020 }, { "epoch": 0.5609612265959015, "grad_norm": 2.991925901453646, "learning_rate": 7.328469194690963e-06, "loss": 1.5096, "step": 19025 }, { "epoch": 0.561108653987911, "grad_norm": 2.993043303504755, "learning_rate": 7.328276813225016e-06, "loss": 1.4517, "step": 19030 }, { "epoch": 0.5612560813799203, "grad_norm": 2.9483327249786617, "learning_rate": 7.328084326464188e-06, "loss": 1.5183, "step": 19035 }, { "epoch": 0.5614035087719298, "grad_norm": 3.0611375015018547, "learning_rate": 7.327891734414143e-06, "loss": 1.5067, "step": 19040 }, { "epoch": 0.5615509361639393, "grad_norm": 3.255952018576054, "learning_rate": 7.327699037080548e-06, "loss": 1.4987, "step": 19045 }, { "epoch": 0.5616983635559487, "grad_norm": 2.8738612467099967, "learning_rate": 7.327506234469073e-06, "loss": 1.451, "step": 19050 }, { "epoch": 0.5618457909479582, "grad_norm": 3.1541417944597345, "learning_rate": 7.327313326585393e-06, "loss": 1.5353, "step": 19055 }, { "epoch": 0.5619932183399675, "grad_norm": 3.0726318423700554, "learning_rate": 7.3271203134351835e-06, "loss": 1.4538, "step": 19060 }, { "epoch": 0.562140645731977, "grad_norm": 3.204912948914235, "learning_rate": 7.326927195024125e-06, "loss": 1.565, "step": 19065 }, { "epoch": 0.5622880731239864, "grad_norm": 3.0110193846746682, "learning_rate": 7.326733971357897e-06, "loss": 1.5137, "step": 19070 }, { "epoch": 0.5624355005159959, "grad_norm": 3.1233008182712774, "learning_rate": 7.326540642442189e-06, "loss": 1.5125, "step": 19075 }, { "epoch": 0.5625829279080053, "grad_norm": 4.182043415377705, "learning_rate": 7.3263472082826886e-06, "loss": 1.5651, "step": 19080 }, { "epoch": 0.5627303553000147, "grad_norm": 3.0943553768405696, "learning_rate": 7.326153668885088e-06, "loss": 1.4835, "step": 19085 }, { "epoch": 0.5628777826920242, "grad_norm": 3.2067464899608993, "learning_rate": 7.325960024255082e-06, "loss": 1.5063, "step": 19090 }, { "epoch": 0.5630252100840336, "grad_norm": 3.058451252158006, "learning_rate": 7.3257662743983686e-06, "loss": 1.496, "step": 19095 }, { "epoch": 0.5631726374760431, "grad_norm": 3.0657663549598078, "learning_rate": 7.32557241932065e-06, "loss": 1.5445, "step": 19100 }, { "epoch": 0.5633200648680525, "grad_norm": 2.980041808599942, "learning_rate": 7.3253784590276295e-06, "loss": 1.5281, "step": 19105 }, { "epoch": 0.5634674922600619, "grad_norm": 2.9001631213757957, "learning_rate": 7.325184393525015e-06, "loss": 1.4803, "step": 19110 }, { "epoch": 0.5636149196520713, "grad_norm": 2.865191743499621, "learning_rate": 7.324990222818518e-06, "loss": 1.4949, "step": 19115 }, { "epoch": 0.5637623470440808, "grad_norm": 2.8698673340040424, "learning_rate": 7.324795946913851e-06, "loss": 1.4633, "step": 19120 }, { "epoch": 0.5639097744360902, "grad_norm": 3.1364886538876116, "learning_rate": 7.324601565816732e-06, "loss": 1.4896, "step": 19125 }, { "epoch": 0.5640572018280997, "grad_norm": 2.94343282016879, "learning_rate": 7.32440707953288e-06, "loss": 1.4707, "step": 19130 }, { "epoch": 0.564204629220109, "grad_norm": 3.149689488181847, "learning_rate": 7.324212488068017e-06, "loss": 1.5033, "step": 19135 }, { "epoch": 0.5643520566121185, "grad_norm": 2.9805588146422517, "learning_rate": 7.3240177914278714e-06, "loss": 1.4904, "step": 19140 }, { "epoch": 0.564499484004128, "grad_norm": 3.1691677580433706, "learning_rate": 7.323822989618171e-06, "loss": 1.4567, "step": 19145 }, { "epoch": 0.5646469113961374, "grad_norm": 3.0867649204916976, "learning_rate": 7.3236280826446485e-06, "loss": 1.4397, "step": 19150 }, { "epoch": 0.5647943387881469, "grad_norm": 3.0050756780462295, "learning_rate": 7.3234330705130385e-06, "loss": 1.4939, "step": 19155 }, { "epoch": 0.5649417661801562, "grad_norm": 3.0444661480782242, "learning_rate": 7.32323795322908e-06, "loss": 1.5065, "step": 19160 }, { "epoch": 0.5650891935721657, "grad_norm": 3.1982372720189813, "learning_rate": 7.323042730798514e-06, "loss": 1.5174, "step": 19165 }, { "epoch": 0.5652366209641752, "grad_norm": 3.0067692017945356, "learning_rate": 7.322847403227087e-06, "loss": 1.5491, "step": 19170 }, { "epoch": 0.5653840483561846, "grad_norm": 3.1072698321788224, "learning_rate": 7.322651970520544e-06, "loss": 1.5339, "step": 19175 }, { "epoch": 0.5655314757481941, "grad_norm": 3.004752323297966, "learning_rate": 7.322456432684637e-06, "loss": 1.4722, "step": 19180 }, { "epoch": 0.5656789031402034, "grad_norm": 2.9678633415363467, "learning_rate": 7.322260789725121e-06, "loss": 1.5592, "step": 19185 }, { "epoch": 0.5658263305322129, "grad_norm": 3.0689225962670212, "learning_rate": 7.3220650416477535e-06, "loss": 1.4706, "step": 19190 }, { "epoch": 0.5659737579242223, "grad_norm": 3.255860237060989, "learning_rate": 7.321869188458291e-06, "loss": 1.4935, "step": 19195 }, { "epoch": 0.5661211853162318, "grad_norm": 3.1074902385342082, "learning_rate": 7.321673230162501e-06, "loss": 1.5117, "step": 19200 }, { "epoch": 0.5662686127082411, "grad_norm": 3.0765166716620986, "learning_rate": 7.321477166766146e-06, "loss": 1.5452, "step": 19205 }, { "epoch": 0.5664160401002506, "grad_norm": 3.0162246520405094, "learning_rate": 7.321280998274998e-06, "loss": 1.462, "step": 19210 }, { "epoch": 0.56656346749226, "grad_norm": 3.119033319323887, "learning_rate": 7.321084724694829e-06, "loss": 1.4766, "step": 19215 }, { "epoch": 0.5667108948842695, "grad_norm": 2.9934654410450463, "learning_rate": 7.320888346031414e-06, "loss": 1.4685, "step": 19220 }, { "epoch": 0.566858322276279, "grad_norm": 2.9756596483987265, "learning_rate": 7.320691862290532e-06, "loss": 1.5399, "step": 19225 }, { "epoch": 0.5670057496682883, "grad_norm": 3.1682734630540716, "learning_rate": 7.3204952734779635e-06, "loss": 1.5068, "step": 19230 }, { "epoch": 0.5671531770602978, "grad_norm": 3.1091002659767275, "learning_rate": 7.320298579599494e-06, "loss": 1.5391, "step": 19235 }, { "epoch": 0.5673006044523072, "grad_norm": 3.0705334697375064, "learning_rate": 7.320101780660914e-06, "loss": 1.5175, "step": 19240 }, { "epoch": 0.5674480318443167, "grad_norm": 2.988223368955246, "learning_rate": 7.319904876668012e-06, "loss": 1.4772, "step": 19245 }, { "epoch": 0.5675954592363261, "grad_norm": 2.9394670199692055, "learning_rate": 7.3197078676265836e-06, "loss": 1.4857, "step": 19250 }, { "epoch": 0.5677428866283355, "grad_norm": 3.137321840883297, "learning_rate": 7.319510753542423e-06, "loss": 1.5077, "step": 19255 }, { "epoch": 0.567890314020345, "grad_norm": 3.2061671087433674, "learning_rate": 7.319313534421333e-06, "loss": 1.5214, "step": 19260 }, { "epoch": 0.5680377414123544, "grad_norm": 3.0840102617094165, "learning_rate": 7.319116210269117e-06, "loss": 1.497, "step": 19265 }, { "epoch": 0.5681851688043639, "grad_norm": 2.9508322682891657, "learning_rate": 7.318918781091581e-06, "loss": 1.4927, "step": 19270 }, { "epoch": 0.5683325961963733, "grad_norm": 3.06500313707704, "learning_rate": 7.318721246894534e-06, "loss": 1.5206, "step": 19275 }, { "epoch": 0.5684800235883827, "grad_norm": 3.065472378171837, "learning_rate": 7.3185236076837895e-06, "loss": 1.5462, "step": 19280 }, { "epoch": 0.5686274509803921, "grad_norm": 3.242186375387901, "learning_rate": 7.318325863465164e-06, "loss": 1.5083, "step": 19285 }, { "epoch": 0.5687748783724016, "grad_norm": 3.069824009443588, "learning_rate": 7.318128014244474e-06, "loss": 1.5195, "step": 19290 }, { "epoch": 0.568922305764411, "grad_norm": 2.9697616363610595, "learning_rate": 7.317930060027543e-06, "loss": 1.4769, "step": 19295 }, { "epoch": 0.5690697331564205, "grad_norm": 3.911664037244184, "learning_rate": 7.317732000820197e-06, "loss": 1.5204, "step": 19300 }, { "epoch": 0.5692171605484299, "grad_norm": 3.1372903224263893, "learning_rate": 7.3175338366282614e-06, "loss": 1.556, "step": 19305 }, { "epoch": 0.5693645879404393, "grad_norm": 2.8922610048587685, "learning_rate": 7.31733556745757e-06, "loss": 1.518, "step": 19310 }, { "epoch": 0.5695120153324488, "grad_norm": 3.031455395637627, "learning_rate": 7.317137193313956e-06, "loss": 1.4781, "step": 19315 }, { "epoch": 0.5696594427244582, "grad_norm": 3.0882550648202747, "learning_rate": 7.316938714203256e-06, "loss": 1.5365, "step": 19320 }, { "epoch": 0.5698068701164677, "grad_norm": 3.0644444188137707, "learning_rate": 7.316740130131312e-06, "loss": 1.5489, "step": 19325 }, { "epoch": 0.569954297508477, "grad_norm": 2.9747401647454446, "learning_rate": 7.316541441103967e-06, "loss": 1.4812, "step": 19330 }, { "epoch": 0.5701017249004865, "grad_norm": 3.2424720897244836, "learning_rate": 7.316342647127068e-06, "loss": 1.5187, "step": 19335 }, { "epoch": 0.570249152292496, "grad_norm": 3.0196585140345884, "learning_rate": 7.316143748206463e-06, "loss": 1.5483, "step": 19340 }, { "epoch": 0.5703965796845054, "grad_norm": 3.2180265681764264, "learning_rate": 7.315944744348006e-06, "loss": 1.5377, "step": 19345 }, { "epoch": 0.5705440070765149, "grad_norm": 3.001056689683955, "learning_rate": 7.315745635557555e-06, "loss": 1.511, "step": 19350 }, { "epoch": 0.5706914344685242, "grad_norm": 3.035585408855937, "learning_rate": 7.315546421840965e-06, "loss": 1.4898, "step": 19355 }, { "epoch": 0.5708388618605337, "grad_norm": 3.0593044334694355, "learning_rate": 7.3153471032041014e-06, "loss": 1.4948, "step": 19360 }, { "epoch": 0.5709862892525431, "grad_norm": 2.9791611891301457, "learning_rate": 7.315147679652827e-06, "loss": 1.5184, "step": 19365 }, { "epoch": 0.5711337166445526, "grad_norm": 3.149461973264324, "learning_rate": 7.314948151193011e-06, "loss": 1.5188, "step": 19370 }, { "epoch": 0.5712811440365619, "grad_norm": 3.2725636878855116, "learning_rate": 7.314748517830527e-06, "loss": 1.5599, "step": 19375 }, { "epoch": 0.5714285714285714, "grad_norm": 3.0787641314542027, "learning_rate": 7.314548779571245e-06, "loss": 1.5175, "step": 19380 }, { "epoch": 0.5715759988205809, "grad_norm": 3.2175519732581996, "learning_rate": 7.314348936421047e-06, "loss": 1.481, "step": 19385 }, { "epoch": 0.5717234262125903, "grad_norm": 3.0680890371684892, "learning_rate": 7.314148988385809e-06, "loss": 1.505, "step": 19390 }, { "epoch": 0.5718708536045998, "grad_norm": 3.1112464728662164, "learning_rate": 7.3139489354714184e-06, "loss": 1.545, "step": 19395 }, { "epoch": 0.5720182809966091, "grad_norm": 2.93121561222197, "learning_rate": 7.313748777683761e-06, "loss": 1.5173, "step": 19400 }, { "epoch": 0.5721657083886186, "grad_norm": 2.963973081369102, "learning_rate": 7.3135485150287266e-06, "loss": 1.4854, "step": 19405 }, { "epoch": 0.572313135780628, "grad_norm": 3.014539928800291, "learning_rate": 7.313348147512209e-06, "loss": 1.4925, "step": 19410 }, { "epoch": 0.5724605631726375, "grad_norm": 2.904244592561242, "learning_rate": 7.313147675140101e-06, "loss": 1.523, "step": 19415 }, { "epoch": 0.572607990564647, "grad_norm": 3.048998509660313, "learning_rate": 7.312947097918304e-06, "loss": 1.5382, "step": 19420 }, { "epoch": 0.5727554179566563, "grad_norm": 3.0418051550650858, "learning_rate": 7.312746415852722e-06, "loss": 1.5443, "step": 19425 }, { "epoch": 0.5729028453486658, "grad_norm": 3.10129730014246, "learning_rate": 7.312545628949257e-06, "loss": 1.5007, "step": 19430 }, { "epoch": 0.5730502727406752, "grad_norm": 3.1944373033669486, "learning_rate": 7.312344737213819e-06, "loss": 1.4989, "step": 19435 }, { "epoch": 0.5731977001326847, "grad_norm": 3.25504881403889, "learning_rate": 7.31214374065232e-06, "loss": 1.55, "step": 19440 }, { "epoch": 0.5733451275246941, "grad_norm": 2.963856161559179, "learning_rate": 7.311942639270675e-06, "loss": 1.5679, "step": 19445 }, { "epoch": 0.5734925549167035, "grad_norm": 3.0315525708725723, "learning_rate": 7.311741433074799e-06, "loss": 1.5021, "step": 19450 }, { "epoch": 0.5736399823087129, "grad_norm": 2.958355770354892, "learning_rate": 7.311540122070614e-06, "loss": 1.5084, "step": 19455 }, { "epoch": 0.5737874097007224, "grad_norm": 3.0210669099210485, "learning_rate": 7.311338706264044e-06, "loss": 1.5124, "step": 19460 }, { "epoch": 0.5739348370927319, "grad_norm": 2.8965939217266263, "learning_rate": 7.311137185661017e-06, "loss": 1.465, "step": 19465 }, { "epoch": 0.5740822644847413, "grad_norm": 2.9291906302040163, "learning_rate": 7.310935560267461e-06, "loss": 1.4857, "step": 19470 }, { "epoch": 0.5742296918767507, "grad_norm": 3.004327919114619, "learning_rate": 7.3107338300893106e-06, "loss": 1.4832, "step": 19475 }, { "epoch": 0.5743771192687601, "grad_norm": 3.151435531691103, "learning_rate": 7.310531995132501e-06, "loss": 1.5138, "step": 19480 }, { "epoch": 0.5745245466607696, "grad_norm": 2.9163192046584707, "learning_rate": 7.310330055402971e-06, "loss": 1.4855, "step": 19485 }, { "epoch": 0.574671974052779, "grad_norm": 3.0822835128573254, "learning_rate": 7.310128010906665e-06, "loss": 1.4807, "step": 19490 }, { "epoch": 0.5748194014447885, "grad_norm": 2.9383982001116133, "learning_rate": 7.309925861649527e-06, "loss": 1.5183, "step": 19495 }, { "epoch": 0.5749668288367978, "grad_norm": 3.146907165832925, "learning_rate": 7.309723607637505e-06, "loss": 1.4674, "step": 19500 }, { "epoch": 0.5749668288367978, "eval_loss": 1.1853991746902466, "eval_runtime": 4.1899, "eval_samples_per_second": 94.514, "eval_steps_per_second": 3.103, "step": 19500 }, { "epoch": 0.5751142562288073, "grad_norm": 2.9447184836711693, "learning_rate": 7.309521248876551e-06, "loss": 1.4961, "step": 19505 }, { "epoch": 0.5752616836208168, "grad_norm": 3.062702753213008, "learning_rate": 7.309318785372619e-06, "loss": 1.5745, "step": 19510 }, { "epoch": 0.5754091110128262, "grad_norm": 3.0116501342129127, "learning_rate": 7.309116217131669e-06, "loss": 1.4849, "step": 19515 }, { "epoch": 0.5755565384048357, "grad_norm": 3.0288171218620317, "learning_rate": 7.308913544159659e-06, "loss": 1.5554, "step": 19520 }, { "epoch": 0.575703965796845, "grad_norm": 2.9726807524976953, "learning_rate": 7.308710766462554e-06, "loss": 1.4832, "step": 19525 }, { "epoch": 0.5758513931888545, "grad_norm": 3.156806668148496, "learning_rate": 7.308507884046322e-06, "loss": 1.5057, "step": 19530 }, { "epoch": 0.5759988205808639, "grad_norm": 3.1811740754271307, "learning_rate": 7.308304896916932e-06, "loss": 1.513, "step": 19535 }, { "epoch": 0.5761462479728734, "grad_norm": 3.1609749405972307, "learning_rate": 7.308101805080357e-06, "loss": 1.4877, "step": 19540 }, { "epoch": 0.5762936753648827, "grad_norm": 3.0385784984646094, "learning_rate": 7.307898608542573e-06, "loss": 1.5076, "step": 19545 }, { "epoch": 0.5764411027568922, "grad_norm": 3.0773707974343907, "learning_rate": 7.3076953073095604e-06, "loss": 1.493, "step": 19550 }, { "epoch": 0.5765885301489017, "grad_norm": 3.069060543970138, "learning_rate": 7.307491901387301e-06, "loss": 1.4902, "step": 19555 }, { "epoch": 0.5767359575409111, "grad_norm": 2.99121406193704, "learning_rate": 7.30728839078178e-06, "loss": 1.4942, "step": 19560 }, { "epoch": 0.5768833849329206, "grad_norm": 2.984681260971109, "learning_rate": 7.307084775498986e-06, "loss": 1.5103, "step": 19565 }, { "epoch": 0.5770308123249299, "grad_norm": 3.0219145498474833, "learning_rate": 7.306881055544911e-06, "loss": 1.5144, "step": 19570 }, { "epoch": 0.5771782397169394, "grad_norm": 3.0345472830105127, "learning_rate": 7.306677230925548e-06, "loss": 1.5023, "step": 19575 }, { "epoch": 0.5773256671089488, "grad_norm": 3.057150692931577, "learning_rate": 7.306473301646898e-06, "loss": 1.462, "step": 19580 }, { "epoch": 0.5774730945009583, "grad_norm": 3.1997924457952647, "learning_rate": 7.30626926771496e-06, "loss": 1.5073, "step": 19585 }, { "epoch": 0.5776205218929678, "grad_norm": 3.1392100036224537, "learning_rate": 7.3060651291357364e-06, "loss": 1.5648, "step": 19590 }, { "epoch": 0.5777679492849771, "grad_norm": 3.145119253073656, "learning_rate": 7.305860885915236e-06, "loss": 1.5066, "step": 19595 }, { "epoch": 0.5779153766769866, "grad_norm": 2.9186353009300685, "learning_rate": 7.30565653805947e-06, "loss": 1.5181, "step": 19600 }, { "epoch": 0.578062804068996, "grad_norm": 3.0363163018393866, "learning_rate": 7.30545208557445e-06, "loss": 1.5162, "step": 19605 }, { "epoch": 0.5782102314610055, "grad_norm": 3.0680637887603575, "learning_rate": 7.305247528466192e-06, "loss": 1.5559, "step": 19610 }, { "epoch": 0.5783576588530149, "grad_norm": 3.068175221246266, "learning_rate": 7.305042866740716e-06, "loss": 1.5028, "step": 19615 }, { "epoch": 0.5785050862450243, "grad_norm": 3.2885854381454176, "learning_rate": 7.304838100404044e-06, "loss": 1.5259, "step": 19620 }, { "epoch": 0.5786525136370337, "grad_norm": 2.96617507716267, "learning_rate": 7.304633229462202e-06, "loss": 1.5155, "step": 19625 }, { "epoch": 0.5787999410290432, "grad_norm": 3.1068371724155552, "learning_rate": 7.3044282539212176e-06, "loss": 1.5593, "step": 19630 }, { "epoch": 0.5789473684210527, "grad_norm": 2.9828442977447165, "learning_rate": 7.304223173787124e-06, "loss": 1.5013, "step": 19635 }, { "epoch": 0.5790947958130621, "grad_norm": 3.0957247740816474, "learning_rate": 7.304017989065954e-06, "loss": 1.5187, "step": 19640 }, { "epoch": 0.5792422232050715, "grad_norm": 3.235832584965605, "learning_rate": 7.303812699763747e-06, "loss": 1.4863, "step": 19645 }, { "epoch": 0.5793896505970809, "grad_norm": 2.9390359868629004, "learning_rate": 7.303607305886543e-06, "loss": 1.5195, "step": 19650 }, { "epoch": 0.5795370779890904, "grad_norm": 2.947696209742937, "learning_rate": 7.3034018074403865e-06, "loss": 1.4744, "step": 19655 }, { "epoch": 0.5796845053810998, "grad_norm": 3.0265795630228376, "learning_rate": 7.3031962044313244e-06, "loss": 1.5726, "step": 19660 }, { "epoch": 0.5798319327731093, "grad_norm": 3.013726320495524, "learning_rate": 7.302990496865406e-06, "loss": 1.4832, "step": 19665 }, { "epoch": 0.5799793601651186, "grad_norm": 3.295363629854168, "learning_rate": 7.302784684748687e-06, "loss": 1.5053, "step": 19670 }, { "epoch": 0.5801267875571281, "grad_norm": 2.9090744804469484, "learning_rate": 7.30257876808722e-06, "loss": 1.4662, "step": 19675 }, { "epoch": 0.5802742149491376, "grad_norm": 3.4851708575335527, "learning_rate": 7.302372746887067e-06, "loss": 1.5268, "step": 19680 }, { "epoch": 0.580421642341147, "grad_norm": 3.0082342439415535, "learning_rate": 7.302166621154288e-06, "loss": 1.4685, "step": 19685 }, { "epoch": 0.5805690697331565, "grad_norm": 3.083208134195561, "learning_rate": 7.3019603908949515e-06, "loss": 1.5636, "step": 19690 }, { "epoch": 0.5807164971251658, "grad_norm": 3.1910515688436876, "learning_rate": 7.301754056115124e-06, "loss": 1.5365, "step": 19695 }, { "epoch": 0.5808639245171753, "grad_norm": 3.1231241861802896, "learning_rate": 7.3015476168208785e-06, "loss": 1.5461, "step": 19700 }, { "epoch": 0.5810113519091847, "grad_norm": 2.86798485548086, "learning_rate": 7.3013410730182875e-06, "loss": 1.498, "step": 19705 }, { "epoch": 0.5811587793011942, "grad_norm": 3.0039278379738907, "learning_rate": 7.301134424713431e-06, "loss": 1.5378, "step": 19710 }, { "epoch": 0.5813062066932035, "grad_norm": 2.9327944924904306, "learning_rate": 7.30092767191239e-06, "loss": 1.4689, "step": 19715 }, { "epoch": 0.581453634085213, "grad_norm": 2.953534263054498, "learning_rate": 7.300720814621247e-06, "loss": 1.5487, "step": 19720 }, { "epoch": 0.5816010614772225, "grad_norm": 3.2710079691353586, "learning_rate": 7.300513852846088e-06, "loss": 1.5099, "step": 19725 }, { "epoch": 0.5817484888692319, "grad_norm": 2.9892319204156212, "learning_rate": 7.300306786593007e-06, "loss": 1.4993, "step": 19730 }, { "epoch": 0.5818959162612414, "grad_norm": 3.020878850411713, "learning_rate": 7.300099615868093e-06, "loss": 1.5567, "step": 19735 }, { "epoch": 0.5820433436532507, "grad_norm": 3.3471582030334504, "learning_rate": 7.299892340677444e-06, "loss": 1.4889, "step": 19740 }, { "epoch": 0.5821907710452602, "grad_norm": 3.0250913765283185, "learning_rate": 7.29968496102716e-06, "loss": 1.493, "step": 19745 }, { "epoch": 0.5823381984372696, "grad_norm": 2.9526062628170004, "learning_rate": 7.299477476923343e-06, "loss": 1.4893, "step": 19750 }, { "epoch": 0.5824856258292791, "grad_norm": 3.1395436998767057, "learning_rate": 7.299269888372098e-06, "loss": 1.4801, "step": 19755 }, { "epoch": 0.5826330532212886, "grad_norm": 2.9244033740194073, "learning_rate": 7.299062195379534e-06, "loss": 1.4774, "step": 19760 }, { "epoch": 0.5827804806132979, "grad_norm": 3.063106391381156, "learning_rate": 7.298854397951761e-06, "loss": 1.5289, "step": 19765 }, { "epoch": 0.5829279080053074, "grad_norm": 3.167250205702584, "learning_rate": 7.298646496094896e-06, "loss": 1.5605, "step": 19770 }, { "epoch": 0.5830753353973168, "grad_norm": 3.1038525856028567, "learning_rate": 7.298438489815055e-06, "loss": 1.5309, "step": 19775 }, { "epoch": 0.5832227627893263, "grad_norm": 2.9782782596714275, "learning_rate": 7.29823037911836e-06, "loss": 1.5622, "step": 19780 }, { "epoch": 0.5833701901813357, "grad_norm": 3.0846304683381915, "learning_rate": 7.298022164010935e-06, "loss": 1.5753, "step": 19785 }, { "epoch": 0.5835176175733451, "grad_norm": 3.0253814978929494, "learning_rate": 7.297813844498906e-06, "loss": 1.5164, "step": 19790 }, { "epoch": 0.5836650449653545, "grad_norm": 3.0126998943645016, "learning_rate": 7.297605420588403e-06, "loss": 1.5147, "step": 19795 }, { "epoch": 0.583812472357364, "grad_norm": 3.025999856950962, "learning_rate": 7.29739689228556e-06, "loss": 1.5233, "step": 19800 }, { "epoch": 0.5839598997493735, "grad_norm": 3.101819306836912, "learning_rate": 7.297188259596513e-06, "loss": 1.4711, "step": 19805 }, { "epoch": 0.5841073271413829, "grad_norm": 3.1297753238181847, "learning_rate": 7.2969795225274e-06, "loss": 1.4564, "step": 19810 }, { "epoch": 0.5842547545333923, "grad_norm": 3.0213436643238265, "learning_rate": 7.296770681084365e-06, "loss": 1.5161, "step": 19815 }, { "epoch": 0.5844021819254017, "grad_norm": 3.003589451841444, "learning_rate": 7.296561735273552e-06, "loss": 1.4917, "step": 19820 }, { "epoch": 0.5845496093174112, "grad_norm": 3.1196655716801454, "learning_rate": 7.2963526851011115e-06, "loss": 1.4895, "step": 19825 }, { "epoch": 0.5846970367094206, "grad_norm": 3.4213642147675563, "learning_rate": 7.296143530573194e-06, "loss": 1.4406, "step": 19830 }, { "epoch": 0.5848444641014301, "grad_norm": 2.88479534656206, "learning_rate": 7.295934271695952e-06, "loss": 1.5018, "step": 19835 }, { "epoch": 0.5849918914934394, "grad_norm": 2.9328370805543247, "learning_rate": 7.295724908475546e-06, "loss": 1.5218, "step": 19840 }, { "epoch": 0.5851393188854489, "grad_norm": 3.219875126375905, "learning_rate": 7.295515440918136e-06, "loss": 1.5217, "step": 19845 }, { "epoch": 0.5852867462774584, "grad_norm": 3.1556313085776155, "learning_rate": 7.295305869029885e-06, "loss": 1.5609, "step": 19850 }, { "epoch": 0.5854341736694678, "grad_norm": 3.0265404599699686, "learning_rate": 7.295096192816961e-06, "loss": 1.5067, "step": 19855 }, { "epoch": 0.5855816010614773, "grad_norm": 3.0328220997819706, "learning_rate": 7.294886412285534e-06, "loss": 1.5212, "step": 19860 }, { "epoch": 0.5857290284534866, "grad_norm": 3.123540714287551, "learning_rate": 7.294676527441775e-06, "loss": 1.4783, "step": 19865 }, { "epoch": 0.5858764558454961, "grad_norm": 3.026044393419023, "learning_rate": 7.294466538291863e-06, "loss": 1.4856, "step": 19870 }, { "epoch": 0.5860238832375055, "grad_norm": 3.076244416847074, "learning_rate": 7.294256444841974e-06, "loss": 1.5303, "step": 19875 }, { "epoch": 0.586171310629515, "grad_norm": 2.897042750848512, "learning_rate": 7.294046247098294e-06, "loss": 1.4825, "step": 19880 }, { "epoch": 0.5863187380215243, "grad_norm": 3.1452736371830197, "learning_rate": 7.293835945067005e-06, "loss": 1.4575, "step": 19885 }, { "epoch": 0.5864661654135338, "grad_norm": 2.8803546384420007, "learning_rate": 7.293625538754297e-06, "loss": 1.5021, "step": 19890 }, { "epoch": 0.5866135928055433, "grad_norm": 3.0716628844019236, "learning_rate": 7.293415028166362e-06, "loss": 1.5324, "step": 19895 }, { "epoch": 0.5867610201975527, "grad_norm": 3.008741207221493, "learning_rate": 7.293204413309392e-06, "loss": 1.4883, "step": 19900 }, { "epoch": 0.5869084475895622, "grad_norm": 3.1296222096903077, "learning_rate": 7.292993694189588e-06, "loss": 1.5201, "step": 19905 }, { "epoch": 0.5870558749815715, "grad_norm": 3.1562371033222814, "learning_rate": 7.292782870813148e-06, "loss": 1.4858, "step": 19910 }, { "epoch": 0.587203302373581, "grad_norm": 3.148994436801171, "learning_rate": 7.292571943186277e-06, "loss": 1.5635, "step": 19915 }, { "epoch": 0.5873507297655904, "grad_norm": 3.0223481176758153, "learning_rate": 7.29236091131518e-06, "loss": 1.4582, "step": 19920 }, { "epoch": 0.5874981571575999, "grad_norm": 2.9871090546603107, "learning_rate": 7.2921497752060686e-06, "loss": 1.4457, "step": 19925 }, { "epoch": 0.5876455845496094, "grad_norm": 2.958817434217151, "learning_rate": 7.291938534865156e-06, "loss": 1.4982, "step": 19930 }, { "epoch": 0.5877930119416187, "grad_norm": 3.000234845398605, "learning_rate": 7.291727190298658e-06, "loss": 1.5108, "step": 19935 }, { "epoch": 0.5879404393336282, "grad_norm": 3.035988998048667, "learning_rate": 7.291515741512792e-06, "loss": 1.4755, "step": 19940 }, { "epoch": 0.5880878667256376, "grad_norm": 2.9054628915642815, "learning_rate": 7.2913041885137826e-06, "loss": 1.5173, "step": 19945 }, { "epoch": 0.5882352941176471, "grad_norm": 2.9578748245667006, "learning_rate": 7.291092531307852e-06, "loss": 1.4836, "step": 19950 }, { "epoch": 0.5883827215096565, "grad_norm": 2.9395682702719683, "learning_rate": 7.290880769901233e-06, "loss": 1.4818, "step": 19955 }, { "epoch": 0.5885301489016659, "grad_norm": 2.948097048093607, "learning_rate": 7.290668904300151e-06, "loss": 1.5066, "step": 19960 }, { "epoch": 0.5886775762936753, "grad_norm": 2.9886328670738873, "learning_rate": 7.290456934510845e-06, "loss": 1.5217, "step": 19965 }, { "epoch": 0.5888250036856848, "grad_norm": 2.997701001223487, "learning_rate": 7.290244860539551e-06, "loss": 1.5462, "step": 19970 }, { "epoch": 0.5889724310776943, "grad_norm": 2.872478970782838, "learning_rate": 7.2900326823925086e-06, "loss": 1.4596, "step": 19975 }, { "epoch": 0.5891198584697037, "grad_norm": 3.004998485621384, "learning_rate": 7.289820400075964e-06, "loss": 1.4626, "step": 19980 }, { "epoch": 0.5892672858617131, "grad_norm": 3.075830179074867, "learning_rate": 7.289608013596161e-06, "loss": 1.5323, "step": 19985 }, { "epoch": 0.5894147132537225, "grad_norm": 3.026041260531674, "learning_rate": 7.289395522959349e-06, "loss": 1.483, "step": 19990 }, { "epoch": 0.589562140645732, "grad_norm": 3.011580189307904, "learning_rate": 7.289182928171784e-06, "loss": 1.5497, "step": 19995 }, { "epoch": 0.5897095680377414, "grad_norm": 3.0685284985388166, "learning_rate": 7.28897022923972e-06, "loss": 1.518, "step": 20000 }, { "epoch": 0.5897095680377414, "eval_loss": 1.1823087930679321, "eval_runtime": 4.3078, "eval_samples_per_second": 91.926, "eval_steps_per_second": 3.018, "step": 20000 }, { "epoch": 0.5898569954297509, "grad_norm": 3.6082112485797193, "learning_rate": 7.288757426169414e-06, "loss": 1.5284, "step": 20005 }, { "epoch": 0.5900044228217602, "grad_norm": 3.1023619564442617, "learning_rate": 7.288544518967131e-06, "loss": 1.5022, "step": 20010 }, { "epoch": 0.5901518502137697, "grad_norm": 2.9894045440147474, "learning_rate": 7.2883315076391365e-06, "loss": 1.502, "step": 20015 }, { "epoch": 0.5902992776057792, "grad_norm": 2.959133874734106, "learning_rate": 7.288118392191696e-06, "loss": 1.5168, "step": 20020 }, { "epoch": 0.5904467049977886, "grad_norm": 2.998552685497996, "learning_rate": 7.287905172631081e-06, "loss": 1.5268, "step": 20025 }, { "epoch": 0.5905941323897981, "grad_norm": 3.0399142362343747, "learning_rate": 7.287691848963567e-06, "loss": 1.5133, "step": 20030 }, { "epoch": 0.5907415597818074, "grad_norm": 3.0805656267866315, "learning_rate": 7.2874784211954305e-06, "loss": 1.5214, "step": 20035 }, { "epoch": 0.5908889871738169, "grad_norm": 3.127257147487918, "learning_rate": 7.287264889332952e-06, "loss": 1.4984, "step": 20040 }, { "epoch": 0.5910364145658263, "grad_norm": 3.0330557067526698, "learning_rate": 7.287051253382415e-06, "loss": 1.484, "step": 20045 }, { "epoch": 0.5911838419578358, "grad_norm": 3.0096906539909503, "learning_rate": 7.286837513350106e-06, "loss": 1.5175, "step": 20050 }, { "epoch": 0.5913312693498453, "grad_norm": 3.196656146059076, "learning_rate": 7.286623669242315e-06, "loss": 1.5133, "step": 20055 }, { "epoch": 0.5914786967418546, "grad_norm": 2.9712812910436313, "learning_rate": 7.2864097210653344e-06, "loss": 1.5579, "step": 20060 }, { "epoch": 0.5916261241338641, "grad_norm": 2.92306695903262, "learning_rate": 7.286195668825458e-06, "loss": 1.4766, "step": 20065 }, { "epoch": 0.5917735515258735, "grad_norm": 2.913533408624516, "learning_rate": 7.285981512528987e-06, "loss": 1.5033, "step": 20070 }, { "epoch": 0.591920978917883, "grad_norm": 2.992196831436202, "learning_rate": 7.285767252182223e-06, "loss": 1.4691, "step": 20075 }, { "epoch": 0.5920684063098923, "grad_norm": 3.6587415944978074, "learning_rate": 7.285552887791469e-06, "loss": 1.5328, "step": 20080 }, { "epoch": 0.5922158337019018, "grad_norm": 3.0257120984697776, "learning_rate": 7.285338419363035e-06, "loss": 1.4532, "step": 20085 }, { "epoch": 0.5923632610939112, "grad_norm": 2.98233568690303, "learning_rate": 7.2851238469032304e-06, "loss": 1.4568, "step": 20090 }, { "epoch": 0.5925106884859207, "grad_norm": 2.947626310824739, "learning_rate": 7.284909170418371e-06, "loss": 1.5458, "step": 20095 }, { "epoch": 0.5926581158779302, "grad_norm": 3.197151934680114, "learning_rate": 7.2846943899147725e-06, "loss": 1.5607, "step": 20100 }, { "epoch": 0.5928055432699395, "grad_norm": 2.9142283691210156, "learning_rate": 7.284479505398754e-06, "loss": 1.4521, "step": 20105 }, { "epoch": 0.592952970661949, "grad_norm": 3.0074101792182106, "learning_rate": 7.284264516876641e-06, "loss": 1.5004, "step": 20110 }, { "epoch": 0.5931003980539584, "grad_norm": 2.9146313635030383, "learning_rate": 7.284049424354759e-06, "loss": 1.508, "step": 20115 }, { "epoch": 0.5932478254459679, "grad_norm": 3.0963421196428613, "learning_rate": 7.283834227839438e-06, "loss": 1.5046, "step": 20120 }, { "epoch": 0.5933952528379773, "grad_norm": 3.0590306990957816, "learning_rate": 7.283618927337009e-06, "loss": 1.4853, "step": 20125 }, { "epoch": 0.5935426802299867, "grad_norm": 2.9301487744568018, "learning_rate": 7.283403522853808e-06, "loss": 1.4718, "step": 20130 }, { "epoch": 0.5936901076219961, "grad_norm": 2.9867110522992486, "learning_rate": 7.283188014396173e-06, "loss": 1.5238, "step": 20135 }, { "epoch": 0.5938375350140056, "grad_norm": 2.9616235382497753, "learning_rate": 7.282972401970447e-06, "loss": 1.4915, "step": 20140 }, { "epoch": 0.5939849624060151, "grad_norm": 3.0572528244707664, "learning_rate": 7.282756685582974e-06, "loss": 1.5208, "step": 20145 }, { "epoch": 0.5941323897980245, "grad_norm": 3.1488130388250872, "learning_rate": 7.282540865240101e-06, "loss": 1.519, "step": 20150 }, { "epoch": 0.5942798171900339, "grad_norm": 3.2577196344605235, "learning_rate": 7.28232494094818e-06, "loss": 1.4891, "step": 20155 }, { "epoch": 0.5944272445820433, "grad_norm": 2.945290256403467, "learning_rate": 7.282108912713564e-06, "loss": 1.4833, "step": 20160 }, { "epoch": 0.5945746719740528, "grad_norm": 3.0461446286586327, "learning_rate": 7.281892780542609e-06, "loss": 1.527, "step": 20165 }, { "epoch": 0.5947220993660622, "grad_norm": 3.076870166155963, "learning_rate": 7.2816765444416766e-06, "loss": 1.548, "step": 20170 }, { "epoch": 0.5948695267580717, "grad_norm": 2.925803789865515, "learning_rate": 7.281460204417129e-06, "loss": 1.5099, "step": 20175 }, { "epoch": 0.595016954150081, "grad_norm": 3.0247950486677926, "learning_rate": 7.281243760475332e-06, "loss": 1.527, "step": 20180 }, { "epoch": 0.5951643815420905, "grad_norm": 2.949063330879455, "learning_rate": 7.281027212622656e-06, "loss": 1.4733, "step": 20185 }, { "epoch": 0.5953118089341, "grad_norm": 2.9678759791788627, "learning_rate": 7.280810560865472e-06, "loss": 1.4839, "step": 20190 }, { "epoch": 0.5954592363261094, "grad_norm": 3.0469498220182634, "learning_rate": 7.2805938052101545e-06, "loss": 1.5245, "step": 20195 }, { "epoch": 0.5956066637181189, "grad_norm": 3.37606509179368, "learning_rate": 7.280376945663084e-06, "loss": 1.4578, "step": 20200 }, { "epoch": 0.5957540911101282, "grad_norm": 2.9444673517287017, "learning_rate": 7.28015998223064e-06, "loss": 1.4844, "step": 20205 }, { "epoch": 0.5959015185021377, "grad_norm": 2.8983529686056824, "learning_rate": 7.279942914919207e-06, "loss": 1.4794, "step": 20210 }, { "epoch": 0.5960489458941471, "grad_norm": 3.099532470527431, "learning_rate": 7.2797257437351745e-06, "loss": 1.5247, "step": 20215 }, { "epoch": 0.5961963732861566, "grad_norm": 2.9320282765879946, "learning_rate": 7.2795084686849305e-06, "loss": 1.4847, "step": 20220 }, { "epoch": 0.5963438006781661, "grad_norm": 3.03782442907784, "learning_rate": 7.279291089774868e-06, "loss": 1.4721, "step": 20225 }, { "epoch": 0.5964912280701754, "grad_norm": 3.1444752416830917, "learning_rate": 7.279073607011388e-06, "loss": 1.4851, "step": 20230 }, { "epoch": 0.5966386554621849, "grad_norm": 3.0556538153897304, "learning_rate": 7.278856020400886e-06, "loss": 1.5241, "step": 20235 }, { "epoch": 0.5967860828541943, "grad_norm": 3.0566900115462357, "learning_rate": 7.278638329949766e-06, "loss": 1.5412, "step": 20240 }, { "epoch": 0.5969335102462038, "grad_norm": 3.09250204643573, "learning_rate": 7.278420535664435e-06, "loss": 1.5434, "step": 20245 }, { "epoch": 0.5970809376382131, "grad_norm": 3.017719973581659, "learning_rate": 7.278202637551299e-06, "loss": 1.5077, "step": 20250 }, { "epoch": 0.5972283650302226, "grad_norm": 3.007623037887346, "learning_rate": 7.277984635616772e-06, "loss": 1.531, "step": 20255 }, { "epoch": 0.597375792422232, "grad_norm": 2.866043275663774, "learning_rate": 7.2777665298672695e-06, "loss": 1.473, "step": 20260 }, { "epoch": 0.5975232198142415, "grad_norm": 2.990108927372106, "learning_rate": 7.277548320309208e-06, "loss": 1.449, "step": 20265 }, { "epoch": 0.597670647206251, "grad_norm": 3.0530540156265333, "learning_rate": 7.277330006949009e-06, "loss": 1.5052, "step": 20270 }, { "epoch": 0.5978180745982603, "grad_norm": 3.2842753560282123, "learning_rate": 7.277111589793097e-06, "loss": 1.5197, "step": 20275 }, { "epoch": 0.5979655019902698, "grad_norm": 2.8212754780204308, "learning_rate": 7.276893068847898e-06, "loss": 1.4841, "step": 20280 }, { "epoch": 0.5981129293822792, "grad_norm": 2.98611404175653, "learning_rate": 7.276674444119844e-06, "loss": 1.4554, "step": 20285 }, { "epoch": 0.5982603567742887, "grad_norm": 3.0041568649773374, "learning_rate": 7.276455715615367e-06, "loss": 1.5213, "step": 20290 }, { "epoch": 0.5984077841662981, "grad_norm": 3.1161665016464744, "learning_rate": 7.276236883340905e-06, "loss": 1.5102, "step": 20295 }, { "epoch": 0.5985552115583075, "grad_norm": 3.0530182083890103, "learning_rate": 7.276017947302895e-06, "loss": 1.485, "step": 20300 }, { "epoch": 0.598702638950317, "grad_norm": 3.0541276174967216, "learning_rate": 7.275798907507782e-06, "loss": 1.5236, "step": 20305 }, { "epoch": 0.5988500663423264, "grad_norm": 3.047165341492388, "learning_rate": 7.275579763962009e-06, "loss": 1.4958, "step": 20310 }, { "epoch": 0.5989974937343359, "grad_norm": 3.007905916878081, "learning_rate": 7.275360516672025e-06, "loss": 1.5188, "step": 20315 }, { "epoch": 0.5991449211263453, "grad_norm": 2.9659815979683195, "learning_rate": 7.275141165644283e-06, "loss": 1.4967, "step": 20320 }, { "epoch": 0.5992923485183547, "grad_norm": 2.952049922584679, "learning_rate": 7.274921710885237e-06, "loss": 1.5076, "step": 20325 }, { "epoch": 0.5994397759103641, "grad_norm": 2.943882668220773, "learning_rate": 7.274702152401345e-06, "loss": 1.4654, "step": 20330 }, { "epoch": 0.5995872033023736, "grad_norm": 3.0153292092954054, "learning_rate": 7.274482490199066e-06, "loss": 1.5452, "step": 20335 }, { "epoch": 0.599734630694383, "grad_norm": 2.97490407788462, "learning_rate": 7.2742627242848655e-06, "loss": 1.5292, "step": 20340 }, { "epoch": 0.5998820580863925, "grad_norm": 2.928391905100575, "learning_rate": 7.274042854665211e-06, "loss": 1.501, "step": 20345 }, { "epoch": 0.6000294854784018, "grad_norm": 3.390717826865923, "learning_rate": 7.273822881346571e-06, "loss": 1.4836, "step": 20350 }, { "epoch": 0.6001769128704113, "grad_norm": 2.9722550465557966, "learning_rate": 7.27360280433542e-06, "loss": 1.4931, "step": 20355 }, { "epoch": 0.6003243402624208, "grad_norm": 2.971516925563105, "learning_rate": 7.2733826236382316e-06, "loss": 1.4578, "step": 20360 }, { "epoch": 0.6004717676544302, "grad_norm": 3.0321099960336415, "learning_rate": 7.273162339261487e-06, "loss": 1.4899, "step": 20365 }, { "epoch": 0.6006191950464397, "grad_norm": 3.000550342197034, "learning_rate": 7.2729419512116664e-06, "loss": 1.561, "step": 20370 }, { "epoch": 0.600766622438449, "grad_norm": 3.011512005981054, "learning_rate": 7.2727214594952575e-06, "loss": 1.4545, "step": 20375 }, { "epoch": 0.6009140498304585, "grad_norm": 2.863394475998751, "learning_rate": 7.272500864118746e-06, "loss": 1.5304, "step": 20380 }, { "epoch": 0.6010614772224679, "grad_norm": 3.2997657653771557, "learning_rate": 7.272280165088625e-06, "loss": 1.471, "step": 20385 }, { "epoch": 0.6012089046144774, "grad_norm": 3.137657682185687, "learning_rate": 7.272059362411388e-06, "loss": 1.4926, "step": 20390 }, { "epoch": 0.6013563320064869, "grad_norm": 3.053588973406695, "learning_rate": 7.271838456093532e-06, "loss": 1.5245, "step": 20395 }, { "epoch": 0.6015037593984962, "grad_norm": 3.064516300898707, "learning_rate": 7.271617446141559e-06, "loss": 1.4872, "step": 20400 }, { "epoch": 0.6016511867905057, "grad_norm": 2.988170371358022, "learning_rate": 7.27139633256197e-06, "loss": 1.5034, "step": 20405 }, { "epoch": 0.6017986141825151, "grad_norm": 3.0276443684848875, "learning_rate": 7.2711751153612735e-06, "loss": 1.526, "step": 20410 }, { "epoch": 0.6019460415745246, "grad_norm": 2.87930310187084, "learning_rate": 7.270953794545978e-06, "loss": 1.4898, "step": 20415 }, { "epoch": 0.6020934689665339, "grad_norm": 3.186025284107774, "learning_rate": 7.270732370122598e-06, "loss": 1.4908, "step": 20420 }, { "epoch": 0.6022408963585434, "grad_norm": 3.0605833500983213, "learning_rate": 7.270510842097647e-06, "loss": 1.5342, "step": 20425 }, { "epoch": 0.6023883237505528, "grad_norm": 3.1399755184438867, "learning_rate": 7.270289210477644e-06, "loss": 1.5033, "step": 20430 }, { "epoch": 0.6025357511425623, "grad_norm": 2.8983762064123026, "learning_rate": 7.270067475269111e-06, "loss": 1.5245, "step": 20435 }, { "epoch": 0.6026831785345718, "grad_norm": 2.9885197818093796, "learning_rate": 7.269845636478574e-06, "loss": 1.4802, "step": 20440 }, { "epoch": 0.6028306059265811, "grad_norm": 2.963417518033009, "learning_rate": 7.26962369411256e-06, "loss": 1.4694, "step": 20445 }, { "epoch": 0.6029780333185906, "grad_norm": 3.071353495416986, "learning_rate": 7.269401648177599e-06, "loss": 1.5467, "step": 20450 }, { "epoch": 0.6031254607106, "grad_norm": 2.9677163376998124, "learning_rate": 7.269179498680226e-06, "loss": 1.4803, "step": 20455 }, { "epoch": 0.6032728881026095, "grad_norm": 2.8796672072904097, "learning_rate": 7.268957245626978e-06, "loss": 1.4792, "step": 20460 }, { "epoch": 0.6034203154946189, "grad_norm": 3.0397286203791505, "learning_rate": 7.268734889024395e-06, "loss": 1.512, "step": 20465 }, { "epoch": 0.6035677428866283, "grad_norm": 3.1397976733816293, "learning_rate": 7.268512428879019e-06, "loss": 1.5366, "step": 20470 }, { "epoch": 0.6037151702786377, "grad_norm": 3.115047228993504, "learning_rate": 7.268289865197398e-06, "loss": 1.5097, "step": 20475 }, { "epoch": 0.6038625976706472, "grad_norm": 2.941203072057385, "learning_rate": 7.26806719798608e-06, "loss": 1.4444, "step": 20480 }, { "epoch": 0.6040100250626567, "grad_norm": 3.0525214425936906, "learning_rate": 7.267844427251617e-06, "loss": 1.5153, "step": 20485 }, { "epoch": 0.6041574524546661, "grad_norm": 3.0750647625682643, "learning_rate": 7.267621553000566e-06, "loss": 1.5563, "step": 20490 }, { "epoch": 0.6043048798466755, "grad_norm": 2.8723147275827774, "learning_rate": 7.267398575239482e-06, "loss": 1.4783, "step": 20495 }, { "epoch": 0.6044523072386849, "grad_norm": 3.098544473509262, "learning_rate": 7.26717549397493e-06, "loss": 1.5104, "step": 20500 }, { "epoch": 0.6044523072386849, "eval_loss": 1.1794097423553467, "eval_runtime": 4.1995, "eval_samples_per_second": 94.296, "eval_steps_per_second": 3.096, "step": 20500 }, { "epoch": 0.6045997346306944, "grad_norm": 3.0422119507085, "learning_rate": 7.266952309213472e-06, "loss": 1.4706, "step": 20505 }, { "epoch": 0.6047471620227038, "grad_norm": 2.97806534798097, "learning_rate": 7.266729020961678e-06, "loss": 1.4927, "step": 20510 }, { "epoch": 0.6048945894147133, "grad_norm": 3.0635857955109054, "learning_rate": 7.266505629226116e-06, "loss": 1.588, "step": 20515 }, { "epoch": 0.6050420168067226, "grad_norm": 2.9806007194474327, "learning_rate": 7.26628213401336e-06, "loss": 1.5945, "step": 20520 }, { "epoch": 0.6051894441987321, "grad_norm": 3.024714464054503, "learning_rate": 7.266058535329988e-06, "loss": 1.4878, "step": 20525 }, { "epoch": 0.6053368715907416, "grad_norm": 3.17159998398197, "learning_rate": 7.265834833182578e-06, "loss": 1.5389, "step": 20530 }, { "epoch": 0.605484298982751, "grad_norm": 3.207017902687651, "learning_rate": 7.265611027577713e-06, "loss": 1.5515, "step": 20535 }, { "epoch": 0.6056317263747605, "grad_norm": 3.094260132126329, "learning_rate": 7.26538711852198e-06, "loss": 1.4499, "step": 20540 }, { "epoch": 0.6057791537667698, "grad_norm": 3.02222156978557, "learning_rate": 7.265163106021966e-06, "loss": 1.5149, "step": 20545 }, { "epoch": 0.6059265811587793, "grad_norm": 3.0786788901338684, "learning_rate": 7.264938990084264e-06, "loss": 1.5118, "step": 20550 }, { "epoch": 0.6060740085507887, "grad_norm": 3.0007275119603665, "learning_rate": 7.264714770715469e-06, "loss": 1.5107, "step": 20555 }, { "epoch": 0.6062214359427982, "grad_norm": 2.952846168580074, "learning_rate": 7.264490447922178e-06, "loss": 1.5065, "step": 20560 }, { "epoch": 0.6063688633348077, "grad_norm": 2.8893527971837956, "learning_rate": 7.264266021710992e-06, "loss": 1.5424, "step": 20565 }, { "epoch": 0.606516290726817, "grad_norm": 2.928517154214612, "learning_rate": 7.264041492088516e-06, "loss": 1.5431, "step": 20570 }, { "epoch": 0.6066637181188265, "grad_norm": 3.028341486545324, "learning_rate": 7.263816859061357e-06, "loss": 1.5499, "step": 20575 }, { "epoch": 0.6068111455108359, "grad_norm": 2.9104118226162354, "learning_rate": 7.263592122636124e-06, "loss": 1.4929, "step": 20580 }, { "epoch": 0.6069585729028454, "grad_norm": 3.031309266873429, "learning_rate": 7.263367282819431e-06, "loss": 1.4945, "step": 20585 }, { "epoch": 0.6071060002948547, "grad_norm": 2.9667517983366074, "learning_rate": 7.263142339617894e-06, "loss": 1.5196, "step": 20590 }, { "epoch": 0.6072534276868642, "grad_norm": 2.9826910080859292, "learning_rate": 7.262917293038132e-06, "loss": 1.5125, "step": 20595 }, { "epoch": 0.6074008550788736, "grad_norm": 2.9441932558528308, "learning_rate": 7.262692143086766e-06, "loss": 1.4875, "step": 20600 }, { "epoch": 0.6075482824708831, "grad_norm": 2.987559514695179, "learning_rate": 7.262466889770424e-06, "loss": 1.4954, "step": 20605 }, { "epoch": 0.6076957098628926, "grad_norm": 2.966481310518736, "learning_rate": 7.262241533095733e-06, "loss": 1.5308, "step": 20610 }, { "epoch": 0.6078431372549019, "grad_norm": 3.1382147538501677, "learning_rate": 7.262016073069324e-06, "loss": 1.5414, "step": 20615 }, { "epoch": 0.6079905646469114, "grad_norm": 2.8760434483560395, "learning_rate": 7.261790509697831e-06, "loss": 1.5073, "step": 20620 }, { "epoch": 0.6081379920389208, "grad_norm": 3.010147640049508, "learning_rate": 7.261564842987894e-06, "loss": 1.5047, "step": 20625 }, { "epoch": 0.6082854194309303, "grad_norm": 2.945956941003497, "learning_rate": 7.261339072946151e-06, "loss": 1.488, "step": 20630 }, { "epoch": 0.6084328468229397, "grad_norm": 2.8995193688153122, "learning_rate": 7.261113199579245e-06, "loss": 1.5148, "step": 20635 }, { "epoch": 0.6085802742149491, "grad_norm": 2.9472320387413227, "learning_rate": 7.2608872228938246e-06, "loss": 1.47, "step": 20640 }, { "epoch": 0.6087277016069585, "grad_norm": 2.9501597628971346, "learning_rate": 7.260661142896539e-06, "loss": 1.5235, "step": 20645 }, { "epoch": 0.608875128998968, "grad_norm": 3.043529957505373, "learning_rate": 7.2604349595940406e-06, "loss": 1.4543, "step": 20650 }, { "epoch": 0.6090225563909775, "grad_norm": 3.122101402699169, "learning_rate": 7.260208672992984e-06, "loss": 1.5146, "step": 20655 }, { "epoch": 0.6091699837829869, "grad_norm": 2.9230384485494962, "learning_rate": 7.25998228310003e-06, "loss": 1.5377, "step": 20660 }, { "epoch": 0.6093174111749963, "grad_norm": 2.8976870763162412, "learning_rate": 7.259755789921839e-06, "loss": 1.4996, "step": 20665 }, { "epoch": 0.6094648385670057, "grad_norm": 2.8736157971198995, "learning_rate": 7.259529193465075e-06, "loss": 1.5064, "step": 20670 }, { "epoch": 0.6096122659590152, "grad_norm": 2.955341594073697, "learning_rate": 7.259302493736409e-06, "loss": 1.5253, "step": 20675 }, { "epoch": 0.6097596933510246, "grad_norm": 3.085438004720904, "learning_rate": 7.2590756907425084e-06, "loss": 1.5356, "step": 20680 }, { "epoch": 0.6099071207430341, "grad_norm": 2.941472138837567, "learning_rate": 7.25884878449005e-06, "loss": 1.5007, "step": 20685 }, { "epoch": 0.6100545481350435, "grad_norm": 2.98777977727756, "learning_rate": 7.258621774985709e-06, "loss": 1.5505, "step": 20690 }, { "epoch": 0.6102019755270529, "grad_norm": 3.1100728645054003, "learning_rate": 7.258394662236165e-06, "loss": 1.5341, "step": 20695 }, { "epoch": 0.6103494029190624, "grad_norm": 2.9766589762474713, "learning_rate": 7.258167446248102e-06, "loss": 1.4943, "step": 20700 }, { "epoch": 0.6104968303110718, "grad_norm": 3.0889037969194435, "learning_rate": 7.257940127028207e-06, "loss": 1.4803, "step": 20705 }, { "epoch": 0.6106442577030813, "grad_norm": 2.9478453037038896, "learning_rate": 7.257712704583168e-06, "loss": 1.5201, "step": 20710 }, { "epoch": 0.6107916850950906, "grad_norm": 2.9058042188383797, "learning_rate": 7.257485178919677e-06, "loss": 1.5452, "step": 20715 }, { "epoch": 0.6109391124871001, "grad_norm": 2.874986280786204, "learning_rate": 7.257257550044429e-06, "loss": 1.5175, "step": 20720 }, { "epoch": 0.6110865398791095, "grad_norm": 2.9395675521747857, "learning_rate": 7.257029817964123e-06, "loss": 1.4455, "step": 20725 }, { "epoch": 0.611233967271119, "grad_norm": 2.967329761427187, "learning_rate": 7.256801982685461e-06, "loss": 1.4888, "step": 20730 }, { "epoch": 0.6113813946631285, "grad_norm": 2.9756157852757674, "learning_rate": 7.256574044215144e-06, "loss": 1.557, "step": 20735 }, { "epoch": 0.6115288220551378, "grad_norm": 2.999051585609373, "learning_rate": 7.256346002559884e-06, "loss": 1.5594, "step": 20740 }, { "epoch": 0.6116762494471473, "grad_norm": 2.8830282717558844, "learning_rate": 7.256117857726388e-06, "loss": 1.4009, "step": 20745 }, { "epoch": 0.6118236768391567, "grad_norm": 3.1653616881602775, "learning_rate": 7.255889609721371e-06, "loss": 1.5323, "step": 20750 }, { "epoch": 0.6119711042311662, "grad_norm": 3.0087477086090324, "learning_rate": 7.255661258551549e-06, "loss": 1.4993, "step": 20755 }, { "epoch": 0.6121185316231755, "grad_norm": 2.8856367449243643, "learning_rate": 7.2554328042236404e-06, "loss": 1.4802, "step": 20760 }, { "epoch": 0.612265959015185, "grad_norm": 2.9953403522542095, "learning_rate": 7.255204246744369e-06, "loss": 1.5098, "step": 20765 }, { "epoch": 0.6124133864071944, "grad_norm": 2.864799927363114, "learning_rate": 7.254975586120461e-06, "loss": 1.4725, "step": 20770 }, { "epoch": 0.6125608137992039, "grad_norm": 3.0303491185940516, "learning_rate": 7.254746822358643e-06, "loss": 1.4985, "step": 20775 }, { "epoch": 0.6127082411912134, "grad_norm": 2.9994312405092693, "learning_rate": 7.2545179554656485e-06, "loss": 1.4206, "step": 20780 }, { "epoch": 0.6128556685832227, "grad_norm": 3.1005766878318504, "learning_rate": 7.25428898544821e-06, "loss": 1.5034, "step": 20785 }, { "epoch": 0.6130030959752322, "grad_norm": 2.863122128331272, "learning_rate": 7.2540599123130684e-06, "loss": 1.5545, "step": 20790 }, { "epoch": 0.6131505233672416, "grad_norm": 3.0620856285472997, "learning_rate": 7.253830736066963e-06, "loss": 1.4844, "step": 20795 }, { "epoch": 0.6132979507592511, "grad_norm": 3.176843069859179, "learning_rate": 7.253601456716635e-06, "loss": 1.5375, "step": 20800 }, { "epoch": 0.6134453781512605, "grad_norm": 2.8494911692615164, "learning_rate": 7.2533720742688356e-06, "loss": 1.5506, "step": 20805 }, { "epoch": 0.6135928055432699, "grad_norm": 3.0042965623058002, "learning_rate": 7.253142588730311e-06, "loss": 1.5124, "step": 20810 }, { "epoch": 0.6137402329352794, "grad_norm": 3.212446471579135, "learning_rate": 7.252913000107817e-06, "loss": 1.5297, "step": 20815 }, { "epoch": 0.6138876603272888, "grad_norm": 3.0053354985765433, "learning_rate": 7.2526833084081064e-06, "loss": 1.4616, "step": 20820 }, { "epoch": 0.6140350877192983, "grad_norm": 2.948114706422753, "learning_rate": 7.252453513637941e-06, "loss": 1.5482, "step": 20825 }, { "epoch": 0.6141825151113077, "grad_norm": 3.0375497845874686, "learning_rate": 7.252223615804081e-06, "loss": 1.5189, "step": 20830 }, { "epoch": 0.6143299425033171, "grad_norm": 3.0975545669842672, "learning_rate": 7.251993614913293e-06, "loss": 1.4729, "step": 20835 }, { "epoch": 0.6144773698953265, "grad_norm": 3.164839100374986, "learning_rate": 7.251763510972344e-06, "loss": 1.5387, "step": 20840 }, { "epoch": 0.614624797287336, "grad_norm": 2.8059466225334915, "learning_rate": 7.251533303988003e-06, "loss": 1.5122, "step": 20845 }, { "epoch": 0.6147722246793454, "grad_norm": 3.009928135640503, "learning_rate": 7.251302993967047e-06, "loss": 1.5032, "step": 20850 }, { "epoch": 0.6149196520713549, "grad_norm": 3.3031712020602435, "learning_rate": 7.251072580916253e-06, "loss": 1.5673, "step": 20855 }, { "epoch": 0.6150670794633643, "grad_norm": 2.9898440505400523, "learning_rate": 7.250842064842401e-06, "loss": 1.5296, "step": 20860 }, { "epoch": 0.6152145068553737, "grad_norm": 3.065726502564028, "learning_rate": 7.250611445752272e-06, "loss": 1.5406, "step": 20865 }, { "epoch": 0.6153619342473832, "grad_norm": 2.8971562104650204, "learning_rate": 7.250380723652656e-06, "loss": 1.5134, "step": 20870 }, { "epoch": 0.6155093616393926, "grad_norm": 3.0062974725574017, "learning_rate": 7.250149898550339e-06, "loss": 1.5635, "step": 20875 }, { "epoch": 0.6156567890314021, "grad_norm": 3.008881077663591, "learning_rate": 7.249918970452115e-06, "loss": 1.5341, "step": 20880 }, { "epoch": 0.6158042164234114, "grad_norm": 2.815945482836528, "learning_rate": 7.249687939364779e-06, "loss": 1.4311, "step": 20885 }, { "epoch": 0.6159516438154209, "grad_norm": 2.8947383811222553, "learning_rate": 7.249456805295128e-06, "loss": 1.4805, "step": 20890 }, { "epoch": 0.6160990712074303, "grad_norm": 2.9934188357441425, "learning_rate": 7.249225568249966e-06, "loss": 1.4863, "step": 20895 }, { "epoch": 0.6162464985994398, "grad_norm": 2.974975705083402, "learning_rate": 7.2489942282360945e-06, "loss": 1.5376, "step": 20900 }, { "epoch": 0.6163939259914493, "grad_norm": 3.0495720571215132, "learning_rate": 7.248762785260324e-06, "loss": 1.5257, "step": 20905 }, { "epoch": 0.6165413533834586, "grad_norm": 2.997381241683059, "learning_rate": 7.2485312393294616e-06, "loss": 1.477, "step": 20910 }, { "epoch": 0.6166887807754681, "grad_norm": 3.1049589590302302, "learning_rate": 7.2482995904503235e-06, "loss": 1.5484, "step": 20915 }, { "epoch": 0.6168362081674775, "grad_norm": 2.9581657251770705, "learning_rate": 7.248067838629725e-06, "loss": 1.484, "step": 20920 }, { "epoch": 0.616983635559487, "grad_norm": 3.3255610300808303, "learning_rate": 7.247835983874487e-06, "loss": 1.4543, "step": 20925 }, { "epoch": 0.6171310629514963, "grad_norm": 3.0462515459653092, "learning_rate": 7.24760402619143e-06, "loss": 1.4988, "step": 20930 }, { "epoch": 0.6172784903435058, "grad_norm": 2.780874841971975, "learning_rate": 7.247371965587381e-06, "loss": 1.5209, "step": 20935 }, { "epoch": 0.6174259177355153, "grad_norm": 3.0685569747316137, "learning_rate": 7.247139802069168e-06, "loss": 1.4692, "step": 20940 }, { "epoch": 0.6175733451275247, "grad_norm": 2.960896406723124, "learning_rate": 7.246907535643623e-06, "loss": 1.5049, "step": 20945 }, { "epoch": 0.6177207725195342, "grad_norm": 3.094858865257181, "learning_rate": 7.246675166317581e-06, "loss": 1.4352, "step": 20950 }, { "epoch": 0.6178681999115435, "grad_norm": 2.9935893541278373, "learning_rate": 7.24644269409788e-06, "loss": 1.4917, "step": 20955 }, { "epoch": 0.618015627303553, "grad_norm": 4.400195305532235, "learning_rate": 7.2462101189913596e-06, "loss": 1.5434, "step": 20960 }, { "epoch": 0.6181630546955624, "grad_norm": 3.1474820524396168, "learning_rate": 7.245977441004864e-06, "loss": 1.4632, "step": 20965 }, { "epoch": 0.6183104820875719, "grad_norm": 2.969346631395722, "learning_rate": 7.24574466014524e-06, "loss": 1.5012, "step": 20970 }, { "epoch": 0.6184579094795813, "grad_norm": 3.097556981466648, "learning_rate": 7.245511776419338e-06, "loss": 1.5663, "step": 20975 }, { "epoch": 0.6186053368715907, "grad_norm": 2.948594984031431, "learning_rate": 7.2452787898340114e-06, "loss": 1.5297, "step": 20980 }, { "epoch": 0.6187527642636002, "grad_norm": 2.8879818566473916, "learning_rate": 7.245045700396114e-06, "loss": 1.5645, "step": 20985 }, { "epoch": 0.6189001916556096, "grad_norm": 3.084415745054537, "learning_rate": 7.244812508112506e-06, "loss": 1.5093, "step": 20990 }, { "epoch": 0.6190476190476191, "grad_norm": 2.9316573726052138, "learning_rate": 7.244579212990051e-06, "loss": 1.4954, "step": 20995 }, { "epoch": 0.6191950464396285, "grad_norm": 3.0701534001389126, "learning_rate": 7.244345815035611e-06, "loss": 1.4874, "step": 21000 }, { "epoch": 0.6191950464396285, "eval_loss": 1.17863130569458, "eval_runtime": 4.2919, "eval_samples_per_second": 92.266, "eval_steps_per_second": 3.029, "step": 21000 }, { "epoch": 0.6193424738316379, "grad_norm": 2.917640634296125, "learning_rate": 7.244112314256054e-06, "loss": 1.4667, "step": 21005 }, { "epoch": 0.6194899012236473, "grad_norm": 3.095899936999979, "learning_rate": 7.243878710658254e-06, "loss": 1.4923, "step": 21010 }, { "epoch": 0.6196373286156568, "grad_norm": 2.9229921915622175, "learning_rate": 7.243645004249083e-06, "loss": 1.5242, "step": 21015 }, { "epoch": 0.6197847560076662, "grad_norm": 2.895359025876537, "learning_rate": 7.243411195035418e-06, "loss": 1.498, "step": 21020 }, { "epoch": 0.6199321833996757, "grad_norm": 3.09967707370458, "learning_rate": 7.243177283024139e-06, "loss": 1.5401, "step": 21025 }, { "epoch": 0.620079610791685, "grad_norm": 3.0994863664314116, "learning_rate": 7.242943268222131e-06, "loss": 1.5004, "step": 21030 }, { "epoch": 0.6202270381836945, "grad_norm": 2.9515222408602946, "learning_rate": 7.242709150636279e-06, "loss": 1.4838, "step": 21035 }, { "epoch": 0.620374465575704, "grad_norm": 2.8635122941862474, "learning_rate": 7.2424749302734705e-06, "loss": 1.5143, "step": 21040 }, { "epoch": 0.6205218929677134, "grad_norm": 3.0003284583151006, "learning_rate": 7.2422406071406e-06, "loss": 1.5159, "step": 21045 }, { "epoch": 0.6206693203597229, "grad_norm": 2.874913014003407, "learning_rate": 7.242006181244563e-06, "loss": 1.5181, "step": 21050 }, { "epoch": 0.6208167477517322, "grad_norm": 3.0389849203790016, "learning_rate": 7.241771652592255e-06, "loss": 1.5054, "step": 21055 }, { "epoch": 0.6209641751437417, "grad_norm": 2.865234689866132, "learning_rate": 7.241537021190579e-06, "loss": 1.5026, "step": 21060 }, { "epoch": 0.6211116025357512, "grad_norm": 2.8284104601850055, "learning_rate": 7.241302287046439e-06, "loss": 1.4854, "step": 21065 }, { "epoch": 0.6212590299277606, "grad_norm": 3.1091362203065196, "learning_rate": 7.241067450166744e-06, "loss": 1.498, "step": 21070 }, { "epoch": 0.6214064573197701, "grad_norm": 2.9553347358230813, "learning_rate": 7.240832510558403e-06, "loss": 1.4557, "step": 21075 }, { "epoch": 0.6215538847117794, "grad_norm": 2.9145232667912326, "learning_rate": 7.240597468228328e-06, "loss": 1.4555, "step": 21080 }, { "epoch": 0.6217013121037889, "grad_norm": 3.202270191058809, "learning_rate": 7.240362323183438e-06, "loss": 1.5167, "step": 21085 }, { "epoch": 0.6218487394957983, "grad_norm": 3.163354344362959, "learning_rate": 7.240127075430649e-06, "loss": 1.4913, "step": 21090 }, { "epoch": 0.6219961668878078, "grad_norm": 3.068443773491116, "learning_rate": 7.2398917249768865e-06, "loss": 1.5195, "step": 21095 }, { "epoch": 0.6221435942798171, "grad_norm": 2.930413750414672, "learning_rate": 7.239656271829076e-06, "loss": 1.5279, "step": 21100 }, { "epoch": 0.6222910216718266, "grad_norm": 2.949683431369873, "learning_rate": 7.2394207159941435e-06, "loss": 1.5073, "step": 21105 }, { "epoch": 0.622438449063836, "grad_norm": 3.1103943909147356, "learning_rate": 7.239185057479023e-06, "loss": 1.5416, "step": 21110 }, { "epoch": 0.6225858764558455, "grad_norm": 3.2974051488343297, "learning_rate": 7.238949296290647e-06, "loss": 1.5253, "step": 21115 }, { "epoch": 0.622733303847855, "grad_norm": 3.0136270545754233, "learning_rate": 7.238713432435954e-06, "loss": 1.5566, "step": 21120 }, { "epoch": 0.6228807312398643, "grad_norm": 2.9445997694600794, "learning_rate": 7.238477465921885e-06, "loss": 1.4993, "step": 21125 }, { "epoch": 0.6230281586318738, "grad_norm": 3.0146307793901577, "learning_rate": 7.238241396755384e-06, "loss": 1.4937, "step": 21130 }, { "epoch": 0.6231755860238832, "grad_norm": 3.196431602561995, "learning_rate": 7.238005224943395e-06, "loss": 1.5132, "step": 21135 }, { "epoch": 0.6233230134158927, "grad_norm": 2.873806639744022, "learning_rate": 7.2377689504928705e-06, "loss": 1.5044, "step": 21140 }, { "epoch": 0.6234704408079021, "grad_norm": 3.029388006377006, "learning_rate": 7.237532573410762e-06, "loss": 1.5138, "step": 21145 }, { "epoch": 0.6236178681999115, "grad_norm": 3.069299195273014, "learning_rate": 7.237296093704024e-06, "loss": 1.4362, "step": 21150 }, { "epoch": 0.623765295591921, "grad_norm": 3.0456209753019845, "learning_rate": 7.237059511379618e-06, "loss": 1.4611, "step": 21155 }, { "epoch": 0.6239127229839304, "grad_norm": 2.94287597763038, "learning_rate": 7.236822826444504e-06, "loss": 1.4992, "step": 21160 }, { "epoch": 0.6240601503759399, "grad_norm": 2.977604533385116, "learning_rate": 7.236586038905646e-06, "loss": 1.5294, "step": 21165 }, { "epoch": 0.6242075777679493, "grad_norm": 2.95493388827791, "learning_rate": 7.236349148770013e-06, "loss": 1.5429, "step": 21170 }, { "epoch": 0.6243550051599587, "grad_norm": 3.008698137680464, "learning_rate": 7.236112156044574e-06, "loss": 1.5152, "step": 21175 }, { "epoch": 0.6245024325519681, "grad_norm": 3.234451638786134, "learning_rate": 7.2358750607363065e-06, "loss": 1.5053, "step": 21180 }, { "epoch": 0.6246498599439776, "grad_norm": 2.8689320700750534, "learning_rate": 7.235637862852183e-06, "loss": 1.4809, "step": 21185 }, { "epoch": 0.624797287335987, "grad_norm": 3.045840998963372, "learning_rate": 7.235400562399186e-06, "loss": 1.5372, "step": 21190 }, { "epoch": 0.6249447147279965, "grad_norm": 2.9579699242059734, "learning_rate": 7.235163159384296e-06, "loss": 1.5342, "step": 21195 }, { "epoch": 0.6250921421200059, "grad_norm": 4.475445162048623, "learning_rate": 7.234925653814502e-06, "loss": 1.4987, "step": 21200 }, { "epoch": 0.6252395695120153, "grad_norm": 2.903069238165078, "learning_rate": 7.234688045696791e-06, "loss": 1.4572, "step": 21205 }, { "epoch": 0.6253869969040248, "grad_norm": 3.13179274031352, "learning_rate": 7.2344503350381545e-06, "loss": 1.5623, "step": 21210 }, { "epoch": 0.6255344242960342, "grad_norm": 3.207624287355254, "learning_rate": 7.23421252184559e-06, "loss": 1.4574, "step": 21215 }, { "epoch": 0.6256818516880437, "grad_norm": 3.04540024527458, "learning_rate": 7.233974606126091e-06, "loss": 1.4738, "step": 21220 }, { "epoch": 0.625829279080053, "grad_norm": 3.0390494507736032, "learning_rate": 7.233736587886663e-06, "loss": 1.5385, "step": 21225 }, { "epoch": 0.6259767064720625, "grad_norm": 2.972679057863772, "learning_rate": 7.233498467134307e-06, "loss": 1.5157, "step": 21230 }, { "epoch": 0.626124133864072, "grad_norm": 2.8904249227942445, "learning_rate": 7.23326024387603e-06, "loss": 1.4949, "step": 21235 }, { "epoch": 0.6262715612560814, "grad_norm": 3.078663170036785, "learning_rate": 7.233021918118844e-06, "loss": 1.5424, "step": 21240 }, { "epoch": 0.6264189886480909, "grad_norm": 3.203057124811582, "learning_rate": 7.232783489869761e-06, "loss": 1.4793, "step": 21245 }, { "epoch": 0.6265664160401002, "grad_norm": 2.903565133462133, "learning_rate": 7.232544959135795e-06, "loss": 1.4344, "step": 21250 }, { "epoch": 0.6267138434321097, "grad_norm": 3.178558268307845, "learning_rate": 7.23230632592397e-06, "loss": 1.5496, "step": 21255 }, { "epoch": 0.6268612708241191, "grad_norm": 3.2053757285438196, "learning_rate": 7.232067590241302e-06, "loss": 1.5218, "step": 21260 }, { "epoch": 0.6270086982161286, "grad_norm": 3.0056268782670132, "learning_rate": 7.231828752094821e-06, "loss": 1.4663, "step": 21265 }, { "epoch": 0.6271561256081379, "grad_norm": 3.130372434328295, "learning_rate": 7.231589811491552e-06, "loss": 1.5257, "step": 21270 }, { "epoch": 0.6273035530001474, "grad_norm": 2.974826062744242, "learning_rate": 7.2313507684385285e-06, "loss": 1.5124, "step": 21275 }, { "epoch": 0.6274509803921569, "grad_norm": 2.9499542147109774, "learning_rate": 7.231111622942782e-06, "loss": 1.4728, "step": 21280 }, { "epoch": 0.6275984077841663, "grad_norm": 2.9876217313521027, "learning_rate": 7.230872375011352e-06, "loss": 1.5412, "step": 21285 }, { "epoch": 0.6277458351761758, "grad_norm": 2.9683005383760332, "learning_rate": 7.230633024651278e-06, "loss": 1.4738, "step": 21290 }, { "epoch": 0.6278932625681851, "grad_norm": 2.8677569192425096, "learning_rate": 7.230393571869601e-06, "loss": 1.4784, "step": 21295 }, { "epoch": 0.6280406899601946, "grad_norm": 2.9271509153682436, "learning_rate": 7.23015401667337e-06, "loss": 1.4898, "step": 21300 }, { "epoch": 0.628188117352204, "grad_norm": 2.942975566757925, "learning_rate": 7.229914359069633e-06, "loss": 1.4711, "step": 21305 }, { "epoch": 0.6283355447442135, "grad_norm": 2.9787052834117063, "learning_rate": 7.229674599065442e-06, "loss": 1.508, "step": 21310 }, { "epoch": 0.628482972136223, "grad_norm": 2.909373774396211, "learning_rate": 7.229434736667853e-06, "loss": 1.5108, "step": 21315 }, { "epoch": 0.6286303995282323, "grad_norm": 2.9446373541766446, "learning_rate": 7.2291947718839246e-06, "loss": 1.4487, "step": 21320 }, { "epoch": 0.6287778269202418, "grad_norm": 2.8407191977999395, "learning_rate": 7.228954704720716e-06, "loss": 1.4698, "step": 21325 }, { "epoch": 0.6289252543122512, "grad_norm": 2.8932309094731483, "learning_rate": 7.228714535185293e-06, "loss": 1.5107, "step": 21330 }, { "epoch": 0.6290726817042607, "grad_norm": 3.129900096521204, "learning_rate": 7.228474263284723e-06, "loss": 1.5151, "step": 21335 }, { "epoch": 0.6292201090962701, "grad_norm": 2.8473636615266553, "learning_rate": 7.228233889026075e-06, "loss": 1.4192, "step": 21340 }, { "epoch": 0.6293675364882795, "grad_norm": 2.92085015905669, "learning_rate": 7.227993412416423e-06, "loss": 1.466, "step": 21345 }, { "epoch": 0.6295149638802889, "grad_norm": 2.890427532008051, "learning_rate": 7.227752833462844e-06, "loss": 1.4782, "step": 21350 }, { "epoch": 0.6296623912722984, "grad_norm": 2.8692669225550307, "learning_rate": 7.227512152172417e-06, "loss": 1.5046, "step": 21355 }, { "epoch": 0.6298098186643079, "grad_norm": 3.0740665895708843, "learning_rate": 7.227271368552224e-06, "loss": 1.5309, "step": 21360 }, { "epoch": 0.6299572460563173, "grad_norm": 2.8713979866325463, "learning_rate": 7.227030482609349e-06, "loss": 1.4815, "step": 21365 }, { "epoch": 0.6301046734483267, "grad_norm": 3.0393941040920645, "learning_rate": 7.226789494350884e-06, "loss": 1.4577, "step": 21370 }, { "epoch": 0.6302521008403361, "grad_norm": 3.0865269600003544, "learning_rate": 7.226548403783916e-06, "loss": 1.5338, "step": 21375 }, { "epoch": 0.6303995282323456, "grad_norm": 3.031692967311904, "learning_rate": 7.226307210915542e-06, "loss": 1.4605, "step": 21380 }, { "epoch": 0.630546955624355, "grad_norm": 3.0506561219996633, "learning_rate": 7.226065915752859e-06, "loss": 1.4939, "step": 21385 }, { "epoch": 0.6306943830163645, "grad_norm": 3.0281758866766006, "learning_rate": 7.225824518302967e-06, "loss": 1.5144, "step": 21390 }, { "epoch": 0.6308418104083738, "grad_norm": 3.2341091091307437, "learning_rate": 7.22558301857297e-06, "loss": 1.5316, "step": 21395 }, { "epoch": 0.6309892378003833, "grad_norm": 2.905605827035142, "learning_rate": 7.225341416569973e-06, "loss": 1.5458, "step": 21400 }, { "epoch": 0.6311366651923928, "grad_norm": 2.944439529713176, "learning_rate": 7.2250997123010875e-06, "loss": 1.4737, "step": 21405 }, { "epoch": 0.6312840925844022, "grad_norm": 2.9632282917598642, "learning_rate": 7.224857905773423e-06, "loss": 1.5235, "step": 21410 }, { "epoch": 0.6314315199764117, "grad_norm": 3.116501193997897, "learning_rate": 7.224615996994099e-06, "loss": 1.5064, "step": 21415 }, { "epoch": 0.631578947368421, "grad_norm": 2.919795486322421, "learning_rate": 7.224373985970231e-06, "loss": 1.5294, "step": 21420 }, { "epoch": 0.6317263747604305, "grad_norm": 2.9763040249966073, "learning_rate": 7.224131872708941e-06, "loss": 1.454, "step": 21425 }, { "epoch": 0.6318738021524399, "grad_norm": 2.9099272538481795, "learning_rate": 7.223889657217353e-06, "loss": 1.5284, "step": 21430 }, { "epoch": 0.6320212295444494, "grad_norm": 2.954909395696258, "learning_rate": 7.223647339502596e-06, "loss": 1.5403, "step": 21435 }, { "epoch": 0.6321686569364587, "grad_norm": 2.8199863566151446, "learning_rate": 7.223404919571799e-06, "loss": 1.5112, "step": 21440 }, { "epoch": 0.6323160843284682, "grad_norm": 2.936815412084994, "learning_rate": 7.223162397432096e-06, "loss": 1.5229, "step": 21445 }, { "epoch": 0.6324635117204777, "grad_norm": 2.85427070288374, "learning_rate": 7.2229197730906244e-06, "loss": 1.485, "step": 21450 }, { "epoch": 0.6326109391124871, "grad_norm": 2.891356014063386, "learning_rate": 7.222677046554521e-06, "loss": 1.4793, "step": 21455 }, { "epoch": 0.6327583665044966, "grad_norm": 3.106582886960492, "learning_rate": 7.222434217830932e-06, "loss": 1.4719, "step": 21460 }, { "epoch": 0.6329057938965059, "grad_norm": 3.0284494429858286, "learning_rate": 7.2221912869270005e-06, "loss": 1.5548, "step": 21465 }, { "epoch": 0.6330532212885154, "grad_norm": 2.778702560385949, "learning_rate": 7.2219482538498754e-06, "loss": 1.4695, "step": 21470 }, { "epoch": 0.6332006486805248, "grad_norm": 2.9337062117259074, "learning_rate": 7.221705118606708e-06, "loss": 1.5056, "step": 21475 }, { "epoch": 0.6333480760725343, "grad_norm": 2.932847345796518, "learning_rate": 7.221461881204653e-06, "loss": 1.5151, "step": 21480 }, { "epoch": 0.6334955034645438, "grad_norm": 2.8129325126386764, "learning_rate": 7.221218541650868e-06, "loss": 1.4828, "step": 21485 }, { "epoch": 0.6336429308565531, "grad_norm": 3.1604777914809863, "learning_rate": 7.220975099952515e-06, "loss": 1.5247, "step": 21490 }, { "epoch": 0.6337903582485626, "grad_norm": 3.159989040312956, "learning_rate": 7.220731556116756e-06, "loss": 1.4544, "step": 21495 }, { "epoch": 0.633937785640572, "grad_norm": 2.8723585157037794, "learning_rate": 7.220487910150758e-06, "loss": 1.5025, "step": 21500 }, { "epoch": 0.633937785640572, "eval_loss": 1.1763789653778076, "eval_runtime": 5.1582, "eval_samples_per_second": 76.77, "eval_steps_per_second": 2.52, "step": 21500 }, { "epoch": 0.6340852130325815, "grad_norm": 2.8805517674273404, "learning_rate": 7.220244162061689e-06, "loss": 1.476, "step": 21505 }, { "epoch": 0.6342326404245909, "grad_norm": 2.839223833735017, "learning_rate": 7.220000311856724e-06, "loss": 1.4846, "step": 21510 }, { "epoch": 0.6343800678166003, "grad_norm": 2.951285392213677, "learning_rate": 7.219756359543037e-06, "loss": 1.4972, "step": 21515 }, { "epoch": 0.6345274952086097, "grad_norm": 2.9572166524123285, "learning_rate": 7.219512305127808e-06, "loss": 1.4982, "step": 21520 }, { "epoch": 0.6346749226006192, "grad_norm": 2.9837330388439676, "learning_rate": 7.219268148618216e-06, "loss": 1.4645, "step": 21525 }, { "epoch": 0.6348223499926287, "grad_norm": 2.9620683613508008, "learning_rate": 7.219023890021449e-06, "loss": 1.5147, "step": 21530 }, { "epoch": 0.6349697773846381, "grad_norm": 2.9916583495433478, "learning_rate": 7.218779529344692e-06, "loss": 1.4788, "step": 21535 }, { "epoch": 0.6351172047766475, "grad_norm": 2.985635695341296, "learning_rate": 7.218535066595136e-06, "loss": 1.449, "step": 21540 }, { "epoch": 0.6352646321686569, "grad_norm": 2.94728045346511, "learning_rate": 7.218290501779975e-06, "loss": 1.4513, "step": 21545 }, { "epoch": 0.6354120595606664, "grad_norm": 2.8690665712264614, "learning_rate": 7.218045834906407e-06, "loss": 1.477, "step": 21550 }, { "epoch": 0.6355594869526758, "grad_norm": 3.0620352112611546, "learning_rate": 7.217801065981629e-06, "loss": 1.4732, "step": 21555 }, { "epoch": 0.6357069143446853, "grad_norm": 2.9730227837863548, "learning_rate": 7.217556195012844e-06, "loss": 1.5047, "step": 21560 }, { "epoch": 0.6358543417366946, "grad_norm": 3.0916334746821392, "learning_rate": 7.2173112220072605e-06, "loss": 1.5203, "step": 21565 }, { "epoch": 0.6360017691287041, "grad_norm": 2.9619967124666666, "learning_rate": 7.2170661469720835e-06, "loss": 1.4846, "step": 21570 }, { "epoch": 0.6361491965207136, "grad_norm": 2.919054055993886, "learning_rate": 7.2168209699145265e-06, "loss": 1.4833, "step": 21575 }, { "epoch": 0.636296623912723, "grad_norm": 3.119277421117267, "learning_rate": 7.216575690841804e-06, "loss": 1.4926, "step": 21580 }, { "epoch": 0.6364440513047325, "grad_norm": 2.9681856204413264, "learning_rate": 7.216330309761133e-06, "loss": 1.5156, "step": 21585 }, { "epoch": 0.6365914786967418, "grad_norm": 2.9443233625210414, "learning_rate": 7.2160848266797355e-06, "loss": 1.5096, "step": 21590 }, { "epoch": 0.6367389060887513, "grad_norm": 3.26266278792463, "learning_rate": 7.215839241604834e-06, "loss": 1.5612, "step": 21595 }, { "epoch": 0.6368863334807607, "grad_norm": 2.8925455946695133, "learning_rate": 7.215593554543655e-06, "loss": 1.521, "step": 21600 }, { "epoch": 0.6370337608727702, "grad_norm": 3.049913127182938, "learning_rate": 7.215347765503428e-06, "loss": 1.492, "step": 21605 }, { "epoch": 0.6371811882647795, "grad_norm": 2.9667562843976234, "learning_rate": 7.215101874491385e-06, "loss": 1.5156, "step": 21610 }, { "epoch": 0.637328615656789, "grad_norm": 3.036286237240698, "learning_rate": 7.2148558815147645e-06, "loss": 1.511, "step": 21615 }, { "epoch": 0.6374760430487985, "grad_norm": 2.9076231334425486, "learning_rate": 7.214609786580802e-06, "loss": 1.5036, "step": 21620 }, { "epoch": 0.6376234704408079, "grad_norm": 3.0013972577594394, "learning_rate": 7.21436358969674e-06, "loss": 1.5079, "step": 21625 }, { "epoch": 0.6377708978328174, "grad_norm": 2.937100633607573, "learning_rate": 7.214117290869825e-06, "loss": 1.5104, "step": 21630 }, { "epoch": 0.6379183252248267, "grad_norm": 2.7699838030474853, "learning_rate": 7.213870890107302e-06, "loss": 1.4532, "step": 21635 }, { "epoch": 0.6380657526168362, "grad_norm": 2.8942707744609812, "learning_rate": 7.213624387416424e-06, "loss": 1.5086, "step": 21640 }, { "epoch": 0.6382131800088456, "grad_norm": 2.8275894733252227, "learning_rate": 7.213377782804442e-06, "loss": 1.4659, "step": 21645 }, { "epoch": 0.6383606074008551, "grad_norm": 3.0458357076614324, "learning_rate": 7.213131076278615e-06, "loss": 1.5155, "step": 21650 }, { "epoch": 0.6385080347928646, "grad_norm": 2.777793603929982, "learning_rate": 7.212884267846201e-06, "loss": 1.4955, "step": 21655 }, { "epoch": 0.6386554621848739, "grad_norm": 3.0725599065937312, "learning_rate": 7.2126373575144625e-06, "loss": 1.512, "step": 21660 }, { "epoch": 0.6388028895768834, "grad_norm": 2.888749066815933, "learning_rate": 7.212390345290667e-06, "loss": 1.5069, "step": 21665 }, { "epoch": 0.6389503169688928, "grad_norm": 2.880441077242244, "learning_rate": 7.212143231182081e-06, "loss": 1.4921, "step": 21670 }, { "epoch": 0.6390977443609023, "grad_norm": 2.9638488317152714, "learning_rate": 7.211896015195978e-06, "loss": 1.5, "step": 21675 }, { "epoch": 0.6392451717529117, "grad_norm": 3.1312646689901626, "learning_rate": 7.211648697339632e-06, "loss": 1.5278, "step": 21680 }, { "epoch": 0.6393925991449211, "grad_norm": 3.07722686870135, "learning_rate": 7.211401277620319e-06, "loss": 1.5074, "step": 21685 }, { "epoch": 0.6395400265369305, "grad_norm": 2.794426579654041, "learning_rate": 7.211153756045321e-06, "loss": 1.484, "step": 21690 }, { "epoch": 0.63968745392894, "grad_norm": 2.9640315961629735, "learning_rate": 7.210906132621922e-06, "loss": 1.4653, "step": 21695 }, { "epoch": 0.6398348813209495, "grad_norm": 3.117143579839957, "learning_rate": 7.210658407357408e-06, "loss": 1.5024, "step": 21700 }, { "epoch": 0.6399823087129589, "grad_norm": 2.9702373429597415, "learning_rate": 7.210410580259069e-06, "loss": 1.4962, "step": 21705 }, { "epoch": 0.6401297361049683, "grad_norm": 3.0374140566985868, "learning_rate": 7.210162651334197e-06, "loss": 1.5384, "step": 21710 }, { "epoch": 0.6402771634969777, "grad_norm": 3.073065953238068, "learning_rate": 7.209914620590088e-06, "loss": 1.5246, "step": 21715 }, { "epoch": 0.6404245908889872, "grad_norm": 2.9185022175482853, "learning_rate": 7.209666488034042e-06, "loss": 1.5339, "step": 21720 }, { "epoch": 0.6405720182809966, "grad_norm": 2.9625208609273277, "learning_rate": 7.2094182536733565e-06, "loss": 1.5227, "step": 21725 }, { "epoch": 0.6407194456730061, "grad_norm": 2.804309732664351, "learning_rate": 7.2091699175153395e-06, "loss": 1.4784, "step": 21730 }, { "epoch": 0.6408668730650154, "grad_norm": 2.913359408854471, "learning_rate": 7.208921479567298e-06, "loss": 1.5003, "step": 21735 }, { "epoch": 0.6410143004570249, "grad_norm": 2.9569427966905724, "learning_rate": 7.208672939836542e-06, "loss": 1.5219, "step": 21740 }, { "epoch": 0.6411617278490344, "grad_norm": 2.9663375292595346, "learning_rate": 7.208424298330386e-06, "loss": 1.4769, "step": 21745 }, { "epoch": 0.6413091552410438, "grad_norm": 2.915531543219277, "learning_rate": 7.208175555056145e-06, "loss": 1.4801, "step": 21750 }, { "epoch": 0.6414565826330533, "grad_norm": 2.6537149792513035, "learning_rate": 7.20792671002114e-06, "loss": 1.4501, "step": 21755 }, { "epoch": 0.6416040100250626, "grad_norm": 6.313045002283292, "learning_rate": 7.207677763232693e-06, "loss": 1.5599, "step": 21760 }, { "epoch": 0.6417514374170721, "grad_norm": 3.0911581491004987, "learning_rate": 7.207428714698129e-06, "loss": 1.5332, "step": 21765 }, { "epoch": 0.6418988648090815, "grad_norm": 3.065510175646678, "learning_rate": 7.207179564424779e-06, "loss": 1.4713, "step": 21770 }, { "epoch": 0.642046292201091, "grad_norm": 3.0340402456983475, "learning_rate": 7.206930312419971e-06, "loss": 1.4606, "step": 21775 }, { "epoch": 0.6421937195931003, "grad_norm": 3.0272869175475634, "learning_rate": 7.2066809586910405e-06, "loss": 1.5046, "step": 21780 }, { "epoch": 0.6423411469851098, "grad_norm": 2.9217653472397807, "learning_rate": 7.206431503245327e-06, "loss": 1.4863, "step": 21785 }, { "epoch": 0.6424885743771193, "grad_norm": 2.778885865117478, "learning_rate": 7.206181946090169e-06, "loss": 1.5302, "step": 21790 }, { "epoch": 0.6426360017691287, "grad_norm": 3.080108183184536, "learning_rate": 7.205932287232911e-06, "loss": 1.5404, "step": 21795 }, { "epoch": 0.6427834291611382, "grad_norm": 2.998074411345042, "learning_rate": 7.205682526680899e-06, "loss": 1.4855, "step": 21800 }, { "epoch": 0.6429308565531475, "grad_norm": 3.012252872475506, "learning_rate": 7.205432664441483e-06, "loss": 1.4842, "step": 21805 }, { "epoch": 0.643078283945157, "grad_norm": 2.9748551858750196, "learning_rate": 7.205182700522016e-06, "loss": 1.4829, "step": 21810 }, { "epoch": 0.6432257113371664, "grad_norm": 2.913560653238869, "learning_rate": 7.204932634929851e-06, "loss": 1.5387, "step": 21815 }, { "epoch": 0.6433731387291759, "grad_norm": 2.9199189206271474, "learning_rate": 7.204682467672348e-06, "loss": 1.4747, "step": 21820 }, { "epoch": 0.6435205661211854, "grad_norm": 2.832416115013388, "learning_rate": 7.20443219875687e-06, "loss": 1.5097, "step": 21825 }, { "epoch": 0.6436679935131947, "grad_norm": 3.0293544028392128, "learning_rate": 7.204181828190778e-06, "loss": 1.516, "step": 21830 }, { "epoch": 0.6438154209052042, "grad_norm": 2.864420406002078, "learning_rate": 7.203931355981444e-06, "loss": 1.4622, "step": 21835 }, { "epoch": 0.6439628482972136, "grad_norm": 3.3001774376082103, "learning_rate": 7.2036807821362335e-06, "loss": 1.5218, "step": 21840 }, { "epoch": 0.6441102756892231, "grad_norm": 2.8928921083917354, "learning_rate": 7.203430106662525e-06, "loss": 1.5225, "step": 21845 }, { "epoch": 0.6442577030812325, "grad_norm": 2.817317778275006, "learning_rate": 7.20317932956769e-06, "loss": 1.4812, "step": 21850 }, { "epoch": 0.6444051304732419, "grad_norm": 2.7709076367006866, "learning_rate": 7.2029284508591105e-06, "loss": 1.5098, "step": 21855 }, { "epoch": 0.6445525578652513, "grad_norm": 2.9611760040891855, "learning_rate": 7.202677470544168e-06, "loss": 1.5197, "step": 21860 }, { "epoch": 0.6446999852572608, "grad_norm": 3.123657158859859, "learning_rate": 7.2024263886302495e-06, "loss": 1.5375, "step": 21865 }, { "epoch": 0.6448474126492703, "grad_norm": 2.882915421636426, "learning_rate": 7.2021752051247425e-06, "loss": 1.5012, "step": 21870 }, { "epoch": 0.6449948400412797, "grad_norm": 3.020239862152375, "learning_rate": 7.201923920035037e-06, "loss": 1.5398, "step": 21875 }, { "epoch": 0.6451422674332891, "grad_norm": 2.942634070633396, "learning_rate": 7.201672533368528e-06, "loss": 1.4689, "step": 21880 }, { "epoch": 0.6452896948252985, "grad_norm": 2.897391512893972, "learning_rate": 7.201421045132616e-06, "loss": 1.5242, "step": 21885 }, { "epoch": 0.645437122217308, "grad_norm": 3.1544893491139923, "learning_rate": 7.201169455334695e-06, "loss": 1.518, "step": 21890 }, { "epoch": 0.6455845496093174, "grad_norm": 3.037315132729366, "learning_rate": 7.2009177639821755e-06, "loss": 1.4646, "step": 21895 }, { "epoch": 0.6457319770013269, "grad_norm": 3.0346959689748902, "learning_rate": 7.2006659710824585e-06, "loss": 1.5169, "step": 21900 }, { "epoch": 0.6458794043933362, "grad_norm": 2.958008971355786, "learning_rate": 7.200414076642954e-06, "loss": 1.4783, "step": 21905 }, { "epoch": 0.6460268317853457, "grad_norm": 2.775762321708978, "learning_rate": 7.200162080671078e-06, "loss": 1.4845, "step": 21910 }, { "epoch": 0.6461742591773552, "grad_norm": 2.894510594280835, "learning_rate": 7.19990998317424e-06, "loss": 1.4949, "step": 21915 }, { "epoch": 0.6463216865693646, "grad_norm": 2.9892788796401084, "learning_rate": 7.199657784159864e-06, "loss": 1.5408, "step": 21920 }, { "epoch": 0.6464691139613741, "grad_norm": 3.2863669154492716, "learning_rate": 7.199405483635368e-06, "loss": 1.5138, "step": 21925 }, { "epoch": 0.6466165413533834, "grad_norm": 2.9851574110987276, "learning_rate": 7.199153081608176e-06, "loss": 1.5052, "step": 21930 }, { "epoch": 0.6467639687453929, "grad_norm": 2.9420515283929785, "learning_rate": 7.198900578085716e-06, "loss": 1.4904, "step": 21935 }, { "epoch": 0.6469113961374023, "grad_norm": 2.8491124677825734, "learning_rate": 7.198647973075419e-06, "loss": 1.4955, "step": 21940 }, { "epoch": 0.6470588235294118, "grad_norm": 3.4375909121988686, "learning_rate": 7.198395266584717e-06, "loss": 1.5146, "step": 21945 }, { "epoch": 0.6472062509214213, "grad_norm": 2.9158542536028125, "learning_rate": 7.1981424586210465e-06, "loss": 1.522, "step": 21950 }, { "epoch": 0.6473536783134306, "grad_norm": 3.0323286501197346, "learning_rate": 7.197889549191847e-06, "loss": 1.5034, "step": 21955 }, { "epoch": 0.6475011057054401, "grad_norm": 2.879859296518601, "learning_rate": 7.197636538304561e-06, "loss": 1.5375, "step": 21960 }, { "epoch": 0.6476485330974495, "grad_norm": 2.8927981343493903, "learning_rate": 7.1973834259666325e-06, "loss": 1.4791, "step": 21965 }, { "epoch": 0.647795960489459, "grad_norm": 3.0834546094002278, "learning_rate": 7.197130212185511e-06, "loss": 1.5678, "step": 21970 }, { "epoch": 0.6479433878814683, "grad_norm": 3.0669391586740335, "learning_rate": 7.1968768969686446e-06, "loss": 1.5215, "step": 21975 }, { "epoch": 0.6480908152734778, "grad_norm": 2.973211811829555, "learning_rate": 7.1966234803234915e-06, "loss": 1.4759, "step": 21980 }, { "epoch": 0.6482382426654872, "grad_norm": 2.988384575539955, "learning_rate": 7.196369962257506e-06, "loss": 1.5089, "step": 21985 }, { "epoch": 0.6483856700574967, "grad_norm": 2.946760312404334, "learning_rate": 7.19611634277815e-06, "loss": 1.5, "step": 21990 }, { "epoch": 0.6485330974495062, "grad_norm": 2.8642835957342805, "learning_rate": 7.195862621892885e-06, "loss": 1.5179, "step": 21995 }, { "epoch": 0.6486805248415155, "grad_norm": 2.982266109079571, "learning_rate": 7.195608799609177e-06, "loss": 1.4493, "step": 22000 }, { "epoch": 0.6486805248415155, "eval_loss": 1.1728267669677734, "eval_runtime": 4.3012, "eval_samples_per_second": 92.068, "eval_steps_per_second": 3.022, "step": 22000 }, { "epoch": 0.648827952233525, "grad_norm": 3.067343882903477, "learning_rate": 7.195354875934497e-06, "loss": 1.5105, "step": 22005 }, { "epoch": 0.6489753796255344, "grad_norm": 2.8671251685807535, "learning_rate": 7.195100850876314e-06, "loss": 1.5011, "step": 22010 }, { "epoch": 0.6491228070175439, "grad_norm": 2.962960697404298, "learning_rate": 7.194846724442106e-06, "loss": 1.5356, "step": 22015 }, { "epoch": 0.6492702344095533, "grad_norm": 2.945022330532729, "learning_rate": 7.194592496639349e-06, "loss": 1.4659, "step": 22020 }, { "epoch": 0.6494176618015627, "grad_norm": 2.887695886063684, "learning_rate": 7.194338167475524e-06, "loss": 1.5371, "step": 22025 }, { "epoch": 0.6495650891935721, "grad_norm": 2.9046013572323237, "learning_rate": 7.194083736958116e-06, "loss": 1.5142, "step": 22030 }, { "epoch": 0.6497125165855816, "grad_norm": 2.9308727969466557, "learning_rate": 7.1938292050946115e-06, "loss": 1.5034, "step": 22035 }, { "epoch": 0.6498599439775911, "grad_norm": 2.918670436208256, "learning_rate": 7.1935745718925e-06, "loss": 1.4841, "step": 22040 }, { "epoch": 0.6500073713696005, "grad_norm": 3.519907424455139, "learning_rate": 7.193319837359273e-06, "loss": 1.5126, "step": 22045 }, { "epoch": 0.6501547987616099, "grad_norm": 3.1821791009024083, "learning_rate": 7.19306500150243e-06, "loss": 1.5196, "step": 22050 }, { "epoch": 0.6503022261536193, "grad_norm": 3.17221529814453, "learning_rate": 7.192810064329468e-06, "loss": 1.5061, "step": 22055 }, { "epoch": 0.6504496535456288, "grad_norm": 2.895048602441336, "learning_rate": 7.192555025847887e-06, "loss": 1.4737, "step": 22060 }, { "epoch": 0.6505970809376382, "grad_norm": 2.8883042978423235, "learning_rate": 7.192299886065194e-06, "loss": 1.4852, "step": 22065 }, { "epoch": 0.6507445083296477, "grad_norm": 2.860796279938906, "learning_rate": 7.192044644988895e-06, "loss": 1.5072, "step": 22070 }, { "epoch": 0.650891935721657, "grad_norm": 2.8526746660277262, "learning_rate": 7.191789302626504e-06, "loss": 1.5282, "step": 22075 }, { "epoch": 0.6510393631136665, "grad_norm": 3.037542970346037, "learning_rate": 7.191533858985531e-06, "loss": 1.5101, "step": 22080 }, { "epoch": 0.651186790505676, "grad_norm": 2.8925802410163337, "learning_rate": 7.191278314073496e-06, "loss": 1.4757, "step": 22085 }, { "epoch": 0.6513342178976854, "grad_norm": 2.87581724721276, "learning_rate": 7.191022667897916e-06, "loss": 1.474, "step": 22090 }, { "epoch": 0.6514816452896949, "grad_norm": 2.9698154203566585, "learning_rate": 7.190766920466315e-06, "loss": 1.5309, "step": 22095 }, { "epoch": 0.6516290726817042, "grad_norm": 2.8789481561695442, "learning_rate": 7.1905110717862186e-06, "loss": 1.5453, "step": 22100 }, { "epoch": 0.6517765000737137, "grad_norm": 2.956301718020041, "learning_rate": 7.190255121865155e-06, "loss": 1.4872, "step": 22105 }, { "epoch": 0.6519239274657231, "grad_norm": 3.0608091208389716, "learning_rate": 7.1899990707106556e-06, "loss": 1.4911, "step": 22110 }, { "epoch": 0.6520713548577326, "grad_norm": 2.910407335709711, "learning_rate": 7.1897429183302565e-06, "loss": 1.4767, "step": 22115 }, { "epoch": 0.6522187822497421, "grad_norm": 2.9390743796840098, "learning_rate": 7.189486664731494e-06, "loss": 1.5249, "step": 22120 }, { "epoch": 0.6523662096417514, "grad_norm": 2.9406696882815466, "learning_rate": 7.189230309921908e-06, "loss": 1.544, "step": 22125 }, { "epoch": 0.6525136370337609, "grad_norm": 2.953801430412549, "learning_rate": 7.188973853909043e-06, "loss": 1.5229, "step": 22130 }, { "epoch": 0.6526610644257703, "grad_norm": 2.9737594324043224, "learning_rate": 7.188717296700447e-06, "loss": 1.5325, "step": 22135 }, { "epoch": 0.6528084918177798, "grad_norm": 3.1295599187414487, "learning_rate": 7.188460638303667e-06, "loss": 1.5235, "step": 22140 }, { "epoch": 0.6529559192097891, "grad_norm": 2.8653911091042255, "learning_rate": 7.188203878726256e-06, "loss": 1.55, "step": 22145 }, { "epoch": 0.6531033466017986, "grad_norm": 2.9515211356935716, "learning_rate": 7.18794701797577e-06, "loss": 1.5445, "step": 22150 }, { "epoch": 0.653250773993808, "grad_norm": 3.0826808073246386, "learning_rate": 7.187690056059768e-06, "loss": 1.5032, "step": 22155 }, { "epoch": 0.6533982013858175, "grad_norm": 3.0271369783191555, "learning_rate": 7.187432992985811e-06, "loss": 1.4982, "step": 22160 }, { "epoch": 0.653545628777827, "grad_norm": 2.8270710389761042, "learning_rate": 7.187175828761462e-06, "loss": 1.4442, "step": 22165 }, { "epoch": 0.6536930561698363, "grad_norm": 3.1632091884867317, "learning_rate": 7.186918563394289e-06, "loss": 1.4788, "step": 22170 }, { "epoch": 0.6538404835618458, "grad_norm": 2.9652087280148627, "learning_rate": 7.186661196891863e-06, "loss": 1.5302, "step": 22175 }, { "epoch": 0.6539879109538552, "grad_norm": 2.891294447870898, "learning_rate": 7.1864037292617575e-06, "loss": 1.5017, "step": 22180 }, { "epoch": 0.6541353383458647, "grad_norm": 2.974854201505897, "learning_rate": 7.186146160511549e-06, "loss": 1.4639, "step": 22185 }, { "epoch": 0.6542827657378741, "grad_norm": 2.8996452065420484, "learning_rate": 7.185888490648814e-06, "loss": 1.5032, "step": 22190 }, { "epoch": 0.6544301931298835, "grad_norm": 2.94258019348109, "learning_rate": 7.185630719681139e-06, "loss": 1.505, "step": 22195 }, { "epoch": 0.6545776205218929, "grad_norm": 2.7539926581920047, "learning_rate": 7.1853728476161065e-06, "loss": 1.4985, "step": 22200 }, { "epoch": 0.6547250479139024, "grad_norm": 3.010620092674269, "learning_rate": 7.185114874461304e-06, "loss": 1.4972, "step": 22205 }, { "epoch": 0.6548724753059119, "grad_norm": 2.7356950878105444, "learning_rate": 7.184856800224324e-06, "loss": 1.5186, "step": 22210 }, { "epoch": 0.6550199026979213, "grad_norm": 2.926799320269882, "learning_rate": 7.184598624912762e-06, "loss": 1.4417, "step": 22215 }, { "epoch": 0.6551673300899307, "grad_norm": 2.9049312701417715, "learning_rate": 7.184340348534212e-06, "loss": 1.5173, "step": 22220 }, { "epoch": 0.6553147574819401, "grad_norm": 2.833841186163319, "learning_rate": 7.184081971096276e-06, "loss": 1.5377, "step": 22225 }, { "epoch": 0.6554621848739496, "grad_norm": 2.9233909223776418, "learning_rate": 7.183823492606557e-06, "loss": 1.5341, "step": 22230 }, { "epoch": 0.655609612265959, "grad_norm": 3.0199702935395867, "learning_rate": 7.18356491307266e-06, "loss": 1.5082, "step": 22235 }, { "epoch": 0.6557570396579685, "grad_norm": 2.8451311062971816, "learning_rate": 7.183306232502197e-06, "loss": 1.5268, "step": 22240 }, { "epoch": 0.6559044670499778, "grad_norm": 2.884476613568832, "learning_rate": 7.183047450902776e-06, "loss": 1.4967, "step": 22245 }, { "epoch": 0.6560518944419873, "grad_norm": 3.0413094184796905, "learning_rate": 7.182788568282013e-06, "loss": 1.541, "step": 22250 }, { "epoch": 0.6561993218339968, "grad_norm": 2.8267836097665904, "learning_rate": 7.182529584647527e-06, "loss": 1.4936, "step": 22255 }, { "epoch": 0.6563467492260062, "grad_norm": 2.885674100844239, "learning_rate": 7.182270500006939e-06, "loss": 1.5812, "step": 22260 }, { "epoch": 0.6564941766180157, "grad_norm": 3.024222320756331, "learning_rate": 7.182011314367871e-06, "loss": 1.4986, "step": 22265 }, { "epoch": 0.656641604010025, "grad_norm": 2.786924286488386, "learning_rate": 7.181752027737953e-06, "loss": 1.4539, "step": 22270 }, { "epoch": 0.6567890314020345, "grad_norm": 2.9663411362757235, "learning_rate": 7.181492640124811e-06, "loss": 1.5017, "step": 22275 }, { "epoch": 0.6569364587940439, "grad_norm": 2.853510545374861, "learning_rate": 7.181233151536082e-06, "loss": 1.4579, "step": 22280 }, { "epoch": 0.6570838861860534, "grad_norm": 2.9687440212882965, "learning_rate": 7.180973561979398e-06, "loss": 1.5466, "step": 22285 }, { "epoch": 0.6572313135780629, "grad_norm": 3.006116594460592, "learning_rate": 7.180713871462398e-06, "loss": 1.5204, "step": 22290 }, { "epoch": 0.6573787409700722, "grad_norm": 2.9352645165574227, "learning_rate": 7.180454079992727e-06, "loss": 1.5015, "step": 22295 }, { "epoch": 0.6575261683620817, "grad_norm": 2.8782237410290823, "learning_rate": 7.180194187578026e-06, "loss": 1.5381, "step": 22300 }, { "epoch": 0.6576735957540911, "grad_norm": 2.909837661420052, "learning_rate": 7.179934194225946e-06, "loss": 1.5322, "step": 22305 }, { "epoch": 0.6578210231461006, "grad_norm": 3.0776417816219075, "learning_rate": 7.179674099944134e-06, "loss": 1.5264, "step": 22310 }, { "epoch": 0.6579684505381099, "grad_norm": 3.465567720078928, "learning_rate": 7.179413904740246e-06, "loss": 1.4786, "step": 22315 }, { "epoch": 0.6581158779301194, "grad_norm": 2.8588601524605712, "learning_rate": 7.179153608621937e-06, "loss": 1.4578, "step": 22320 }, { "epoch": 0.6582633053221288, "grad_norm": 3.0673409248728345, "learning_rate": 7.178893211596869e-06, "loss": 1.4512, "step": 22325 }, { "epoch": 0.6584107327141383, "grad_norm": 2.888028012253292, "learning_rate": 7.178632713672702e-06, "loss": 1.501, "step": 22330 }, { "epoch": 0.6585581601061478, "grad_norm": 3.09312390306036, "learning_rate": 7.178372114857102e-06, "loss": 1.513, "step": 22335 }, { "epoch": 0.6587055874981571, "grad_norm": 3.0706823856754064, "learning_rate": 7.178111415157739e-06, "loss": 1.5454, "step": 22340 }, { "epoch": 0.6588530148901666, "grad_norm": 2.9145284671601237, "learning_rate": 7.177850614582281e-06, "loss": 1.4687, "step": 22345 }, { "epoch": 0.659000442282176, "grad_norm": 2.962202837935642, "learning_rate": 7.177589713138406e-06, "loss": 1.4736, "step": 22350 }, { "epoch": 0.6591478696741855, "grad_norm": 2.7949799082659728, "learning_rate": 7.17732871083379e-06, "loss": 1.5022, "step": 22355 }, { "epoch": 0.6592952970661949, "grad_norm": 2.826419149996121, "learning_rate": 7.177067607676112e-06, "loss": 1.5165, "step": 22360 }, { "epoch": 0.6594427244582043, "grad_norm": 2.9321582124907746, "learning_rate": 7.176806403673057e-06, "loss": 1.4927, "step": 22365 }, { "epoch": 0.6595901518502137, "grad_norm": 2.8948508671085946, "learning_rate": 7.176545098832311e-06, "loss": 1.4785, "step": 22370 }, { "epoch": 0.6597375792422232, "grad_norm": 2.935832216900248, "learning_rate": 7.176283693161562e-06, "loss": 1.4837, "step": 22375 }, { "epoch": 0.6598850066342327, "grad_norm": 2.9173234734719284, "learning_rate": 7.176022186668503e-06, "loss": 1.4812, "step": 22380 }, { "epoch": 0.6600324340262421, "grad_norm": 2.7800071744123582, "learning_rate": 7.17576057936083e-06, "loss": 1.4701, "step": 22385 }, { "epoch": 0.6601798614182515, "grad_norm": 2.7831718939446355, "learning_rate": 7.175498871246239e-06, "loss": 1.505, "step": 22390 }, { "epoch": 0.6603272888102609, "grad_norm": 3.389907046945688, "learning_rate": 7.175237062332432e-06, "loss": 1.4966, "step": 22395 }, { "epoch": 0.6604747162022704, "grad_norm": 2.9826683098699487, "learning_rate": 7.174975152627114e-06, "loss": 1.5042, "step": 22400 }, { "epoch": 0.6606221435942798, "grad_norm": 2.780418176085752, "learning_rate": 7.174713142137991e-06, "loss": 1.454, "step": 22405 }, { "epoch": 0.6607695709862893, "grad_norm": 2.8285439092370215, "learning_rate": 7.174451030872773e-06, "loss": 1.5228, "step": 22410 }, { "epoch": 0.6609169983782986, "grad_norm": 3.094659714680067, "learning_rate": 7.174188818839174e-06, "loss": 1.5534, "step": 22415 }, { "epoch": 0.6610644257703081, "grad_norm": 2.9137623761397373, "learning_rate": 7.173926506044907e-06, "loss": 1.509, "step": 22420 }, { "epoch": 0.6612118531623176, "grad_norm": 2.9601929033291325, "learning_rate": 7.173664092497695e-06, "loss": 1.5113, "step": 22425 }, { "epoch": 0.661359280554327, "grad_norm": 2.986060118210185, "learning_rate": 7.173401578205257e-06, "loss": 1.5286, "step": 22430 }, { "epoch": 0.6615067079463365, "grad_norm": 2.9017913060740925, "learning_rate": 7.173138963175318e-06, "loss": 1.5285, "step": 22435 }, { "epoch": 0.6616541353383458, "grad_norm": 2.9432360745113346, "learning_rate": 7.172876247415606e-06, "loss": 1.4723, "step": 22440 }, { "epoch": 0.6618015627303553, "grad_norm": 2.8620103204435887, "learning_rate": 7.172613430933853e-06, "loss": 1.508, "step": 22445 }, { "epoch": 0.6619489901223647, "grad_norm": 2.8745288712050145, "learning_rate": 7.172350513737791e-06, "loss": 1.4765, "step": 22450 }, { "epoch": 0.6620964175143742, "grad_norm": 2.916908853747911, "learning_rate": 7.172087495835157e-06, "loss": 1.5458, "step": 22455 }, { "epoch": 0.6622438449063837, "grad_norm": 2.957023683151354, "learning_rate": 7.171824377233692e-06, "loss": 1.5007, "step": 22460 }, { "epoch": 0.662391272298393, "grad_norm": 2.824853784490956, "learning_rate": 7.171561157941136e-06, "loss": 1.4817, "step": 22465 }, { "epoch": 0.6625386996904025, "grad_norm": 2.8238836366142133, "learning_rate": 7.1712978379652355e-06, "loss": 1.5013, "step": 22470 }, { "epoch": 0.6626861270824119, "grad_norm": 2.975664293716427, "learning_rate": 7.17103441731374e-06, "loss": 1.4948, "step": 22475 }, { "epoch": 0.6628335544744214, "grad_norm": 2.943192447044364, "learning_rate": 7.1707708959944014e-06, "loss": 1.4908, "step": 22480 }, { "epoch": 0.6629809818664307, "grad_norm": 2.8815448072837913, "learning_rate": 7.170507274014972e-06, "loss": 1.4816, "step": 22485 }, { "epoch": 0.6631284092584402, "grad_norm": 3.0422566975836833, "learning_rate": 7.170243551383211e-06, "loss": 1.5162, "step": 22490 }, { "epoch": 0.6632758366504496, "grad_norm": 2.8834467247485036, "learning_rate": 7.169979728106877e-06, "loss": 1.4506, "step": 22495 }, { "epoch": 0.6634232640424591, "grad_norm": 2.8600823771566994, "learning_rate": 7.169715804193735e-06, "loss": 1.5114, "step": 22500 }, { "epoch": 0.6634232640424591, "eval_loss": 1.1722216606140137, "eval_runtime": 4.6369, "eval_samples_per_second": 85.403, "eval_steps_per_second": 2.804, "step": 22500 }, { "epoch": 0.6635706914344686, "grad_norm": 2.8278877009672376, "learning_rate": 7.16945177965155e-06, "loss": 1.5278, "step": 22505 }, { "epoch": 0.6637181188264779, "grad_norm": 2.91933588402527, "learning_rate": 7.169187654488092e-06, "loss": 1.4694, "step": 22510 }, { "epoch": 0.6638655462184874, "grad_norm": 3.0142821174745085, "learning_rate": 7.168923428711133e-06, "loss": 1.4981, "step": 22515 }, { "epoch": 0.6640129736104968, "grad_norm": 2.827506363742875, "learning_rate": 7.168659102328449e-06, "loss": 1.5172, "step": 22520 }, { "epoch": 0.6641604010025063, "grad_norm": 3.057535304316981, "learning_rate": 7.1683946753478156e-06, "loss": 1.5454, "step": 22525 }, { "epoch": 0.6643078283945157, "grad_norm": 2.855105396786172, "learning_rate": 7.168130147777017e-06, "loss": 1.5141, "step": 22530 }, { "epoch": 0.6644552557865251, "grad_norm": 3.044903357349223, "learning_rate": 7.167865519623834e-06, "loss": 1.4516, "step": 22535 }, { "epoch": 0.6646026831785345, "grad_norm": 3.020278807169611, "learning_rate": 7.167600790896056e-06, "loss": 1.5221, "step": 22540 }, { "epoch": 0.664750110570544, "grad_norm": 2.954573839027938, "learning_rate": 7.167335961601473e-06, "loss": 1.5274, "step": 22545 }, { "epoch": 0.6648975379625535, "grad_norm": 2.896741914245056, "learning_rate": 7.167071031747877e-06, "loss": 1.5383, "step": 22550 }, { "epoch": 0.6650449653545629, "grad_norm": 3.0291702445421476, "learning_rate": 7.166806001343063e-06, "loss": 1.5474, "step": 22555 }, { "epoch": 0.6651923927465723, "grad_norm": 2.9765591536970946, "learning_rate": 7.166540870394832e-06, "loss": 1.5063, "step": 22560 }, { "epoch": 0.6653398201385817, "grad_norm": 2.8927008054811925, "learning_rate": 7.166275638910983e-06, "loss": 1.5156, "step": 22565 }, { "epoch": 0.6654872475305912, "grad_norm": 2.875993628815695, "learning_rate": 7.166010306899324e-06, "loss": 1.4603, "step": 22570 }, { "epoch": 0.6656346749226006, "grad_norm": 3.0029303564319334, "learning_rate": 7.165744874367661e-06, "loss": 1.5093, "step": 22575 }, { "epoch": 0.6657821023146101, "grad_norm": 2.9522588292669525, "learning_rate": 7.165479341323804e-06, "loss": 1.5126, "step": 22580 }, { "epoch": 0.6659295297066194, "grad_norm": 3.0484546326655457, "learning_rate": 7.165213707775568e-06, "loss": 1.5463, "step": 22585 }, { "epoch": 0.6660769570986289, "grad_norm": 2.907982360799474, "learning_rate": 7.1649479737307676e-06, "loss": 1.4419, "step": 22590 }, { "epoch": 0.6662243844906384, "grad_norm": 3.040981183696682, "learning_rate": 7.164682139197225e-06, "loss": 1.4747, "step": 22595 }, { "epoch": 0.6663718118826478, "grad_norm": 3.1540211159813354, "learning_rate": 7.16441620418276e-06, "loss": 1.5207, "step": 22600 }, { "epoch": 0.6665192392746573, "grad_norm": 2.8160945897608864, "learning_rate": 7.164150168695201e-06, "loss": 1.4857, "step": 22605 }, { "epoch": 0.6666666666666666, "grad_norm": 2.9196743877181057, "learning_rate": 7.163884032742373e-06, "loss": 1.5086, "step": 22610 }, { "epoch": 0.6668140940586761, "grad_norm": 3.022745902160417, "learning_rate": 7.16361779633211e-06, "loss": 1.4649, "step": 22615 }, { "epoch": 0.6669615214506855, "grad_norm": 2.880610705125291, "learning_rate": 7.1633514594722455e-06, "loss": 1.4912, "step": 22620 }, { "epoch": 0.667108948842695, "grad_norm": 2.9820815757464167, "learning_rate": 7.163085022170616e-06, "loss": 1.5099, "step": 22625 }, { "epoch": 0.6672563762347045, "grad_norm": 3.012507405283559, "learning_rate": 7.162818484435062e-06, "loss": 1.503, "step": 22630 }, { "epoch": 0.6674038036267138, "grad_norm": 3.0246691943324433, "learning_rate": 7.162551846273429e-06, "loss": 1.4888, "step": 22635 }, { "epoch": 0.6675512310187233, "grad_norm": 2.892011512391453, "learning_rate": 7.162285107693559e-06, "loss": 1.4558, "step": 22640 }, { "epoch": 0.6676986584107327, "grad_norm": 3.002372518255189, "learning_rate": 7.162018268703305e-06, "loss": 1.4921, "step": 22645 }, { "epoch": 0.6678460858027422, "grad_norm": 2.9023768685744202, "learning_rate": 7.161751329310518e-06, "loss": 1.4812, "step": 22650 }, { "epoch": 0.6679935131947515, "grad_norm": 2.896652935196914, "learning_rate": 7.16148428952305e-06, "loss": 1.523, "step": 22655 }, { "epoch": 0.668140940586761, "grad_norm": 3.0524592834552067, "learning_rate": 7.161217149348762e-06, "loss": 1.5334, "step": 22660 }, { "epoch": 0.6682883679787704, "grad_norm": 2.9099401937878047, "learning_rate": 7.160949908795515e-06, "loss": 1.433, "step": 22665 }, { "epoch": 0.6684357953707799, "grad_norm": 2.784799568492072, "learning_rate": 7.160682567871171e-06, "loss": 1.4968, "step": 22670 }, { "epoch": 0.6685832227627894, "grad_norm": 3.101642695469769, "learning_rate": 7.160415126583598e-06, "loss": 1.5174, "step": 22675 }, { "epoch": 0.6687306501547987, "grad_norm": 2.9777603320367163, "learning_rate": 7.160147584940666e-06, "loss": 1.5189, "step": 22680 }, { "epoch": 0.6688780775468082, "grad_norm": 2.910748061253814, "learning_rate": 7.1598799429502474e-06, "loss": 1.4974, "step": 22685 }, { "epoch": 0.6690255049388176, "grad_norm": 2.9338924514873757, "learning_rate": 7.159612200620217e-06, "loss": 1.5142, "step": 22690 }, { "epoch": 0.6691729323308271, "grad_norm": 3.0595344104607762, "learning_rate": 7.159344357958456e-06, "loss": 1.5547, "step": 22695 }, { "epoch": 0.6693203597228365, "grad_norm": 2.825720433749918, "learning_rate": 7.159076414972843e-06, "loss": 1.5117, "step": 22700 }, { "epoch": 0.6694677871148459, "grad_norm": 2.966876184541729, "learning_rate": 7.158808371671264e-06, "loss": 1.4627, "step": 22705 }, { "epoch": 0.6696152145068553, "grad_norm": 2.7610879061399367, "learning_rate": 7.158540228061606e-06, "loss": 1.4268, "step": 22710 }, { "epoch": 0.6697626418988648, "grad_norm": 2.919354210989602, "learning_rate": 7.15827198415176e-06, "loss": 1.5515, "step": 22715 }, { "epoch": 0.6699100692908743, "grad_norm": 3.2499037289273716, "learning_rate": 7.158003639949618e-06, "loss": 1.5237, "step": 22720 }, { "epoch": 0.6700574966828837, "grad_norm": 2.929063487285365, "learning_rate": 7.157735195463078e-06, "loss": 1.5072, "step": 22725 }, { "epoch": 0.6702049240748931, "grad_norm": 2.996742737736123, "learning_rate": 7.157466650700039e-06, "loss": 1.5521, "step": 22730 }, { "epoch": 0.6703523514669025, "grad_norm": 2.7646728713597404, "learning_rate": 7.157198005668403e-06, "loss": 1.4581, "step": 22735 }, { "epoch": 0.670499778858912, "grad_norm": 2.839670398252942, "learning_rate": 7.156929260376074e-06, "loss": 1.4818, "step": 22740 }, { "epoch": 0.6706472062509214, "grad_norm": 2.8605583495341786, "learning_rate": 7.156660414830962e-06, "loss": 1.4604, "step": 22745 }, { "epoch": 0.6707946336429309, "grad_norm": 2.8129718704053426, "learning_rate": 7.156391469040978e-06, "loss": 1.5135, "step": 22750 }, { "epoch": 0.6709420610349403, "grad_norm": 2.946863459605892, "learning_rate": 7.156122423014034e-06, "loss": 1.5235, "step": 22755 }, { "epoch": 0.6710894884269497, "grad_norm": 2.9279547642095918, "learning_rate": 7.155853276758049e-06, "loss": 1.4981, "step": 22760 }, { "epoch": 0.6712369158189592, "grad_norm": 2.983766406596306, "learning_rate": 7.155584030280943e-06, "loss": 1.5273, "step": 22765 }, { "epoch": 0.6713843432109686, "grad_norm": 2.680536967718917, "learning_rate": 7.155314683590637e-06, "loss": 1.4801, "step": 22770 }, { "epoch": 0.6715317706029781, "grad_norm": 2.815636942146346, "learning_rate": 7.155045236695058e-06, "loss": 1.5184, "step": 22775 }, { "epoch": 0.6716791979949874, "grad_norm": 2.9159680593898694, "learning_rate": 7.1547756896021344e-06, "loss": 1.48, "step": 22780 }, { "epoch": 0.6718266253869969, "grad_norm": 2.9465287332931798, "learning_rate": 7.154506042319799e-06, "loss": 1.5207, "step": 22785 }, { "epoch": 0.6719740527790063, "grad_norm": 2.793585897241677, "learning_rate": 7.154236294855986e-06, "loss": 1.5018, "step": 22790 }, { "epoch": 0.6721214801710158, "grad_norm": 2.907017257481453, "learning_rate": 7.153966447218632e-06, "loss": 1.4836, "step": 22795 }, { "epoch": 0.6722689075630253, "grad_norm": 2.9486973284568307, "learning_rate": 7.153696499415679e-06, "loss": 1.5368, "step": 22800 }, { "epoch": 0.6724163349550346, "grad_norm": 2.9163528146439646, "learning_rate": 7.15342645145507e-06, "loss": 1.4823, "step": 22805 }, { "epoch": 0.6725637623470441, "grad_norm": 2.8056716396633172, "learning_rate": 7.153156303344751e-06, "loss": 1.5044, "step": 22810 }, { "epoch": 0.6727111897390535, "grad_norm": 3.0173882770846596, "learning_rate": 7.152886055092672e-06, "loss": 1.5125, "step": 22815 }, { "epoch": 0.672858617131063, "grad_norm": 3.1278440662793, "learning_rate": 7.1526157067067846e-06, "loss": 1.4954, "step": 22820 }, { "epoch": 0.6730060445230723, "grad_norm": 3.089413219429265, "learning_rate": 7.152345258195045e-06, "loss": 1.5072, "step": 22825 }, { "epoch": 0.6731534719150818, "grad_norm": 2.925254451231159, "learning_rate": 7.152074709565411e-06, "loss": 1.4353, "step": 22830 }, { "epoch": 0.6733008993070912, "grad_norm": 2.8605531450913277, "learning_rate": 7.151804060825844e-06, "loss": 1.5126, "step": 22835 }, { "epoch": 0.6734483266991007, "grad_norm": 3.1083569483319957, "learning_rate": 7.151533311984308e-06, "loss": 1.5181, "step": 22840 }, { "epoch": 0.6735957540911102, "grad_norm": 2.988267793028378, "learning_rate": 7.151262463048771e-06, "loss": 1.5299, "step": 22845 }, { "epoch": 0.6737431814831195, "grad_norm": 2.9662553747773206, "learning_rate": 7.150991514027201e-06, "loss": 1.509, "step": 22850 }, { "epoch": 0.673890608875129, "grad_norm": 2.8211517930131498, "learning_rate": 7.150720464927573e-06, "loss": 1.4785, "step": 22855 }, { "epoch": 0.6740380362671384, "grad_norm": 2.810510418331349, "learning_rate": 7.150449315757862e-06, "loss": 1.5036, "step": 22860 }, { "epoch": 0.6741854636591479, "grad_norm": 2.9553124428361426, "learning_rate": 7.150178066526046e-06, "loss": 1.5112, "step": 22865 }, { "epoch": 0.6743328910511573, "grad_norm": 3.063691976512908, "learning_rate": 7.149906717240109e-06, "loss": 1.4957, "step": 22870 }, { "epoch": 0.6744803184431667, "grad_norm": 3.0197291083669957, "learning_rate": 7.149635267908033e-06, "loss": 1.5103, "step": 22875 }, { "epoch": 0.6746277458351762, "grad_norm": 2.9251776717235187, "learning_rate": 7.149363718537809e-06, "loss": 1.5256, "step": 22880 }, { "epoch": 0.6747751732271856, "grad_norm": 2.89096016175046, "learning_rate": 7.149092069137425e-06, "loss": 1.4664, "step": 22885 }, { "epoch": 0.6749226006191951, "grad_norm": 2.8421135899780174, "learning_rate": 7.148820319714877e-06, "loss": 1.4841, "step": 22890 }, { "epoch": 0.6750700280112045, "grad_norm": 2.972809768787482, "learning_rate": 7.148548470278158e-06, "loss": 1.4693, "step": 22895 }, { "epoch": 0.6752174554032139, "grad_norm": 2.9927211682222494, "learning_rate": 7.14827652083527e-06, "loss": 1.4793, "step": 22900 }, { "epoch": 0.6753648827952233, "grad_norm": 2.930686002996806, "learning_rate": 7.148004471394216e-06, "loss": 1.4849, "step": 22905 }, { "epoch": 0.6755123101872328, "grad_norm": 2.9513906342390364, "learning_rate": 7.1477323219629986e-06, "loss": 1.4989, "step": 22910 }, { "epoch": 0.6756597375792422, "grad_norm": 3.0166670110506826, "learning_rate": 7.14746007254963e-06, "loss": 1.506, "step": 22915 }, { "epoch": 0.6758071649712517, "grad_norm": 3.0945927131466293, "learning_rate": 7.1471877231621174e-06, "loss": 1.5198, "step": 22920 }, { "epoch": 0.675954592363261, "grad_norm": 2.9089371498974135, "learning_rate": 7.146915273808478e-06, "loss": 1.4494, "step": 22925 }, { "epoch": 0.6761020197552705, "grad_norm": 2.9319338375138937, "learning_rate": 7.146642724496727e-06, "loss": 1.482, "step": 22930 }, { "epoch": 0.67624944714728, "grad_norm": 2.91918803399047, "learning_rate": 7.146370075234886e-06, "loss": 1.4648, "step": 22935 }, { "epoch": 0.6763968745392894, "grad_norm": 2.8693314264645, "learning_rate": 7.146097326030977e-06, "loss": 1.4967, "step": 22940 }, { "epoch": 0.6765443019312989, "grad_norm": 2.8132497826324347, "learning_rate": 7.145824476893026e-06, "loss": 1.4841, "step": 22945 }, { "epoch": 0.6766917293233082, "grad_norm": 3.0781093419216803, "learning_rate": 7.145551527829061e-06, "loss": 1.5084, "step": 22950 }, { "epoch": 0.6768391567153177, "grad_norm": 3.2089113314087276, "learning_rate": 7.145278478847117e-06, "loss": 1.4995, "step": 22955 }, { "epoch": 0.6769865841073271, "grad_norm": 2.9185682331361877, "learning_rate": 7.1450053299552265e-06, "loss": 1.4632, "step": 22960 }, { "epoch": 0.6771340114993366, "grad_norm": 2.9429799562118486, "learning_rate": 7.144732081161426e-06, "loss": 1.4991, "step": 22965 }, { "epoch": 0.6772814388913461, "grad_norm": 2.8854200318813534, "learning_rate": 7.14445873247376e-06, "loss": 1.5102, "step": 22970 }, { "epoch": 0.6774288662833554, "grad_norm": 3.042856974762237, "learning_rate": 7.1441852839002675e-06, "loss": 1.5359, "step": 22975 }, { "epoch": 0.6775762936753649, "grad_norm": 2.9121073533864372, "learning_rate": 7.143911735448998e-06, "loss": 1.4897, "step": 22980 }, { "epoch": 0.6777237210673743, "grad_norm": 2.894477893520564, "learning_rate": 7.1436380871280005e-06, "loss": 1.467, "step": 22985 }, { "epoch": 0.6778711484593838, "grad_norm": 2.767036962488212, "learning_rate": 7.143364338945327e-06, "loss": 1.507, "step": 22990 }, { "epoch": 0.6780185758513931, "grad_norm": 2.8673076826352655, "learning_rate": 7.1430904909090335e-06, "loss": 1.4637, "step": 22995 }, { "epoch": 0.6781660032434026, "grad_norm": 2.872227514925425, "learning_rate": 7.142816543027177e-06, "loss": 1.5394, "step": 23000 }, { "epoch": 0.6781660032434026, "eval_loss": 1.1707206964492798, "eval_runtime": 4.2546, "eval_samples_per_second": 93.075, "eval_steps_per_second": 3.055, "step": 23000 }, { "epoch": 0.678313430635412, "grad_norm": 2.824670135778758, "learning_rate": 7.1425424953078206e-06, "loss": 1.5237, "step": 23005 }, { "epoch": 0.6784608580274215, "grad_norm": 2.954931338230091, "learning_rate": 7.142268347759027e-06, "loss": 1.4677, "step": 23010 }, { "epoch": 0.678608285419431, "grad_norm": 2.864217633809429, "learning_rate": 7.141994100388862e-06, "loss": 1.5222, "step": 23015 }, { "epoch": 0.6787557128114403, "grad_norm": 3.0972191853621256, "learning_rate": 7.141719753205399e-06, "loss": 1.5153, "step": 23020 }, { "epoch": 0.6789031402034498, "grad_norm": 2.908357342686358, "learning_rate": 7.141445306216709e-06, "loss": 1.5217, "step": 23025 }, { "epoch": 0.6790505675954592, "grad_norm": 2.8328060486496285, "learning_rate": 7.1411707594308685e-06, "loss": 1.5305, "step": 23030 }, { "epoch": 0.6791979949874687, "grad_norm": 2.8090984980545235, "learning_rate": 7.1408961128559556e-06, "loss": 1.5478, "step": 23035 }, { "epoch": 0.6793454223794781, "grad_norm": 3.080058448330777, "learning_rate": 7.140621366500053e-06, "loss": 1.5084, "step": 23040 }, { "epoch": 0.6794928497714875, "grad_norm": 2.7300577061417233, "learning_rate": 7.1403465203712455e-06, "loss": 1.4945, "step": 23045 }, { "epoch": 0.679640277163497, "grad_norm": 3.0054910205021788, "learning_rate": 7.14007157447762e-06, "loss": 1.5245, "step": 23050 }, { "epoch": 0.6797877045555064, "grad_norm": 2.869127054274266, "learning_rate": 7.139796528827266e-06, "loss": 1.4298, "step": 23055 }, { "epoch": 0.6799351319475159, "grad_norm": 2.8779324194303406, "learning_rate": 7.1395213834282795e-06, "loss": 1.4874, "step": 23060 }, { "epoch": 0.6800825593395253, "grad_norm": 3.178024319222511, "learning_rate": 7.139246138288756e-06, "loss": 1.4653, "step": 23065 }, { "epoch": 0.6802299867315347, "grad_norm": 2.7585288097982317, "learning_rate": 7.138970793416794e-06, "loss": 1.5124, "step": 23070 }, { "epoch": 0.6803774141235441, "grad_norm": 2.8591513079980078, "learning_rate": 7.138695348820498e-06, "loss": 1.479, "step": 23075 }, { "epoch": 0.6805248415155536, "grad_norm": 2.931989086242022, "learning_rate": 7.138419804507971e-06, "loss": 1.4909, "step": 23080 }, { "epoch": 0.680672268907563, "grad_norm": 2.8830026445346366, "learning_rate": 7.138144160487322e-06, "loss": 1.4991, "step": 23085 }, { "epoch": 0.6808196962995725, "grad_norm": 2.8848333352135427, "learning_rate": 7.137868416766663e-06, "loss": 1.5038, "step": 23090 }, { "epoch": 0.6809671236915819, "grad_norm": 2.893963534106881, "learning_rate": 7.137592573354106e-06, "loss": 1.5042, "step": 23095 }, { "epoch": 0.6811145510835913, "grad_norm": 2.9460351223926406, "learning_rate": 7.13731663025777e-06, "loss": 1.524, "step": 23100 }, { "epoch": 0.6812619784756008, "grad_norm": 2.8213817937435843, "learning_rate": 7.137040587485774e-06, "loss": 1.5325, "step": 23105 }, { "epoch": 0.6814094058676102, "grad_norm": 2.965578997254433, "learning_rate": 7.136764445046241e-06, "loss": 1.5259, "step": 23110 }, { "epoch": 0.6815568332596197, "grad_norm": 2.987817629567949, "learning_rate": 7.1364882029472975e-06, "loss": 1.479, "step": 23115 }, { "epoch": 0.681704260651629, "grad_norm": 2.8920322531786593, "learning_rate": 7.136211861197071e-06, "loss": 1.5349, "step": 23120 }, { "epoch": 0.6818516880436385, "grad_norm": 2.9319235651898032, "learning_rate": 7.135935419803695e-06, "loss": 1.5056, "step": 23125 }, { "epoch": 0.681999115435648, "grad_norm": 2.857398396498546, "learning_rate": 7.135658878775301e-06, "loss": 1.4741, "step": 23130 }, { "epoch": 0.6821465428276574, "grad_norm": 3.065610411243739, "learning_rate": 7.13538223812003e-06, "loss": 1.5146, "step": 23135 }, { "epoch": 0.6822939702196669, "grad_norm": 3.084722459897374, "learning_rate": 7.135105497846021e-06, "loss": 1.5832, "step": 23140 }, { "epoch": 0.6824413976116762, "grad_norm": 2.880848819766514, "learning_rate": 7.134828657961416e-06, "loss": 1.518, "step": 23145 }, { "epoch": 0.6825888250036857, "grad_norm": 2.9392381066202162, "learning_rate": 7.1345517184743645e-06, "loss": 1.5085, "step": 23150 }, { "epoch": 0.6827362523956951, "grad_norm": 2.8520247792246387, "learning_rate": 7.134274679393013e-06, "loss": 1.3868, "step": 23155 }, { "epoch": 0.6828836797877046, "grad_norm": 2.8703606586384693, "learning_rate": 7.133997540725515e-06, "loss": 1.4959, "step": 23160 }, { "epoch": 0.6830311071797139, "grad_norm": 2.967750144628784, "learning_rate": 7.133720302480024e-06, "loss": 1.4877, "step": 23165 }, { "epoch": 0.6831785345717234, "grad_norm": 2.8082992862153926, "learning_rate": 7.133442964664701e-06, "loss": 1.4869, "step": 23170 }, { "epoch": 0.6833259619637329, "grad_norm": 2.978736684212911, "learning_rate": 7.133165527287704e-06, "loss": 1.5351, "step": 23175 }, { "epoch": 0.6834733893557423, "grad_norm": 2.8755533010702075, "learning_rate": 7.132887990357199e-06, "loss": 1.4973, "step": 23180 }, { "epoch": 0.6836208167477518, "grad_norm": 3.0552810419820124, "learning_rate": 7.132610353881352e-06, "loss": 1.463, "step": 23185 }, { "epoch": 0.6837682441397611, "grad_norm": 2.851137184832753, "learning_rate": 7.132332617868332e-06, "loss": 1.4857, "step": 23190 }, { "epoch": 0.6839156715317706, "grad_norm": 2.9574680713195907, "learning_rate": 7.132054782326313e-06, "loss": 1.5012, "step": 23195 }, { "epoch": 0.68406309892378, "grad_norm": 2.886154554112229, "learning_rate": 7.13177684726347e-06, "loss": 1.5294, "step": 23200 }, { "epoch": 0.6842105263157895, "grad_norm": 2.907312122240486, "learning_rate": 7.131498812687982e-06, "loss": 1.5149, "step": 23205 }, { "epoch": 0.684357953707799, "grad_norm": 2.868720884443201, "learning_rate": 7.131220678608029e-06, "loss": 1.5177, "step": 23210 }, { "epoch": 0.6845053810998083, "grad_norm": 2.9305595346431383, "learning_rate": 7.130942445031798e-06, "loss": 1.4607, "step": 23215 }, { "epoch": 0.6846528084918178, "grad_norm": 2.8700862395528035, "learning_rate": 7.130664111967475e-06, "loss": 1.4747, "step": 23220 }, { "epoch": 0.6848002358838272, "grad_norm": 2.917909777169896, "learning_rate": 7.1303856794232494e-06, "loss": 1.5105, "step": 23225 }, { "epoch": 0.6849476632758367, "grad_norm": 3.0752759551451527, "learning_rate": 7.130107147407316e-06, "loss": 1.5456, "step": 23230 }, { "epoch": 0.6850950906678461, "grad_norm": 2.743990024534431, "learning_rate": 7.129828515927869e-06, "loss": 1.4858, "step": 23235 }, { "epoch": 0.6852425180598555, "grad_norm": 2.8718498030269566, "learning_rate": 7.129549784993109e-06, "loss": 1.5012, "step": 23240 }, { "epoch": 0.6853899454518649, "grad_norm": 2.93003035347881, "learning_rate": 7.129270954611238e-06, "loss": 1.4718, "step": 23245 }, { "epoch": 0.6855373728438744, "grad_norm": 2.949616316731852, "learning_rate": 7.12899202479046e-06, "loss": 1.5061, "step": 23250 }, { "epoch": 0.6856848002358839, "grad_norm": 2.7885516758855022, "learning_rate": 7.128712995538984e-06, "loss": 1.4854, "step": 23255 }, { "epoch": 0.6858322276278933, "grad_norm": 2.9214307553299492, "learning_rate": 7.128433866865019e-06, "loss": 1.5416, "step": 23260 }, { "epoch": 0.6859796550199027, "grad_norm": 2.6904968775027696, "learning_rate": 7.12815463877678e-06, "loss": 1.4746, "step": 23265 }, { "epoch": 0.6861270824119121, "grad_norm": 2.872215085055977, "learning_rate": 7.127875311282484e-06, "loss": 1.5367, "step": 23270 }, { "epoch": 0.6862745098039216, "grad_norm": 2.913596100192861, "learning_rate": 7.127595884390348e-06, "loss": 1.4894, "step": 23275 }, { "epoch": 0.686421937195931, "grad_norm": 3.058794347046803, "learning_rate": 7.127316358108598e-06, "loss": 1.5256, "step": 23280 }, { "epoch": 0.6865693645879405, "grad_norm": 2.959682251806867, "learning_rate": 7.127036732445457e-06, "loss": 1.5272, "step": 23285 }, { "epoch": 0.6867167919799498, "grad_norm": 2.8781811192944486, "learning_rate": 7.126757007409154e-06, "loss": 1.5091, "step": 23290 }, { "epoch": 0.6868642193719593, "grad_norm": 4.511797382755324, "learning_rate": 7.126477183007921e-06, "loss": 1.4547, "step": 23295 }, { "epoch": 0.6870116467639688, "grad_norm": 2.8301912578684356, "learning_rate": 7.126197259249991e-06, "loss": 1.5186, "step": 23300 }, { "epoch": 0.6871590741559782, "grad_norm": 2.738918005054667, "learning_rate": 7.125917236143601e-06, "loss": 1.4558, "step": 23305 }, { "epoch": 0.6873065015479877, "grad_norm": 2.9503575067081105, "learning_rate": 7.1256371136969915e-06, "loss": 1.447, "step": 23310 }, { "epoch": 0.687453928939997, "grad_norm": 2.9975090081548785, "learning_rate": 7.125356891918405e-06, "loss": 1.4837, "step": 23315 }, { "epoch": 0.6876013563320065, "grad_norm": 2.974938072422238, "learning_rate": 7.125076570816088e-06, "loss": 1.4776, "step": 23320 }, { "epoch": 0.6877487837240159, "grad_norm": 2.8367226489036343, "learning_rate": 7.124796150398289e-06, "loss": 1.5231, "step": 23325 }, { "epoch": 0.6878962111160254, "grad_norm": 2.862238024891591, "learning_rate": 7.12451563067326e-06, "loss": 1.4314, "step": 23330 }, { "epoch": 0.6880436385080347, "grad_norm": 2.8739444676365524, "learning_rate": 7.124235011649255e-06, "loss": 1.4659, "step": 23335 }, { "epoch": 0.6881910659000442, "grad_norm": 2.8586831461472753, "learning_rate": 7.123954293334532e-06, "loss": 1.4971, "step": 23340 }, { "epoch": 0.6883384932920537, "grad_norm": 3.7182425725374464, "learning_rate": 7.123673475737352e-06, "loss": 1.4646, "step": 23345 }, { "epoch": 0.6884859206840631, "grad_norm": 2.9503569643877556, "learning_rate": 7.123392558865977e-06, "loss": 1.4839, "step": 23350 }, { "epoch": 0.6886333480760726, "grad_norm": 2.830770359410229, "learning_rate": 7.123111542728674e-06, "loss": 1.4759, "step": 23355 }, { "epoch": 0.6887807754680819, "grad_norm": 2.842710401071249, "learning_rate": 7.122830427333712e-06, "loss": 1.5206, "step": 23360 }, { "epoch": 0.6889282028600914, "grad_norm": 3.0531219356779022, "learning_rate": 7.122549212689362e-06, "loss": 1.5921, "step": 23365 }, { "epoch": 0.6890756302521008, "grad_norm": 2.8833963413459007, "learning_rate": 7.122267898803902e-06, "loss": 1.4785, "step": 23370 }, { "epoch": 0.6892230576441103, "grad_norm": 2.9382148179313043, "learning_rate": 7.121986485685608e-06, "loss": 1.5062, "step": 23375 }, { "epoch": 0.6893704850361198, "grad_norm": 2.8680186936272913, "learning_rate": 7.1217049733427604e-06, "loss": 1.5162, "step": 23380 }, { "epoch": 0.6895179124281291, "grad_norm": 2.7591278837202937, "learning_rate": 7.121423361783646e-06, "loss": 1.4932, "step": 23385 }, { "epoch": 0.6896653398201386, "grad_norm": 2.9787994199760894, "learning_rate": 7.121141651016547e-06, "loss": 1.482, "step": 23390 }, { "epoch": 0.689812767212148, "grad_norm": 2.9664397797108384, "learning_rate": 7.120859841049756e-06, "loss": 1.4708, "step": 23395 }, { "epoch": 0.6899601946041575, "grad_norm": 3.004451187001137, "learning_rate": 7.120577931891566e-06, "loss": 1.4849, "step": 23400 }, { "epoch": 0.6901076219961669, "grad_norm": 2.996507846545726, "learning_rate": 7.1202959235502715e-06, "loss": 1.4374, "step": 23405 }, { "epoch": 0.6902550493881763, "grad_norm": 2.99278251671853, "learning_rate": 7.120013816034171e-06, "loss": 1.5437, "step": 23410 }, { "epoch": 0.6904024767801857, "grad_norm": 2.8318399049526612, "learning_rate": 7.119731609351565e-06, "loss": 1.4832, "step": 23415 }, { "epoch": 0.6905499041721952, "grad_norm": 2.8877454147143164, "learning_rate": 7.119449303510759e-06, "loss": 1.4596, "step": 23420 }, { "epoch": 0.6906973315642047, "grad_norm": 2.9428180742201064, "learning_rate": 7.11916689852006e-06, "loss": 1.5365, "step": 23425 }, { "epoch": 0.6908447589562141, "grad_norm": 2.826086178904775, "learning_rate": 7.118884394387777e-06, "loss": 1.5633, "step": 23430 }, { "epoch": 0.6909921863482235, "grad_norm": 2.8195040891152834, "learning_rate": 7.118601791122225e-06, "loss": 1.4859, "step": 23435 }, { "epoch": 0.6911396137402329, "grad_norm": 2.961121396894468, "learning_rate": 7.118319088731718e-06, "loss": 1.4739, "step": 23440 }, { "epoch": 0.6912870411322424, "grad_norm": 2.8442785384907956, "learning_rate": 7.118036287224576e-06, "loss": 1.4979, "step": 23445 }, { "epoch": 0.6914344685242518, "grad_norm": 2.932774520494671, "learning_rate": 7.117753386609121e-06, "loss": 1.4522, "step": 23450 }, { "epoch": 0.6915818959162613, "grad_norm": 2.991156042930206, "learning_rate": 7.117470386893676e-06, "loss": 1.5497, "step": 23455 }, { "epoch": 0.6917293233082706, "grad_norm": 2.886911873185025, "learning_rate": 7.117187288086568e-06, "loss": 1.4996, "step": 23460 }, { "epoch": 0.6918767507002801, "grad_norm": 2.9504107189956468, "learning_rate": 7.116904090196132e-06, "loss": 1.5394, "step": 23465 }, { "epoch": 0.6920241780922896, "grad_norm": 2.8697034719330268, "learning_rate": 7.116620793230697e-06, "loss": 1.4856, "step": 23470 }, { "epoch": 0.692171605484299, "grad_norm": 2.9618029903520924, "learning_rate": 7.116337397198601e-06, "loss": 1.5138, "step": 23475 }, { "epoch": 0.6923190328763085, "grad_norm": 2.96834955738928, "learning_rate": 7.116053902108184e-06, "loss": 1.4671, "step": 23480 }, { "epoch": 0.6924664602683178, "grad_norm": 2.9160201110072017, "learning_rate": 7.1157703079677865e-06, "loss": 1.5115, "step": 23485 }, { "epoch": 0.6926138876603273, "grad_norm": 2.8632330041900467, "learning_rate": 7.115486614785755e-06, "loss": 1.4939, "step": 23490 }, { "epoch": 0.6927613150523367, "grad_norm": 2.9396746716307818, "learning_rate": 7.115202822570436e-06, "loss": 1.5, "step": 23495 }, { "epoch": 0.6929087424443462, "grad_norm": 3.0022624628594667, "learning_rate": 7.114918931330182e-06, "loss": 1.5466, "step": 23500 }, { "epoch": 0.6929087424443462, "eval_loss": 1.167945146560669, "eval_runtime": 4.2, "eval_samples_per_second": 94.285, "eval_steps_per_second": 3.095, "step": 23500 }, { "epoch": 0.6930561698363555, "grad_norm": 2.8081948122293134, "learning_rate": 7.114634941073346e-06, "loss": 1.5115, "step": 23505 }, { "epoch": 0.693203597228365, "grad_norm": 2.977974740948965, "learning_rate": 7.114350851808285e-06, "loss": 1.5259, "step": 23510 }, { "epoch": 0.6933510246203745, "grad_norm": 2.9031266998822045, "learning_rate": 7.114066663543358e-06, "loss": 1.4433, "step": 23515 }, { "epoch": 0.6934984520123839, "grad_norm": 2.983606798023639, "learning_rate": 7.113782376286928e-06, "loss": 1.4984, "step": 23520 }, { "epoch": 0.6936458794043934, "grad_norm": 2.7155571276302535, "learning_rate": 7.113497990047362e-06, "loss": 1.4756, "step": 23525 }, { "epoch": 0.6937933067964027, "grad_norm": 3.3234970449152303, "learning_rate": 7.113213504833025e-06, "loss": 1.5471, "step": 23530 }, { "epoch": 0.6939407341884122, "grad_norm": 2.9781694271159873, "learning_rate": 7.112928920652291e-06, "loss": 1.4986, "step": 23535 }, { "epoch": 0.6940881615804216, "grad_norm": 2.8177831773370543, "learning_rate": 7.112644237513534e-06, "loss": 1.5086, "step": 23540 }, { "epoch": 0.6942355889724311, "grad_norm": 2.9188421422093334, "learning_rate": 7.11235945542513e-06, "loss": 1.4717, "step": 23545 }, { "epoch": 0.6943830163644406, "grad_norm": 3.0338081221576476, "learning_rate": 7.11207457439546e-06, "loss": 1.5079, "step": 23550 }, { "epoch": 0.6945304437564499, "grad_norm": 3.0091537094315584, "learning_rate": 7.111789594432905e-06, "loss": 1.4965, "step": 23555 }, { "epoch": 0.6946778711484594, "grad_norm": 3.036015503431301, "learning_rate": 7.111504515545853e-06, "loss": 1.5429, "step": 23560 }, { "epoch": 0.6948252985404688, "grad_norm": 2.914120943250267, "learning_rate": 7.111219337742694e-06, "loss": 1.4795, "step": 23565 }, { "epoch": 0.6949727259324783, "grad_norm": 3.03210485067571, "learning_rate": 7.110934061031817e-06, "loss": 1.5124, "step": 23570 }, { "epoch": 0.6951201533244877, "grad_norm": 3.185817707103008, "learning_rate": 7.110648685421617e-06, "loss": 1.4573, "step": 23575 }, { "epoch": 0.6952675807164971, "grad_norm": 2.9230817104154605, "learning_rate": 7.110363210920492e-06, "loss": 1.5023, "step": 23580 }, { "epoch": 0.6954150081085065, "grad_norm": 2.917102849873805, "learning_rate": 7.110077637536843e-06, "loss": 1.4393, "step": 23585 }, { "epoch": 0.695562435500516, "grad_norm": 2.8908021047945707, "learning_rate": 7.109791965279072e-06, "loss": 1.5103, "step": 23590 }, { "epoch": 0.6957098628925255, "grad_norm": 3.0501008784515906, "learning_rate": 7.109506194155587e-06, "loss": 1.4605, "step": 23595 }, { "epoch": 0.6958572902845349, "grad_norm": 2.8578209304194986, "learning_rate": 7.109220324174795e-06, "loss": 1.4832, "step": 23600 }, { "epoch": 0.6960047176765443, "grad_norm": 2.9863577013734077, "learning_rate": 7.108934355345109e-06, "loss": 1.5215, "step": 23605 }, { "epoch": 0.6961521450685537, "grad_norm": 2.7936728512329934, "learning_rate": 7.108648287674944e-06, "loss": 1.5095, "step": 23610 }, { "epoch": 0.6962995724605632, "grad_norm": 2.8277339067531853, "learning_rate": 7.108362121172719e-06, "loss": 1.531, "step": 23615 }, { "epoch": 0.6964469998525726, "grad_norm": 2.91143123554397, "learning_rate": 7.108075855846853e-06, "loss": 1.5023, "step": 23620 }, { "epoch": 0.6965944272445821, "grad_norm": 2.8979588843215196, "learning_rate": 7.107789491705771e-06, "loss": 1.5334, "step": 23625 }, { "epoch": 0.6967418546365914, "grad_norm": 2.8572865757693293, "learning_rate": 7.107503028757898e-06, "loss": 1.5164, "step": 23630 }, { "epoch": 0.6968892820286009, "grad_norm": 2.913470864955187, "learning_rate": 7.107216467011665e-06, "loss": 1.5254, "step": 23635 }, { "epoch": 0.6970367094206104, "grad_norm": 2.924554087531572, "learning_rate": 7.1069298064755035e-06, "loss": 1.4625, "step": 23640 }, { "epoch": 0.6971841368126198, "grad_norm": 2.916527936468482, "learning_rate": 7.106643047157851e-06, "loss": 1.4684, "step": 23645 }, { "epoch": 0.6973315642046293, "grad_norm": 2.8928918064017446, "learning_rate": 7.106356189067143e-06, "loss": 1.4898, "step": 23650 }, { "epoch": 0.6974789915966386, "grad_norm": 2.8027790416297926, "learning_rate": 7.1060692322118216e-06, "loss": 1.4349, "step": 23655 }, { "epoch": 0.6976264189886481, "grad_norm": 2.9400102236158485, "learning_rate": 7.10578217660033e-06, "loss": 1.488, "step": 23660 }, { "epoch": 0.6977738463806575, "grad_norm": 2.9882675032582044, "learning_rate": 7.105495022241117e-06, "loss": 1.5023, "step": 23665 }, { "epoch": 0.697921273772667, "grad_norm": 3.0577725772497466, "learning_rate": 7.10520776914263e-06, "loss": 1.475, "step": 23670 }, { "epoch": 0.6980687011646763, "grad_norm": 2.9287009823407484, "learning_rate": 7.1049204173133245e-06, "loss": 1.5093, "step": 23675 }, { "epoch": 0.6982161285566858, "grad_norm": 2.8511825076831783, "learning_rate": 7.104632966761655e-06, "loss": 1.4843, "step": 23680 }, { "epoch": 0.6983635559486953, "grad_norm": 3.0832039978278942, "learning_rate": 7.104345417496078e-06, "loss": 1.5167, "step": 23685 }, { "epoch": 0.6985109833407047, "grad_norm": 2.765473522241172, "learning_rate": 7.1040577695250595e-06, "loss": 1.5046, "step": 23690 }, { "epoch": 0.6986584107327142, "grad_norm": 2.8173786606944216, "learning_rate": 7.103770022857059e-06, "loss": 1.5502, "step": 23695 }, { "epoch": 0.6988058381247235, "grad_norm": 2.914130228854495, "learning_rate": 7.103482177500546e-06, "loss": 1.4505, "step": 23700 }, { "epoch": 0.698953265516733, "grad_norm": 2.8888083132187226, "learning_rate": 7.103194233463992e-06, "loss": 1.488, "step": 23705 }, { "epoch": 0.6991006929087424, "grad_norm": 2.82137810508416, "learning_rate": 7.102906190755868e-06, "loss": 1.4861, "step": 23710 }, { "epoch": 0.6992481203007519, "grad_norm": 3.0469198155533963, "learning_rate": 7.1026180493846505e-06, "loss": 1.5472, "step": 23715 }, { "epoch": 0.6993955476927614, "grad_norm": 2.904750381505622, "learning_rate": 7.102329809358819e-06, "loss": 1.5004, "step": 23720 }, { "epoch": 0.6995429750847707, "grad_norm": 2.766877807717052, "learning_rate": 7.1020414706868554e-06, "loss": 1.4481, "step": 23725 }, { "epoch": 0.6996904024767802, "grad_norm": 2.9685348244749203, "learning_rate": 7.101753033377244e-06, "loss": 1.5091, "step": 23730 }, { "epoch": 0.6998378298687896, "grad_norm": 2.8453042383494767, "learning_rate": 7.10146449743847e-06, "loss": 1.5186, "step": 23735 }, { "epoch": 0.6999852572607991, "grad_norm": 2.8408445588907174, "learning_rate": 7.1011758628790285e-06, "loss": 1.4639, "step": 23740 }, { "epoch": 0.7001326846528085, "grad_norm": 2.9553894761240027, "learning_rate": 7.10088712970741e-06, "loss": 1.4811, "step": 23745 }, { "epoch": 0.7002801120448179, "grad_norm": 2.804614267930821, "learning_rate": 7.100598297932112e-06, "loss": 1.4955, "step": 23750 }, { "epoch": 0.7004275394368273, "grad_norm": 2.880266881466412, "learning_rate": 7.100309367561632e-06, "loss": 1.4763, "step": 23755 }, { "epoch": 0.7005749668288368, "grad_norm": 2.770313899308896, "learning_rate": 7.100020338604473e-06, "loss": 1.5077, "step": 23760 }, { "epoch": 0.7007223942208463, "grad_norm": 2.8137050809596893, "learning_rate": 7.099731211069141e-06, "loss": 1.503, "step": 23765 }, { "epoch": 0.7008698216128557, "grad_norm": 2.8860092085773537, "learning_rate": 7.099441984964143e-06, "loss": 1.4908, "step": 23770 }, { "epoch": 0.7010172490048651, "grad_norm": 2.894123324998275, "learning_rate": 7.09915266029799e-06, "loss": 1.4596, "step": 23775 }, { "epoch": 0.7011646763968745, "grad_norm": 2.798131192998099, "learning_rate": 7.098863237079195e-06, "loss": 1.4841, "step": 23780 }, { "epoch": 0.701312103788884, "grad_norm": 2.9314150594427137, "learning_rate": 7.098573715316277e-06, "loss": 1.456, "step": 23785 }, { "epoch": 0.7014595311808934, "grad_norm": 3.027198332314175, "learning_rate": 7.098284095017752e-06, "loss": 1.4953, "step": 23790 }, { "epoch": 0.7016069585729029, "grad_norm": 2.9073325937042167, "learning_rate": 7.097994376192144e-06, "loss": 1.5212, "step": 23795 }, { "epoch": 0.7017543859649122, "grad_norm": 2.8122837952788564, "learning_rate": 7.0977045588479796e-06, "loss": 1.5088, "step": 23800 }, { "epoch": 0.7019018133569217, "grad_norm": 2.9265380953821607, "learning_rate": 7.097414642993785e-06, "loss": 1.5257, "step": 23805 }, { "epoch": 0.7020492407489312, "grad_norm": 2.8759110372766137, "learning_rate": 7.097124628638093e-06, "loss": 1.4818, "step": 23810 }, { "epoch": 0.7021966681409406, "grad_norm": 2.8142304785129273, "learning_rate": 7.096834515789437e-06, "loss": 1.5118, "step": 23815 }, { "epoch": 0.7023440955329501, "grad_norm": 2.853796682660355, "learning_rate": 7.096544304456352e-06, "loss": 1.4579, "step": 23820 }, { "epoch": 0.7024915229249594, "grad_norm": 3.0621763174465877, "learning_rate": 7.096253994647381e-06, "loss": 1.5218, "step": 23825 }, { "epoch": 0.7026389503169689, "grad_norm": 2.9184334242726364, "learning_rate": 7.095963586371065e-06, "loss": 1.5315, "step": 23830 }, { "epoch": 0.7027863777089783, "grad_norm": 2.867998331813161, "learning_rate": 7.09567307963595e-06, "loss": 1.4986, "step": 23835 }, { "epoch": 0.7029338051009878, "grad_norm": 3.1098159251860253, "learning_rate": 7.095382474450583e-06, "loss": 1.4941, "step": 23840 }, { "epoch": 0.7030812324929971, "grad_norm": 2.952975313893179, "learning_rate": 7.095091770823519e-06, "loss": 1.4753, "step": 23845 }, { "epoch": 0.7032286598850066, "grad_norm": 2.8694102288074403, "learning_rate": 7.0948009687633075e-06, "loss": 1.5261, "step": 23850 }, { "epoch": 0.7033760872770161, "grad_norm": 2.8515085096485597, "learning_rate": 7.094510068278509e-06, "loss": 1.4904, "step": 23855 }, { "epoch": 0.7035235146690255, "grad_norm": 2.7860680168020555, "learning_rate": 7.094219069377683e-06, "loss": 1.4554, "step": 23860 }, { "epoch": 0.703670942061035, "grad_norm": 2.9468645977784753, "learning_rate": 7.093927972069392e-06, "loss": 1.5039, "step": 23865 }, { "epoch": 0.7038183694530443, "grad_norm": 2.945513150584028, "learning_rate": 7.093636776362201e-06, "loss": 1.5079, "step": 23870 }, { "epoch": 0.7039657968450538, "grad_norm": 2.897217559763485, "learning_rate": 7.093345482264682e-06, "loss": 1.4486, "step": 23875 }, { "epoch": 0.7041132242370632, "grad_norm": 2.950686865171896, "learning_rate": 7.0930540897854025e-06, "loss": 1.5177, "step": 23880 }, { "epoch": 0.7042606516290727, "grad_norm": 2.915031423797498, "learning_rate": 7.09276259893294e-06, "loss": 1.4735, "step": 23885 }, { "epoch": 0.7044080790210822, "grad_norm": 3.0944401872018994, "learning_rate": 7.092471009715872e-06, "loss": 1.4906, "step": 23890 }, { "epoch": 0.7045555064130915, "grad_norm": 2.859066966562052, "learning_rate": 7.092179322142776e-06, "loss": 1.4719, "step": 23895 }, { "epoch": 0.704702933805101, "grad_norm": 2.837549572393694, "learning_rate": 7.091887536222238e-06, "loss": 1.5326, "step": 23900 }, { "epoch": 0.7048503611971104, "grad_norm": 3.1102569939847844, "learning_rate": 7.091595651962842e-06, "loss": 1.5493, "step": 23905 }, { "epoch": 0.7049977885891199, "grad_norm": 2.83272845989249, "learning_rate": 7.09130366937318e-06, "loss": 1.4969, "step": 23910 }, { "epoch": 0.7051452159811293, "grad_norm": 2.7897425029232368, "learning_rate": 7.091011588461841e-06, "loss": 1.4744, "step": 23915 }, { "epoch": 0.7052926433731387, "grad_norm": 2.8343689287343103, "learning_rate": 7.090719409237423e-06, "loss": 1.462, "step": 23920 }, { "epoch": 0.7054400707651481, "grad_norm": 2.85041392546647, "learning_rate": 7.090427131708519e-06, "loss": 1.503, "step": 23925 }, { "epoch": 0.7055874981571576, "grad_norm": 2.8226784207727773, "learning_rate": 7.090134755883733e-06, "loss": 1.4651, "step": 23930 }, { "epoch": 0.7057349255491671, "grad_norm": 2.7734881130790914, "learning_rate": 7.089842281771668e-06, "loss": 1.5016, "step": 23935 }, { "epoch": 0.7058823529411765, "grad_norm": 3.0913529508907986, "learning_rate": 7.0895497093809305e-06, "loss": 1.4985, "step": 23940 }, { "epoch": 0.7060297803331859, "grad_norm": 2.7838277530025226, "learning_rate": 7.089257038720129e-06, "loss": 1.4909, "step": 23945 }, { "epoch": 0.7061772077251953, "grad_norm": 3.1115256013135313, "learning_rate": 7.088964269797877e-06, "loss": 1.4884, "step": 23950 }, { "epoch": 0.7063246351172048, "grad_norm": 2.9965959948212797, "learning_rate": 7.0886714026227876e-06, "loss": 1.4998, "step": 23955 }, { "epoch": 0.7064720625092142, "grad_norm": 3.022948547505245, "learning_rate": 7.0883784372034795e-06, "loss": 1.4875, "step": 23960 }, { "epoch": 0.7066194899012237, "grad_norm": 2.843468147168084, "learning_rate": 7.088085373548574e-06, "loss": 1.4947, "step": 23965 }, { "epoch": 0.706766917293233, "grad_norm": 2.8862403477959293, "learning_rate": 7.087792211666696e-06, "loss": 1.4852, "step": 23970 }, { "epoch": 0.7069143446852425, "grad_norm": 2.9631421526053434, "learning_rate": 7.087498951566471e-06, "loss": 1.4688, "step": 23975 }, { "epoch": 0.707061772077252, "grad_norm": 2.6975168919047054, "learning_rate": 7.087205593256526e-06, "loss": 1.4889, "step": 23980 }, { "epoch": 0.7072091994692614, "grad_norm": 2.776717072962095, "learning_rate": 7.086912136745498e-06, "loss": 1.5375, "step": 23985 }, { "epoch": 0.7073566268612709, "grad_norm": 2.7788220927119927, "learning_rate": 7.086618582042019e-06, "loss": 1.4777, "step": 23990 }, { "epoch": 0.7075040542532802, "grad_norm": 2.9999657652687923, "learning_rate": 7.086324929154729e-06, "loss": 1.5081, "step": 23995 }, { "epoch": 0.7076514816452897, "grad_norm": 3.079526822807528, "learning_rate": 7.086031178092267e-06, "loss": 1.5046, "step": 24000 }, { "epoch": 0.7076514816452897, "eval_loss": 1.1659923791885376, "eval_runtime": 4.265, "eval_samples_per_second": 92.848, "eval_steps_per_second": 3.048, "step": 24000 }, { "epoch": 0.7077989090372991, "grad_norm": 2.7975025064450314, "learning_rate": 7.0857373288632795e-06, "loss": 1.4587, "step": 24005 }, { "epoch": 0.7079463364293086, "grad_norm": 2.9769079139419348, "learning_rate": 7.085443381476412e-06, "loss": 1.4906, "step": 24010 }, { "epoch": 0.7080937638213181, "grad_norm": 2.9759132510348665, "learning_rate": 7.085149335940313e-06, "loss": 1.528, "step": 24015 }, { "epoch": 0.7082411912133274, "grad_norm": 2.6397067811923973, "learning_rate": 7.084855192263637e-06, "loss": 1.4384, "step": 24020 }, { "epoch": 0.7083886186053369, "grad_norm": 3.0134652039105423, "learning_rate": 7.084560950455039e-06, "loss": 1.4809, "step": 24025 }, { "epoch": 0.7085360459973463, "grad_norm": 2.801130227210415, "learning_rate": 7.084266610523178e-06, "loss": 1.5374, "step": 24030 }, { "epoch": 0.7086834733893558, "grad_norm": 2.881526517557585, "learning_rate": 7.083972172476714e-06, "loss": 1.4666, "step": 24035 }, { "epoch": 0.7088309007813651, "grad_norm": 2.925725163460669, "learning_rate": 7.083677636324311e-06, "loss": 1.4538, "step": 24040 }, { "epoch": 0.7089783281733746, "grad_norm": 2.9176511812048926, "learning_rate": 7.083383002074638e-06, "loss": 1.5042, "step": 24045 }, { "epoch": 0.709125755565384, "grad_norm": 2.755130106688763, "learning_rate": 7.083088269736364e-06, "loss": 1.4746, "step": 24050 }, { "epoch": 0.7092731829573935, "grad_norm": 2.95374763448325, "learning_rate": 7.082793439318162e-06, "loss": 1.503, "step": 24055 }, { "epoch": 0.709420610349403, "grad_norm": 2.7597744871203034, "learning_rate": 7.082498510828707e-06, "loss": 1.4777, "step": 24060 }, { "epoch": 0.7095680377414123, "grad_norm": 2.8598079026281376, "learning_rate": 7.0822034842766775e-06, "loss": 1.4892, "step": 24065 }, { "epoch": 0.7097154651334218, "grad_norm": 2.8781519874902335, "learning_rate": 7.0819083596707555e-06, "loss": 1.471, "step": 24070 }, { "epoch": 0.7098628925254312, "grad_norm": 2.8469395234527135, "learning_rate": 7.0816131370196255e-06, "loss": 1.4524, "step": 24075 }, { "epoch": 0.7100103199174407, "grad_norm": 2.8419532804577194, "learning_rate": 7.081317816331975e-06, "loss": 1.5229, "step": 24080 }, { "epoch": 0.7101577473094501, "grad_norm": 3.1820183178412833, "learning_rate": 7.081022397616494e-06, "loss": 1.4875, "step": 24085 }, { "epoch": 0.7103051747014595, "grad_norm": 3.1243920077726792, "learning_rate": 7.080726880881875e-06, "loss": 1.4866, "step": 24090 }, { "epoch": 0.7104526020934689, "grad_norm": 2.752132191572674, "learning_rate": 7.080431266136813e-06, "loss": 1.4065, "step": 24095 }, { "epoch": 0.7106000294854784, "grad_norm": 2.983252257143679, "learning_rate": 7.08013555339001e-06, "loss": 1.5059, "step": 24100 }, { "epoch": 0.7107474568774879, "grad_norm": 2.9023084330401603, "learning_rate": 7.0798397426501655e-06, "loss": 1.4733, "step": 24105 }, { "epoch": 0.7108948842694973, "grad_norm": 2.8069016401226627, "learning_rate": 7.079543833925984e-06, "loss": 1.4877, "step": 24110 }, { "epoch": 0.7110423116615067, "grad_norm": 2.8693055426715928, "learning_rate": 7.079247827226172e-06, "loss": 1.4836, "step": 24115 }, { "epoch": 0.7111897390535161, "grad_norm": 2.9123434431152595, "learning_rate": 7.078951722559442e-06, "loss": 1.5127, "step": 24120 }, { "epoch": 0.7113371664455256, "grad_norm": 2.7761076769230826, "learning_rate": 7.0786555199345075e-06, "loss": 1.4989, "step": 24125 }, { "epoch": 0.711484593837535, "grad_norm": 2.959027138315138, "learning_rate": 7.078359219360081e-06, "loss": 1.4891, "step": 24130 }, { "epoch": 0.7116320212295445, "grad_norm": 2.858093924273715, "learning_rate": 7.078062820844886e-06, "loss": 1.5062, "step": 24135 }, { "epoch": 0.7117794486215538, "grad_norm": 2.8149917642098212, "learning_rate": 7.077766324397641e-06, "loss": 1.5123, "step": 24140 }, { "epoch": 0.7119268760135633, "grad_norm": 2.8915177462826365, "learning_rate": 7.077469730027072e-06, "loss": 1.5068, "step": 24145 }, { "epoch": 0.7120743034055728, "grad_norm": 3.0064500141241464, "learning_rate": 7.077173037741907e-06, "loss": 1.4482, "step": 24150 }, { "epoch": 0.7122217307975822, "grad_norm": 2.7869748469054185, "learning_rate": 7.076876247550877e-06, "loss": 1.474, "step": 24155 }, { "epoch": 0.7123691581895917, "grad_norm": 2.966574586270361, "learning_rate": 7.076579359462713e-06, "loss": 1.5125, "step": 24160 }, { "epoch": 0.712516585581601, "grad_norm": 3.010950652620848, "learning_rate": 7.0762823734861535e-06, "loss": 1.4742, "step": 24165 }, { "epoch": 0.7126640129736105, "grad_norm": 2.908707259474318, "learning_rate": 7.0759852896299375e-06, "loss": 1.541, "step": 24170 }, { "epoch": 0.7128114403656199, "grad_norm": 2.7331049297697914, "learning_rate": 7.075688107902806e-06, "loss": 1.4609, "step": 24175 }, { "epoch": 0.7129588677576294, "grad_norm": 2.808555737236399, "learning_rate": 7.075390828313504e-06, "loss": 1.4935, "step": 24180 }, { "epoch": 0.7131062951496389, "grad_norm": 2.8001386920519553, "learning_rate": 7.0750934508707795e-06, "loss": 1.5109, "step": 24185 }, { "epoch": 0.7132537225416482, "grad_norm": 2.797787349338343, "learning_rate": 7.074795975583384e-06, "loss": 1.4989, "step": 24190 }, { "epoch": 0.7134011499336577, "grad_norm": 2.939455113585615, "learning_rate": 7.07449840246007e-06, "loss": 1.4977, "step": 24195 }, { "epoch": 0.7135485773256671, "grad_norm": 2.7715470105915796, "learning_rate": 7.074200731509595e-06, "loss": 1.3984, "step": 24200 }, { "epoch": 0.7136960047176766, "grad_norm": 2.9221743252439865, "learning_rate": 7.0739029627407164e-06, "loss": 1.4882, "step": 24205 }, { "epoch": 0.7138434321096859, "grad_norm": 2.7282427913221765, "learning_rate": 7.0736050961621985e-06, "loss": 1.4915, "step": 24210 }, { "epoch": 0.7139908595016954, "grad_norm": 2.8980748276797432, "learning_rate": 7.073307131782805e-06, "loss": 1.5239, "step": 24215 }, { "epoch": 0.7141382868937048, "grad_norm": 2.9135025541491046, "learning_rate": 7.073009069611306e-06, "loss": 1.497, "step": 24220 }, { "epoch": 0.7142857142857143, "grad_norm": 2.8706017527178025, "learning_rate": 7.072710909656469e-06, "loss": 1.4673, "step": 24225 }, { "epoch": 0.7144331416777238, "grad_norm": 2.8703921624909796, "learning_rate": 7.07241265192707e-06, "loss": 1.5334, "step": 24230 }, { "epoch": 0.7145805690697331, "grad_norm": 3.0064870320484394, "learning_rate": 7.072114296431885e-06, "loss": 1.4942, "step": 24235 }, { "epoch": 0.7147279964617426, "grad_norm": 2.963245935396535, "learning_rate": 7.071815843179693e-06, "loss": 1.5123, "step": 24240 }, { "epoch": 0.714875423853752, "grad_norm": 3.3406324752732632, "learning_rate": 7.071517292179276e-06, "loss": 1.5439, "step": 24245 }, { "epoch": 0.7150228512457615, "grad_norm": 2.928524746804634, "learning_rate": 7.0712186434394205e-06, "loss": 1.4599, "step": 24250 }, { "epoch": 0.7151702786377709, "grad_norm": 2.96283417655547, "learning_rate": 7.070919896968914e-06, "loss": 1.4845, "step": 24255 }, { "epoch": 0.7153177060297803, "grad_norm": 2.9016322090727535, "learning_rate": 7.0706210527765465e-06, "loss": 1.5332, "step": 24260 }, { "epoch": 0.7154651334217897, "grad_norm": 2.77267135833535, "learning_rate": 7.070322110871116e-06, "loss": 1.4941, "step": 24265 }, { "epoch": 0.7156125608137992, "grad_norm": 2.758842423115859, "learning_rate": 7.070023071261413e-06, "loss": 1.5566, "step": 24270 }, { "epoch": 0.7157599882058087, "grad_norm": 2.9584593362075102, "learning_rate": 7.069723933956241e-06, "loss": 1.5369, "step": 24275 }, { "epoch": 0.7159074155978181, "grad_norm": 2.873149439836658, "learning_rate": 7.0694246989644e-06, "loss": 1.4558, "step": 24280 }, { "epoch": 0.7160548429898275, "grad_norm": 2.83693956899061, "learning_rate": 7.0691253662946985e-06, "loss": 1.4944, "step": 24285 }, { "epoch": 0.7162022703818369, "grad_norm": 2.996728849807782, "learning_rate": 7.0688259359559425e-06, "loss": 1.4322, "step": 24290 }, { "epoch": 0.7163496977738464, "grad_norm": 2.9558985360310124, "learning_rate": 7.068526407956944e-06, "loss": 1.4678, "step": 24295 }, { "epoch": 0.7164971251658558, "grad_norm": 2.8687238285408, "learning_rate": 7.068226782306517e-06, "loss": 1.5202, "step": 24300 }, { "epoch": 0.7166445525578653, "grad_norm": 2.792495090771749, "learning_rate": 7.067927059013477e-06, "loss": 1.4914, "step": 24305 }, { "epoch": 0.7167919799498746, "grad_norm": 2.816123870642706, "learning_rate": 7.0676272380866445e-06, "loss": 1.4676, "step": 24310 }, { "epoch": 0.7169394073418841, "grad_norm": 2.9117398806759263, "learning_rate": 7.0673273195348435e-06, "loss": 1.4994, "step": 24315 }, { "epoch": 0.7170868347338936, "grad_norm": 2.850579491031599, "learning_rate": 7.067027303366897e-06, "loss": 1.5184, "step": 24320 }, { "epoch": 0.717234262125903, "grad_norm": 2.839369953673087, "learning_rate": 7.066727189591635e-06, "loss": 1.5607, "step": 24325 }, { "epoch": 0.7173816895179125, "grad_norm": 3.0573605483627726, "learning_rate": 7.066426978217887e-06, "loss": 1.5152, "step": 24330 }, { "epoch": 0.7175291169099218, "grad_norm": 2.8339186058533117, "learning_rate": 7.06612666925449e-06, "loss": 1.5242, "step": 24335 }, { "epoch": 0.7176765443019313, "grad_norm": 2.7342817046379286, "learning_rate": 7.0658262627102786e-06, "loss": 1.5132, "step": 24340 }, { "epoch": 0.7178239716939407, "grad_norm": 2.8307207053431016, "learning_rate": 7.065525758594093e-06, "loss": 1.4478, "step": 24345 }, { "epoch": 0.7179713990859502, "grad_norm": 2.8252997629995584, "learning_rate": 7.065225156914775e-06, "loss": 1.488, "step": 24350 }, { "epoch": 0.7181188264779597, "grad_norm": 2.8154913376532535, "learning_rate": 7.064924457681172e-06, "loss": 1.4262, "step": 24355 }, { "epoch": 0.718266253869969, "grad_norm": 2.7536524273361946, "learning_rate": 7.0646236609021315e-06, "loss": 1.448, "step": 24360 }, { "epoch": 0.7184136812619785, "grad_norm": 2.9640861529612854, "learning_rate": 7.064322766586504e-06, "loss": 1.5218, "step": 24365 }, { "epoch": 0.7185611086539879, "grad_norm": 2.94951382560102, "learning_rate": 7.064021774743147e-06, "loss": 1.4969, "step": 24370 }, { "epoch": 0.7187085360459974, "grad_norm": 2.825758582578351, "learning_rate": 7.063720685380913e-06, "loss": 1.4705, "step": 24375 }, { "epoch": 0.7188559634380067, "grad_norm": 3.1132025534750922, "learning_rate": 7.063419498508665e-06, "loss": 1.4923, "step": 24380 }, { "epoch": 0.7190033908300162, "grad_norm": 2.7612132713762927, "learning_rate": 7.063118214135264e-06, "loss": 1.4175, "step": 24385 }, { "epoch": 0.7191508182220256, "grad_norm": 2.904460868489938, "learning_rate": 7.062816832269576e-06, "loss": 1.5337, "step": 24390 }, { "epoch": 0.7192982456140351, "grad_norm": 2.902766517051635, "learning_rate": 7.06251535292047e-06, "loss": 1.5113, "step": 24395 }, { "epoch": 0.7194456730060446, "grad_norm": 2.9361414375145753, "learning_rate": 7.062213776096818e-06, "loss": 1.4865, "step": 24400 }, { "epoch": 0.7195931003980539, "grad_norm": 2.9390119153474914, "learning_rate": 7.061912101807494e-06, "loss": 1.5097, "step": 24405 }, { "epoch": 0.7197405277900634, "grad_norm": 2.9824397312540345, "learning_rate": 7.061610330061374e-06, "loss": 1.4913, "step": 24410 }, { "epoch": 0.7198879551820728, "grad_norm": 2.766445591493999, "learning_rate": 7.061308460867337e-06, "loss": 1.4973, "step": 24415 }, { "epoch": 0.7200353825740823, "grad_norm": 2.873051713065638, "learning_rate": 7.061006494234269e-06, "loss": 1.5258, "step": 24420 }, { "epoch": 0.7201828099660917, "grad_norm": 2.7902332816515587, "learning_rate": 7.060704430171055e-06, "loss": 1.4197, "step": 24425 }, { "epoch": 0.7203302373581011, "grad_norm": 2.9715046046608222, "learning_rate": 7.0604022686865816e-06, "loss": 1.5218, "step": 24430 }, { "epoch": 0.7204776647501105, "grad_norm": 2.811973073941971, "learning_rate": 7.060100009789741e-06, "loss": 1.5269, "step": 24435 }, { "epoch": 0.72062509214212, "grad_norm": 2.851707918767642, "learning_rate": 7.059797653489428e-06, "loss": 1.5096, "step": 24440 }, { "epoch": 0.7207725195341295, "grad_norm": 2.7600388908276385, "learning_rate": 7.05949519979454e-06, "loss": 1.4398, "step": 24445 }, { "epoch": 0.7209199469261389, "grad_norm": 2.8723312706340107, "learning_rate": 7.0591926487139765e-06, "loss": 1.463, "step": 24450 }, { "epoch": 0.7210673743181483, "grad_norm": 2.82415645869588, "learning_rate": 7.05889000025664e-06, "loss": 1.5104, "step": 24455 }, { "epoch": 0.7212148017101577, "grad_norm": 2.8723373181877414, "learning_rate": 7.058587254431438e-06, "loss": 1.4674, "step": 24460 }, { "epoch": 0.7213622291021672, "grad_norm": 2.8397048709889914, "learning_rate": 7.058284411247278e-06, "loss": 1.4828, "step": 24465 }, { "epoch": 0.7215096564941766, "grad_norm": 2.9485309267083966, "learning_rate": 7.05798147071307e-06, "loss": 1.5212, "step": 24470 }, { "epoch": 0.7216570838861861, "grad_norm": 2.894529199614288, "learning_rate": 7.057678432837732e-06, "loss": 1.5239, "step": 24475 }, { "epoch": 0.7218045112781954, "grad_norm": 3.1079157551751373, "learning_rate": 7.057375297630179e-06, "loss": 1.5192, "step": 24480 }, { "epoch": 0.7219519386702049, "grad_norm": 2.8955932738188723, "learning_rate": 7.05707206509933e-06, "loss": 1.5089, "step": 24485 }, { "epoch": 0.7220993660622144, "grad_norm": 2.8253491348079214, "learning_rate": 7.05676873525411e-06, "loss": 1.4842, "step": 24490 }, { "epoch": 0.7222467934542238, "grad_norm": 2.983639136055254, "learning_rate": 7.056465308103444e-06, "loss": 1.5228, "step": 24495 }, { "epoch": 0.7223942208462333, "grad_norm": 2.873436467315454, "learning_rate": 7.056161783656259e-06, "loss": 1.5397, "step": 24500 }, { "epoch": 0.7223942208462333, "eval_loss": 1.1630758047103882, "eval_runtime": 5.0615, "eval_samples_per_second": 78.238, "eval_steps_per_second": 2.568, "step": 24500 }, { "epoch": 0.7225416482382426, "grad_norm": 2.951233948716499, "learning_rate": 7.05585816192149e-06, "loss": 1.5891, "step": 24505 }, { "epoch": 0.7226890756302521, "grad_norm": 2.906683866985882, "learning_rate": 7.05555444290807e-06, "loss": 1.5312, "step": 24510 }, { "epoch": 0.7228365030222615, "grad_norm": 2.95023428640183, "learning_rate": 7.055250626624934e-06, "loss": 1.5215, "step": 24515 }, { "epoch": 0.722983930414271, "grad_norm": 2.8494173085880847, "learning_rate": 7.054946713081027e-06, "loss": 1.4764, "step": 24520 }, { "epoch": 0.7231313578062805, "grad_norm": 2.95483915187705, "learning_rate": 7.054642702285287e-06, "loss": 1.5015, "step": 24525 }, { "epoch": 0.7232787851982898, "grad_norm": 2.9449183707097406, "learning_rate": 7.054338594246663e-06, "loss": 1.4813, "step": 24530 }, { "epoch": 0.7234262125902993, "grad_norm": 2.796561896617626, "learning_rate": 7.054034388974102e-06, "loss": 1.5125, "step": 24535 }, { "epoch": 0.7235736399823087, "grad_norm": 2.864186915312791, "learning_rate": 7.053730086476556e-06, "loss": 1.5449, "step": 24540 }, { "epoch": 0.7237210673743182, "grad_norm": 2.933303062671676, "learning_rate": 7.0534256867629805e-06, "loss": 1.5254, "step": 24545 }, { "epoch": 0.7238684947663275, "grad_norm": 2.8045217814416588, "learning_rate": 7.053121189842333e-06, "loss": 1.5097, "step": 24550 }, { "epoch": 0.724015922158337, "grad_norm": 2.6812924661414486, "learning_rate": 7.05281659572357e-06, "loss": 1.4733, "step": 24555 }, { "epoch": 0.7241633495503464, "grad_norm": 3.0649191338120163, "learning_rate": 7.052511904415659e-06, "loss": 1.5076, "step": 24560 }, { "epoch": 0.7243107769423559, "grad_norm": 2.8861772841523425, "learning_rate": 7.052207115927565e-06, "loss": 1.4501, "step": 24565 }, { "epoch": 0.7244582043343654, "grad_norm": 2.8721169748785704, "learning_rate": 7.051902230268254e-06, "loss": 1.5244, "step": 24570 }, { "epoch": 0.7246056317263747, "grad_norm": 2.96268112992518, "learning_rate": 7.0515972474467e-06, "loss": 1.4842, "step": 24575 }, { "epoch": 0.7247530591183842, "grad_norm": 2.7405609745084583, "learning_rate": 7.051292167471878e-06, "loss": 1.4828, "step": 24580 }, { "epoch": 0.7249004865103936, "grad_norm": 2.8411990993959533, "learning_rate": 7.050986990352763e-06, "loss": 1.4732, "step": 24585 }, { "epoch": 0.7250479139024031, "grad_norm": 2.7299902166515126, "learning_rate": 7.050681716098337e-06, "loss": 1.4714, "step": 24590 }, { "epoch": 0.7251953412944125, "grad_norm": 2.930885598331089, "learning_rate": 7.050376344717583e-06, "loss": 1.486, "step": 24595 }, { "epoch": 0.7253427686864219, "grad_norm": 2.8106815461369696, "learning_rate": 7.050070876219487e-06, "loss": 1.4973, "step": 24600 }, { "epoch": 0.7254901960784313, "grad_norm": 2.7422662196197707, "learning_rate": 7.049765310613035e-06, "loss": 1.436, "step": 24605 }, { "epoch": 0.7256376234704408, "grad_norm": 2.799266578859085, "learning_rate": 7.049459647907223e-06, "loss": 1.5055, "step": 24610 }, { "epoch": 0.7257850508624503, "grad_norm": 2.8980448938331524, "learning_rate": 7.049153888111042e-06, "loss": 1.4632, "step": 24615 }, { "epoch": 0.7259324782544597, "grad_norm": 2.9505531058666423, "learning_rate": 7.048848031233491e-06, "loss": 1.431, "step": 24620 }, { "epoch": 0.7260799056464691, "grad_norm": 2.9185599239239073, "learning_rate": 7.048542077283569e-06, "loss": 1.542, "step": 24625 }, { "epoch": 0.7262273330384785, "grad_norm": 2.9184417656490362, "learning_rate": 7.048236026270281e-06, "loss": 1.4963, "step": 24630 }, { "epoch": 0.726374760430488, "grad_norm": 2.8052464184178296, "learning_rate": 7.0479298782026315e-06, "loss": 1.4654, "step": 24635 }, { "epoch": 0.7265221878224974, "grad_norm": 2.834954399262012, "learning_rate": 7.0476236330896285e-06, "loss": 1.5332, "step": 24640 }, { "epoch": 0.7266696152145069, "grad_norm": 2.8106635465075436, "learning_rate": 7.047317290940285e-06, "loss": 1.4708, "step": 24645 }, { "epoch": 0.7268170426065163, "grad_norm": 2.963411356121724, "learning_rate": 7.047010851763616e-06, "loss": 1.505, "step": 24650 }, { "epoch": 0.7269644699985257, "grad_norm": 2.7361984878777887, "learning_rate": 7.046704315568637e-06, "loss": 1.4638, "step": 24655 }, { "epoch": 0.7271118973905352, "grad_norm": 2.81131063741952, "learning_rate": 7.04639768236437e-06, "loss": 1.4578, "step": 24660 }, { "epoch": 0.7272593247825446, "grad_norm": 2.808621193126993, "learning_rate": 7.0460909521598355e-06, "loss": 1.5384, "step": 24665 }, { "epoch": 0.7274067521745541, "grad_norm": 2.8718030616599886, "learning_rate": 7.045784124964062e-06, "loss": 1.4894, "step": 24670 }, { "epoch": 0.7275541795665634, "grad_norm": 2.9377808883489376, "learning_rate": 7.045477200786078e-06, "loss": 1.4927, "step": 24675 }, { "epoch": 0.7277016069585729, "grad_norm": 2.7783355806780756, "learning_rate": 7.0451701796349124e-06, "loss": 1.5214, "step": 24680 }, { "epoch": 0.7278490343505823, "grad_norm": 3.0694560423659865, "learning_rate": 7.044863061519604e-06, "loss": 1.4505, "step": 24685 }, { "epoch": 0.7279964617425918, "grad_norm": 2.9805976644567513, "learning_rate": 7.044555846449185e-06, "loss": 1.4263, "step": 24690 }, { "epoch": 0.7281438891346013, "grad_norm": 3.0100784252444797, "learning_rate": 7.0442485344327e-06, "loss": 1.4552, "step": 24695 }, { "epoch": 0.7282913165266106, "grad_norm": 3.0112274377475474, "learning_rate": 7.04394112547919e-06, "loss": 1.4523, "step": 24700 }, { "epoch": 0.7284387439186201, "grad_norm": 2.8696668210694383, "learning_rate": 7.043633619597701e-06, "loss": 1.5284, "step": 24705 }, { "epoch": 0.7285861713106295, "grad_norm": 2.9244095544215405, "learning_rate": 7.043326016797283e-06, "loss": 1.4758, "step": 24710 }, { "epoch": 0.728733598702639, "grad_norm": 3.1076252696679543, "learning_rate": 7.043018317086985e-06, "loss": 1.525, "step": 24715 }, { "epoch": 0.7288810260946483, "grad_norm": 2.7921015032480367, "learning_rate": 7.042710520475863e-06, "loss": 1.4596, "step": 24720 }, { "epoch": 0.7290284534866578, "grad_norm": 2.966089921760168, "learning_rate": 7.042402626972972e-06, "loss": 1.455, "step": 24725 }, { "epoch": 0.7291758808786672, "grad_norm": 3.0168366623200376, "learning_rate": 7.042094636587377e-06, "loss": 1.5005, "step": 24730 }, { "epoch": 0.7293233082706767, "grad_norm": 2.776002623467593, "learning_rate": 7.041786549328138e-06, "loss": 1.4782, "step": 24735 }, { "epoch": 0.7294707356626862, "grad_norm": 2.8994003817990266, "learning_rate": 7.041478365204319e-06, "loss": 1.5134, "step": 24740 }, { "epoch": 0.7296181630546955, "grad_norm": 2.798692226957244, "learning_rate": 7.041170084224992e-06, "loss": 1.4932, "step": 24745 }, { "epoch": 0.729765590446705, "grad_norm": 2.841992324607217, "learning_rate": 7.040861706399227e-06, "loss": 1.4401, "step": 24750 }, { "epoch": 0.7299130178387144, "grad_norm": 2.806331546039457, "learning_rate": 7.040553231736099e-06, "loss": 1.5134, "step": 24755 }, { "epoch": 0.7300604452307239, "grad_norm": 2.8043818351850107, "learning_rate": 7.040244660244685e-06, "loss": 1.4688, "step": 24760 }, { "epoch": 0.7302078726227333, "grad_norm": 2.947036889389184, "learning_rate": 7.039935991934063e-06, "loss": 1.5379, "step": 24765 }, { "epoch": 0.7303553000147427, "grad_norm": 3.0253041326471353, "learning_rate": 7.039627226813319e-06, "loss": 1.51, "step": 24770 }, { "epoch": 0.7305027274067522, "grad_norm": 2.7779133863600136, "learning_rate": 7.0393183648915375e-06, "loss": 1.4857, "step": 24775 }, { "epoch": 0.7306501547987616, "grad_norm": 3.0385570816557634, "learning_rate": 7.039009406177807e-06, "loss": 1.4692, "step": 24780 }, { "epoch": 0.7307975821907711, "grad_norm": 2.929097349928313, "learning_rate": 7.038700350681219e-06, "loss": 1.457, "step": 24785 }, { "epoch": 0.7309450095827805, "grad_norm": 2.7423067305713085, "learning_rate": 7.038391198410868e-06, "loss": 1.4982, "step": 24790 }, { "epoch": 0.7310924369747899, "grad_norm": 2.798835328678134, "learning_rate": 7.0380819493758515e-06, "loss": 1.5006, "step": 24795 }, { "epoch": 0.7312398643667993, "grad_norm": 2.7229996770896707, "learning_rate": 7.037772603585268e-06, "loss": 1.4384, "step": 24800 }, { "epoch": 0.7313872917588088, "grad_norm": 2.8507580240194694, "learning_rate": 7.037463161048221e-06, "loss": 1.4571, "step": 24805 }, { "epoch": 0.7315347191508182, "grad_norm": 2.8114540283368656, "learning_rate": 7.037153621773818e-06, "loss": 1.4366, "step": 24810 }, { "epoch": 0.7316821465428277, "grad_norm": 2.8297245717810053, "learning_rate": 7.036843985771165e-06, "loss": 1.4818, "step": 24815 }, { "epoch": 0.731829573934837, "grad_norm": 2.885035489100148, "learning_rate": 7.036534253049376e-06, "loss": 1.4549, "step": 24820 }, { "epoch": 0.7319770013268465, "grad_norm": 2.760053354333667, "learning_rate": 7.036224423617563e-06, "loss": 1.4798, "step": 24825 }, { "epoch": 0.732124428718856, "grad_norm": 2.9572654128220175, "learning_rate": 7.035914497484844e-06, "loss": 1.4806, "step": 24830 }, { "epoch": 0.7322718561108654, "grad_norm": 2.887547493355774, "learning_rate": 7.0356044746603375e-06, "loss": 1.4775, "step": 24835 }, { "epoch": 0.7324192835028749, "grad_norm": 2.9304522439830936, "learning_rate": 7.035294355153168e-06, "loss": 1.5064, "step": 24840 }, { "epoch": 0.7325667108948842, "grad_norm": 2.8274258544183044, "learning_rate": 7.034984138972462e-06, "loss": 1.4774, "step": 24845 }, { "epoch": 0.7327141382868937, "grad_norm": 2.900734697793523, "learning_rate": 7.034673826127346e-06, "loss": 1.4758, "step": 24850 }, { "epoch": 0.7328615656789031, "grad_norm": 2.989160494907921, "learning_rate": 7.034363416626952e-06, "loss": 1.5146, "step": 24855 }, { "epoch": 0.7330089930709126, "grad_norm": 2.9801096577672723, "learning_rate": 7.034052910480413e-06, "loss": 1.4799, "step": 24860 }, { "epoch": 0.7331564204629221, "grad_norm": 2.729698846584364, "learning_rate": 7.033742307696869e-06, "loss": 1.4764, "step": 24865 }, { "epoch": 0.7333038478549314, "grad_norm": 2.9973174182258013, "learning_rate": 7.033431608285457e-06, "loss": 1.4876, "step": 24870 }, { "epoch": 0.7334512752469409, "grad_norm": 2.707339090991606, "learning_rate": 7.033120812255321e-06, "loss": 1.45, "step": 24875 }, { "epoch": 0.7335987026389503, "grad_norm": 2.8140832438494034, "learning_rate": 7.032809919615606e-06, "loss": 1.5026, "step": 24880 }, { "epoch": 0.7337461300309598, "grad_norm": 2.862464525236749, "learning_rate": 7.03249893037546e-06, "loss": 1.5207, "step": 24885 }, { "epoch": 0.7338935574229691, "grad_norm": 3.749683155882278, "learning_rate": 7.0321878445440354e-06, "loss": 1.476, "step": 24890 }, { "epoch": 0.7340409848149786, "grad_norm": 2.837097651373486, "learning_rate": 7.031876662130486e-06, "loss": 1.4871, "step": 24895 }, { "epoch": 0.734188412206988, "grad_norm": 2.652062434955221, "learning_rate": 7.031565383143968e-06, "loss": 1.482, "step": 24900 }, { "epoch": 0.7343358395989975, "grad_norm": 2.775473568977587, "learning_rate": 7.03125400759364e-06, "loss": 1.4492, "step": 24905 }, { "epoch": 0.734483266991007, "grad_norm": 2.9539055973371684, "learning_rate": 7.030942535488667e-06, "loss": 1.4844, "step": 24910 }, { "epoch": 0.7346306943830163, "grad_norm": 2.8839284094553093, "learning_rate": 7.0306309668382144e-06, "loss": 1.4748, "step": 24915 }, { "epoch": 0.7347781217750258, "grad_norm": 2.906910316178485, "learning_rate": 7.030319301651449e-06, "loss": 1.4744, "step": 24920 }, { "epoch": 0.7349255491670352, "grad_norm": 2.9733505489676704, "learning_rate": 7.030007539937541e-06, "loss": 1.5155, "step": 24925 }, { "epoch": 0.7350729765590447, "grad_norm": 2.881309244288626, "learning_rate": 7.029695681705668e-06, "loss": 1.4711, "step": 24930 }, { "epoch": 0.7352204039510541, "grad_norm": 2.900262599375746, "learning_rate": 7.029383726965003e-06, "loss": 1.4903, "step": 24935 }, { "epoch": 0.7353678313430635, "grad_norm": 2.7508430970434916, "learning_rate": 7.029071675724727e-06, "loss": 1.4995, "step": 24940 }, { "epoch": 0.735515258735073, "grad_norm": 2.963740546345291, "learning_rate": 7.028759527994024e-06, "loss": 1.4849, "step": 24945 }, { "epoch": 0.7356626861270824, "grad_norm": 2.9503237361277423, "learning_rate": 7.028447283782077e-06, "loss": 1.4622, "step": 24950 }, { "epoch": 0.7358101135190919, "grad_norm": 2.7709999540735994, "learning_rate": 7.028134943098075e-06, "loss": 1.5036, "step": 24955 }, { "epoch": 0.7359575409111013, "grad_norm": 2.894081378874065, "learning_rate": 7.02782250595121e-06, "loss": 1.5099, "step": 24960 }, { "epoch": 0.7361049683031107, "grad_norm": 2.9434156301592638, "learning_rate": 7.027509972350674e-06, "loss": 1.5203, "step": 24965 }, { "epoch": 0.7362523956951201, "grad_norm": 2.82824178843836, "learning_rate": 7.027197342305665e-06, "loss": 1.5273, "step": 24970 }, { "epoch": 0.7363998230871296, "grad_norm": 2.8163170465663336, "learning_rate": 7.026884615825381e-06, "loss": 1.4689, "step": 24975 }, { "epoch": 0.736547250479139, "grad_norm": 2.7776304738150555, "learning_rate": 7.026571792919027e-06, "loss": 1.4977, "step": 24980 }, { "epoch": 0.7366946778711485, "grad_norm": 2.7812383583935536, "learning_rate": 7.026258873595806e-06, "loss": 1.4259, "step": 24985 }, { "epoch": 0.7368421052631579, "grad_norm": 2.9292339196576282, "learning_rate": 7.025945857864925e-06, "loss": 1.5131, "step": 24990 }, { "epoch": 0.7369895326551673, "grad_norm": 2.733582567258752, "learning_rate": 7.025632745735598e-06, "loss": 1.4864, "step": 24995 }, { "epoch": 0.7371369600471768, "grad_norm": 2.9578764052021476, "learning_rate": 7.025319537217037e-06, "loss": 1.5111, "step": 25000 }, { "epoch": 0.7371369600471768, "eval_loss": 1.1622768640518188, "eval_runtime": 4.3068, "eval_samples_per_second": 91.948, "eval_steps_per_second": 3.018, "step": 25000 }, { "epoch": 0.7372843874391862, "grad_norm": 2.83780262502548, "learning_rate": 7.025006232318459e-06, "loss": 1.5041, "step": 25005 }, { "epoch": 0.7374318148311957, "grad_norm": 2.779588761874096, "learning_rate": 7.024692831049082e-06, "loss": 1.4516, "step": 25010 }, { "epoch": 0.737579242223205, "grad_norm": 3.0597826104990733, "learning_rate": 7.02437933341813e-06, "loss": 1.4894, "step": 25015 }, { "epoch": 0.7377266696152145, "grad_norm": 2.8660236764141125, "learning_rate": 7.024065739434826e-06, "loss": 1.4366, "step": 25020 }, { "epoch": 0.737874097007224, "grad_norm": 2.8398929631049614, "learning_rate": 7.023752049108399e-06, "loss": 1.4558, "step": 25025 }, { "epoch": 0.7380215243992334, "grad_norm": 2.805546638772023, "learning_rate": 7.02343826244808e-06, "loss": 1.4921, "step": 25030 }, { "epoch": 0.7381689517912429, "grad_norm": 2.7285992875385356, "learning_rate": 7.023124379463104e-06, "loss": 1.466, "step": 25035 }, { "epoch": 0.7383163791832522, "grad_norm": 2.9543021996258596, "learning_rate": 7.022810400162704e-06, "loss": 1.4901, "step": 25040 }, { "epoch": 0.7384638065752617, "grad_norm": 2.8521258421677076, "learning_rate": 7.02249632455612e-06, "loss": 1.5321, "step": 25045 }, { "epoch": 0.7386112339672711, "grad_norm": 2.926170442104902, "learning_rate": 7.022182152652596e-06, "loss": 1.543, "step": 25050 }, { "epoch": 0.7387586613592806, "grad_norm": 2.846054756487171, "learning_rate": 7.021867884461376e-06, "loss": 1.4367, "step": 25055 }, { "epoch": 0.7389060887512899, "grad_norm": 2.9309571407067305, "learning_rate": 7.021553519991707e-06, "loss": 1.4719, "step": 25060 }, { "epoch": 0.7390535161432994, "grad_norm": 3.1154695836192006, "learning_rate": 7.0212390592528395e-06, "loss": 1.532, "step": 25065 }, { "epoch": 0.7392009435353089, "grad_norm": 2.884786384270265, "learning_rate": 7.020924502254028e-06, "loss": 1.5033, "step": 25070 }, { "epoch": 0.7393483709273183, "grad_norm": 2.8105221448216233, "learning_rate": 7.020609849004528e-06, "loss": 1.4508, "step": 25075 }, { "epoch": 0.7394957983193278, "grad_norm": 2.9088049842410864, "learning_rate": 7.020295099513598e-06, "loss": 1.4761, "step": 25080 }, { "epoch": 0.7396432257113371, "grad_norm": 2.9069364844605845, "learning_rate": 7.019980253790501e-06, "loss": 1.5113, "step": 25085 }, { "epoch": 0.7397906531033466, "grad_norm": 3.046783912653419, "learning_rate": 7.019665311844501e-06, "loss": 1.5058, "step": 25090 }, { "epoch": 0.739938080495356, "grad_norm": 3.6173069050202975, "learning_rate": 7.0193502736848655e-06, "loss": 1.528, "step": 25095 }, { "epoch": 0.7400855078873655, "grad_norm": 2.752057939367429, "learning_rate": 7.019035139320864e-06, "loss": 1.4739, "step": 25100 }, { "epoch": 0.740232935279375, "grad_norm": 2.792468604458577, "learning_rate": 7.018719908761773e-06, "loss": 1.5176, "step": 25105 }, { "epoch": 0.7403803626713843, "grad_norm": 2.7333441253553303, "learning_rate": 7.018404582016866e-06, "loss": 1.4669, "step": 25110 }, { "epoch": 0.7405277900633938, "grad_norm": 2.8109650392107417, "learning_rate": 7.01808915909542e-06, "loss": 1.5374, "step": 25115 }, { "epoch": 0.7406752174554032, "grad_norm": 2.708027906494191, "learning_rate": 7.017773640006719e-06, "loss": 1.4632, "step": 25120 }, { "epoch": 0.7408226448474127, "grad_norm": 2.9632734857124507, "learning_rate": 7.017458024760048e-06, "loss": 1.5529, "step": 25125 }, { "epoch": 0.7409700722394221, "grad_norm": 2.760165773454496, "learning_rate": 7.017142313364694e-06, "loss": 1.4837, "step": 25130 }, { "epoch": 0.7411174996314315, "grad_norm": 2.8581532350486736, "learning_rate": 7.016826505829946e-06, "loss": 1.4859, "step": 25135 }, { "epoch": 0.7412649270234409, "grad_norm": 2.8609977663336625, "learning_rate": 7.0165106021650975e-06, "loss": 1.469, "step": 25140 }, { "epoch": 0.7414123544154504, "grad_norm": 2.7734351993290223, "learning_rate": 7.016194602379445e-06, "loss": 1.4614, "step": 25145 }, { "epoch": 0.7415597818074599, "grad_norm": 2.7623555042786405, "learning_rate": 7.015878506482288e-06, "loss": 1.458, "step": 25150 }, { "epoch": 0.7417072091994693, "grad_norm": 2.7817070421835783, "learning_rate": 7.015562314482924e-06, "loss": 1.4759, "step": 25155 }, { "epoch": 0.7418546365914787, "grad_norm": 2.853603224056613, "learning_rate": 7.015246026390662e-06, "loss": 1.4857, "step": 25160 }, { "epoch": 0.7420020639834881, "grad_norm": 2.923610414827752, "learning_rate": 7.014929642214807e-06, "loss": 1.5021, "step": 25165 }, { "epoch": 0.7421494913754976, "grad_norm": 2.8323160299633434, "learning_rate": 7.014613161964668e-06, "loss": 1.5078, "step": 25170 }, { "epoch": 0.742296918767507, "grad_norm": 2.685484892008227, "learning_rate": 7.01429658564956e-06, "loss": 1.4863, "step": 25175 }, { "epoch": 0.7424443461595165, "grad_norm": 2.8379762490391243, "learning_rate": 7.013979913278797e-06, "loss": 1.4589, "step": 25180 }, { "epoch": 0.7425917735515258, "grad_norm": 2.837547875337471, "learning_rate": 7.013663144861698e-06, "loss": 1.5223, "step": 25185 }, { "epoch": 0.7427392009435353, "grad_norm": 2.942610829501263, "learning_rate": 7.013346280407584e-06, "loss": 1.4977, "step": 25190 }, { "epoch": 0.7428866283355448, "grad_norm": 2.8029381628317274, "learning_rate": 7.0130293199257805e-06, "loss": 1.5095, "step": 25195 }, { "epoch": 0.7430340557275542, "grad_norm": 2.747863428740777, "learning_rate": 7.012712263425612e-06, "loss": 1.4857, "step": 25200 }, { "epoch": 0.7431814831195637, "grad_norm": 2.9502070837667147, "learning_rate": 7.01239511091641e-06, "loss": 1.4967, "step": 25205 }, { "epoch": 0.743328910511573, "grad_norm": 2.9072556679391877, "learning_rate": 7.012077862407507e-06, "loss": 1.534, "step": 25210 }, { "epoch": 0.7434763379035825, "grad_norm": 2.8364808263303947, "learning_rate": 7.011760517908237e-06, "loss": 1.4878, "step": 25215 }, { "epoch": 0.7436237652955919, "grad_norm": 2.76859300260514, "learning_rate": 7.01144307742794e-06, "loss": 1.5244, "step": 25220 }, { "epoch": 0.7437711926876014, "grad_norm": 2.785899116912207, "learning_rate": 7.011125540975957e-06, "loss": 1.4783, "step": 25225 }, { "epoch": 0.7439186200796107, "grad_norm": 2.7777423307958764, "learning_rate": 7.01080790856163e-06, "loss": 1.4732, "step": 25230 }, { "epoch": 0.7440660474716202, "grad_norm": 2.7390490976073267, "learning_rate": 7.010490180194307e-06, "loss": 1.4705, "step": 25235 }, { "epoch": 0.7442134748636297, "grad_norm": 2.8133682713969748, "learning_rate": 7.010172355883337e-06, "loss": 1.4767, "step": 25240 }, { "epoch": 0.7443609022556391, "grad_norm": 2.9213541889324817, "learning_rate": 7.009854435638074e-06, "loss": 1.4931, "step": 25245 }, { "epoch": 0.7445083296476486, "grad_norm": 2.7225429682034963, "learning_rate": 7.009536419467871e-06, "loss": 1.469, "step": 25250 }, { "epoch": 0.7446557570396579, "grad_norm": 2.8431305523680432, "learning_rate": 7.009218307382087e-06, "loss": 1.4996, "step": 25255 }, { "epoch": 0.7448031844316674, "grad_norm": 2.6888967449462173, "learning_rate": 7.008900099390081e-06, "loss": 1.5034, "step": 25260 }, { "epoch": 0.7449506118236768, "grad_norm": 2.8023762842416877, "learning_rate": 7.008581795501222e-06, "loss": 1.4742, "step": 25265 }, { "epoch": 0.7450980392156863, "grad_norm": 2.785616417996403, "learning_rate": 7.008263395724869e-06, "loss": 1.4951, "step": 25270 }, { "epoch": 0.7452454666076958, "grad_norm": 3.0020143727912116, "learning_rate": 7.007944900070397e-06, "loss": 1.4657, "step": 25275 }, { "epoch": 0.7453928939997051, "grad_norm": 2.915583788249511, "learning_rate": 7.007626308547176e-06, "loss": 1.5196, "step": 25280 }, { "epoch": 0.7455403213917146, "grad_norm": 2.7420603847966603, "learning_rate": 7.007307621164581e-06, "loss": 1.4214, "step": 25285 }, { "epoch": 0.745687748783724, "grad_norm": 3.024998731972462, "learning_rate": 7.0069888379319896e-06, "loss": 1.5189, "step": 25290 }, { "epoch": 0.7458351761757335, "grad_norm": 2.854567601659568, "learning_rate": 7.006669958858783e-06, "loss": 1.5304, "step": 25295 }, { "epoch": 0.7459826035677429, "grad_norm": 2.770622606763596, "learning_rate": 7.006350983954344e-06, "loss": 1.5058, "step": 25300 }, { "epoch": 0.7461300309597523, "grad_norm": 2.8242547591229976, "learning_rate": 7.006031913228058e-06, "loss": 1.4745, "step": 25305 }, { "epoch": 0.7462774583517617, "grad_norm": 2.7745763442579388, "learning_rate": 7.005712746689315e-06, "loss": 1.4699, "step": 25310 }, { "epoch": 0.7464248857437712, "grad_norm": 2.8526046097722935, "learning_rate": 7.005393484347508e-06, "loss": 1.5342, "step": 25315 }, { "epoch": 0.7465723131357807, "grad_norm": 2.812918521300187, "learning_rate": 7.00507412621203e-06, "loss": 1.4831, "step": 25320 }, { "epoch": 0.7467197405277901, "grad_norm": 2.862648858337889, "learning_rate": 7.004754672292278e-06, "loss": 1.4672, "step": 25325 }, { "epoch": 0.7468671679197995, "grad_norm": 2.814518389227755, "learning_rate": 7.004435122597654e-06, "loss": 1.4546, "step": 25330 }, { "epoch": 0.7470145953118089, "grad_norm": 2.9613255259241624, "learning_rate": 7.00411547713756e-06, "loss": 1.4786, "step": 25335 }, { "epoch": 0.7471620227038184, "grad_norm": 2.7853180523816814, "learning_rate": 7.003795735921403e-06, "loss": 1.4848, "step": 25340 }, { "epoch": 0.7473094500958278, "grad_norm": 2.9092368384728755, "learning_rate": 7.003475898958591e-06, "loss": 1.564, "step": 25345 }, { "epoch": 0.7474568774878373, "grad_norm": 2.8883039580381027, "learning_rate": 7.003155966258535e-06, "loss": 1.5052, "step": 25350 }, { "epoch": 0.7476043048798466, "grad_norm": 2.8896275406571186, "learning_rate": 7.002835937830651e-06, "loss": 1.4937, "step": 25355 }, { "epoch": 0.7477517322718561, "grad_norm": 2.736210554594489, "learning_rate": 7.0025158136843534e-06, "loss": 1.5192, "step": 25360 }, { "epoch": 0.7478991596638656, "grad_norm": 2.769122467239059, "learning_rate": 7.002195593829066e-06, "loss": 1.4176, "step": 25365 }, { "epoch": 0.748046587055875, "grad_norm": 2.8351126421562625, "learning_rate": 7.001875278274208e-06, "loss": 1.4798, "step": 25370 }, { "epoch": 0.7481940144478845, "grad_norm": 3.056621233892507, "learning_rate": 7.001554867029207e-06, "loss": 1.5009, "step": 25375 }, { "epoch": 0.7483414418398938, "grad_norm": 2.819400057783383, "learning_rate": 7.001234360103492e-06, "loss": 1.5064, "step": 25380 }, { "epoch": 0.7484888692319033, "grad_norm": 2.832852013488807, "learning_rate": 7.0009137575064935e-06, "loss": 1.5381, "step": 25385 }, { "epoch": 0.7486362966239127, "grad_norm": 2.832585177522869, "learning_rate": 7.000593059247644e-06, "loss": 1.4644, "step": 25390 }, { "epoch": 0.7487837240159222, "grad_norm": 2.753713336463973, "learning_rate": 7.000272265336383e-06, "loss": 1.4608, "step": 25395 }, { "epoch": 0.7489311514079315, "grad_norm": 2.867657426257419, "learning_rate": 6.99995137578215e-06, "loss": 1.4861, "step": 25400 }, { "epoch": 0.749078578799941, "grad_norm": 2.7661327475233892, "learning_rate": 6.999630390594385e-06, "loss": 1.4785, "step": 25405 }, { "epoch": 0.7492260061919505, "grad_norm": 2.9295161612347678, "learning_rate": 6.9993093097825355e-06, "loss": 1.4573, "step": 25410 }, { "epoch": 0.7493734335839599, "grad_norm": 2.9419917715690866, "learning_rate": 6.99898813335605e-06, "loss": 1.4971, "step": 25415 }, { "epoch": 0.7495208609759694, "grad_norm": 2.8873009657906117, "learning_rate": 6.9986668613243784e-06, "loss": 1.4723, "step": 25420 }, { "epoch": 0.7496682883679787, "grad_norm": 3.0306669820395946, "learning_rate": 6.9983454936969744e-06, "loss": 1.4865, "step": 25425 }, { "epoch": 0.7498157157599882, "grad_norm": 2.966183940738294, "learning_rate": 6.998024030483296e-06, "loss": 1.5173, "step": 25430 }, { "epoch": 0.7499631431519976, "grad_norm": 2.852309367763193, "learning_rate": 6.997702471692801e-06, "loss": 1.5251, "step": 25435 }, { "epoch": 0.7501105705440071, "grad_norm": 2.8470553435846133, "learning_rate": 6.997380817334952e-06, "loss": 1.4988, "step": 25440 }, { "epoch": 0.7502579979360166, "grad_norm": 2.925434555903916, "learning_rate": 6.9970590674192146e-06, "loss": 1.4515, "step": 25445 }, { "epoch": 0.7504054253280259, "grad_norm": 2.928978994767133, "learning_rate": 6.996737221955056e-06, "loss": 1.5119, "step": 25450 }, { "epoch": 0.7505528527200354, "grad_norm": 2.887568929956012, "learning_rate": 6.996415280951948e-06, "loss": 1.4905, "step": 25455 }, { "epoch": 0.7507002801120448, "grad_norm": 2.9137223232210077, "learning_rate": 6.996093244419363e-06, "loss": 1.4334, "step": 25460 }, { "epoch": 0.7508477075040543, "grad_norm": 2.8402121285952378, "learning_rate": 6.995771112366777e-06, "loss": 1.4863, "step": 25465 }, { "epoch": 0.7509951348960637, "grad_norm": 2.765477297028653, "learning_rate": 6.995448884803671e-06, "loss": 1.4826, "step": 25470 }, { "epoch": 0.7511425622880731, "grad_norm": 2.801354828562709, "learning_rate": 6.995126561739524e-06, "loss": 1.4459, "step": 25475 }, { "epoch": 0.7512899896800825, "grad_norm": 2.836809354477069, "learning_rate": 6.994804143183824e-06, "loss": 1.5106, "step": 25480 }, { "epoch": 0.751437417072092, "grad_norm": 2.7888977659798475, "learning_rate": 6.994481629146056e-06, "loss": 1.5203, "step": 25485 }, { "epoch": 0.7515848444641015, "grad_norm": 2.867274919385323, "learning_rate": 6.994159019635712e-06, "loss": 1.5426, "step": 25490 }, { "epoch": 0.7517322718561109, "grad_norm": 3.2014510477765055, "learning_rate": 6.993836314662283e-06, "loss": 1.5018, "step": 25495 }, { "epoch": 0.7518796992481203, "grad_norm": 2.814366970312873, "learning_rate": 6.993513514235268e-06, "loss": 1.4707, "step": 25500 }, { "epoch": 0.7518796992481203, "eval_loss": 1.160471796989441, "eval_runtime": 4.1803, "eval_samples_per_second": 94.73, "eval_steps_per_second": 3.11, "step": 25500 }, { "epoch": 0.7520271266401297, "grad_norm": 2.763487673142702, "learning_rate": 6.993190618364164e-06, "loss": 1.5792, "step": 25505 }, { "epoch": 0.7521745540321392, "grad_norm": 2.722770535520587, "learning_rate": 6.992867627058472e-06, "loss": 1.4817, "step": 25510 }, { "epoch": 0.7523219814241486, "grad_norm": 2.924309774789344, "learning_rate": 6.992544540327697e-06, "loss": 1.4956, "step": 25515 }, { "epoch": 0.7524694088161581, "grad_norm": 2.834063529085765, "learning_rate": 6.992221358181347e-06, "loss": 1.5272, "step": 25520 }, { "epoch": 0.7526168362081674, "grad_norm": 2.829060580174692, "learning_rate": 6.991898080628931e-06, "loss": 1.4763, "step": 25525 }, { "epoch": 0.7527642636001769, "grad_norm": 2.732025508271198, "learning_rate": 6.9915747076799616e-06, "loss": 1.4359, "step": 25530 }, { "epoch": 0.7529116909921864, "grad_norm": 2.677136064143959, "learning_rate": 6.991251239343956e-06, "loss": 1.5031, "step": 25535 }, { "epoch": 0.7530591183841958, "grad_norm": 2.92561909803991, "learning_rate": 6.9909276756304305e-06, "loss": 1.4823, "step": 25540 }, { "epoch": 0.7532065457762053, "grad_norm": 2.852503427639451, "learning_rate": 6.990604016548908e-06, "loss": 1.4017, "step": 25545 }, { "epoch": 0.7533539731682146, "grad_norm": 2.8790579595598578, "learning_rate": 6.990280262108912e-06, "loss": 1.5412, "step": 25550 }, { "epoch": 0.7535014005602241, "grad_norm": 2.8522435924590597, "learning_rate": 6.989956412319968e-06, "loss": 1.4695, "step": 25555 }, { "epoch": 0.7536488279522335, "grad_norm": 2.976995167729614, "learning_rate": 6.989632467191609e-06, "loss": 1.4767, "step": 25560 }, { "epoch": 0.753796255344243, "grad_norm": 3.2907461160696783, "learning_rate": 6.989308426733364e-06, "loss": 1.4828, "step": 25565 }, { "epoch": 0.7539436827362523, "grad_norm": 2.9998155608771264, "learning_rate": 6.98898429095477e-06, "loss": 1.5217, "step": 25570 }, { "epoch": 0.7540911101282618, "grad_norm": 2.705290883732631, "learning_rate": 6.988660059865365e-06, "loss": 1.4945, "step": 25575 }, { "epoch": 0.7542385375202713, "grad_norm": 2.8079001042391845, "learning_rate": 6.988335733474689e-06, "loss": 1.4979, "step": 25580 }, { "epoch": 0.7543859649122807, "grad_norm": 2.738822357677438, "learning_rate": 6.9880113117922866e-06, "loss": 1.4891, "step": 25585 }, { "epoch": 0.7545333923042902, "grad_norm": 2.8906657807885767, "learning_rate": 6.987686794827704e-06, "loss": 1.4765, "step": 25590 }, { "epoch": 0.7546808196962995, "grad_norm": 2.823217816526984, "learning_rate": 6.98736218259049e-06, "loss": 1.4895, "step": 25595 }, { "epoch": 0.754828247088309, "grad_norm": 2.7105811844542957, "learning_rate": 6.987037475090197e-06, "loss": 1.4608, "step": 25600 }, { "epoch": 0.7549756744803184, "grad_norm": 2.7192691750933364, "learning_rate": 6.98671267233638e-06, "loss": 1.483, "step": 25605 }, { "epoch": 0.7551231018723279, "grad_norm": 2.8099798667879425, "learning_rate": 6.986387774338598e-06, "loss": 1.4417, "step": 25610 }, { "epoch": 0.7552705292643374, "grad_norm": 2.955437677186407, "learning_rate": 6.986062781106409e-06, "loss": 1.4836, "step": 25615 }, { "epoch": 0.7554179566563467, "grad_norm": 2.832695863370349, "learning_rate": 6.985737692649377e-06, "loss": 1.4894, "step": 25620 }, { "epoch": 0.7555653840483562, "grad_norm": 2.876575992959894, "learning_rate": 6.9854125089770696e-06, "loss": 1.4731, "step": 25625 }, { "epoch": 0.7557128114403656, "grad_norm": 2.760630824536897, "learning_rate": 6.9850872300990545e-06, "loss": 1.505, "step": 25630 }, { "epoch": 0.7558602388323751, "grad_norm": 2.823997071914026, "learning_rate": 6.984761856024902e-06, "loss": 1.5169, "step": 25635 }, { "epoch": 0.7560076662243845, "grad_norm": 2.98651729541477, "learning_rate": 6.984436386764191e-06, "loss": 1.4846, "step": 25640 }, { "epoch": 0.7561550936163939, "grad_norm": 2.845689341649826, "learning_rate": 6.984110822326494e-06, "loss": 1.5371, "step": 25645 }, { "epoch": 0.7563025210084033, "grad_norm": 2.9411911386129477, "learning_rate": 6.9837851627213935e-06, "loss": 1.5223, "step": 25650 }, { "epoch": 0.7564499484004128, "grad_norm": 2.866860308111417, "learning_rate": 6.983459407958473e-06, "loss": 1.5236, "step": 25655 }, { "epoch": 0.7565973757924223, "grad_norm": 2.900214598318338, "learning_rate": 6.983133558047315e-06, "loss": 1.491, "step": 25660 }, { "epoch": 0.7567448031844317, "grad_norm": 2.9964155209137275, "learning_rate": 6.982807612997511e-06, "loss": 1.5292, "step": 25665 }, { "epoch": 0.7568922305764411, "grad_norm": 2.71459935557975, "learning_rate": 6.982481572818653e-06, "loss": 1.513, "step": 25670 }, { "epoch": 0.7570396579684505, "grad_norm": 2.800015100533776, "learning_rate": 6.982155437520332e-06, "loss": 1.5546, "step": 25675 }, { "epoch": 0.75718708536046, "grad_norm": 2.8460766382478275, "learning_rate": 6.981829207112148e-06, "loss": 1.4465, "step": 25680 }, { "epoch": 0.7573345127524694, "grad_norm": 2.8468215638144927, "learning_rate": 6.981502881603698e-06, "loss": 1.488, "step": 25685 }, { "epoch": 0.7574819401444789, "grad_norm": 2.8060909752553833, "learning_rate": 6.981176461004586e-06, "loss": 1.4701, "step": 25690 }, { "epoch": 0.7576293675364882, "grad_norm": 3.0375690984147896, "learning_rate": 6.980849945324417e-06, "loss": 1.4403, "step": 25695 }, { "epoch": 0.7577767949284977, "grad_norm": 2.777367262853504, "learning_rate": 6.980523334572799e-06, "loss": 1.5415, "step": 25700 }, { "epoch": 0.7579242223205072, "grad_norm": 3.07021890175002, "learning_rate": 6.980196628759344e-06, "loss": 1.5188, "step": 25705 }, { "epoch": 0.7580716497125166, "grad_norm": 2.8573898973666836, "learning_rate": 6.979869827893664e-06, "loss": 1.4692, "step": 25710 }, { "epoch": 0.7582190771045261, "grad_norm": 2.740548406963244, "learning_rate": 6.979542931985376e-06, "loss": 1.4692, "step": 25715 }, { "epoch": 0.7583665044965354, "grad_norm": 2.7659299045634285, "learning_rate": 6.979215941044098e-06, "loss": 1.5493, "step": 25720 }, { "epoch": 0.7585139318885449, "grad_norm": 2.9179294297032468, "learning_rate": 6.978888855079456e-06, "loss": 1.5139, "step": 25725 }, { "epoch": 0.7586613592805543, "grad_norm": 2.94493188820867, "learning_rate": 6.978561674101072e-06, "loss": 1.5417, "step": 25730 }, { "epoch": 0.7588087866725638, "grad_norm": 2.743220692385073, "learning_rate": 6.978234398118574e-06, "loss": 1.4819, "step": 25735 }, { "epoch": 0.7589562140645731, "grad_norm": 2.9181408658649315, "learning_rate": 6.9779070271415925e-06, "loss": 1.5472, "step": 25740 }, { "epoch": 0.7591036414565826, "grad_norm": 2.853644963974301, "learning_rate": 6.9775795611797605e-06, "loss": 1.4837, "step": 25745 }, { "epoch": 0.7592510688485921, "grad_norm": 2.838674359217411, "learning_rate": 6.977252000242714e-06, "loss": 1.4389, "step": 25750 }, { "epoch": 0.7593984962406015, "grad_norm": 2.8936747175654403, "learning_rate": 6.9769243443400925e-06, "loss": 1.5019, "step": 25755 }, { "epoch": 0.759545923632611, "grad_norm": 2.697672743514839, "learning_rate": 6.976596593481538e-06, "loss": 1.451, "step": 25760 }, { "epoch": 0.7596933510246203, "grad_norm": 2.7896262928284403, "learning_rate": 6.976268747676694e-06, "loss": 1.4931, "step": 25765 }, { "epoch": 0.7598407784166298, "grad_norm": 2.8099592625287797, "learning_rate": 6.975940806935208e-06, "loss": 1.4549, "step": 25770 }, { "epoch": 0.7599882058086392, "grad_norm": 2.846843720336315, "learning_rate": 6.97561277126673e-06, "loss": 1.4758, "step": 25775 }, { "epoch": 0.7601356332006487, "grad_norm": 2.726261216511844, "learning_rate": 6.975284640680914e-06, "loss": 1.5017, "step": 25780 }, { "epoch": 0.7602830605926582, "grad_norm": 2.8283602260889404, "learning_rate": 6.974956415187413e-06, "loss": 1.5138, "step": 25785 }, { "epoch": 0.7604304879846675, "grad_norm": 2.7282264044124727, "learning_rate": 6.974628094795888e-06, "loss": 1.539, "step": 25790 }, { "epoch": 0.760577915376677, "grad_norm": 2.8459808150833275, "learning_rate": 6.9742996795159994e-06, "loss": 1.5054, "step": 25795 }, { "epoch": 0.7607253427686864, "grad_norm": 2.8760677794538902, "learning_rate": 6.97397116935741e-06, "loss": 1.5064, "step": 25800 }, { "epoch": 0.7608727701606959, "grad_norm": 2.894412537149197, "learning_rate": 6.973642564329788e-06, "loss": 1.4986, "step": 25805 }, { "epoch": 0.7610201975527053, "grad_norm": 2.7913384186615215, "learning_rate": 6.973313864442803e-06, "loss": 1.5009, "step": 25810 }, { "epoch": 0.7611676249447147, "grad_norm": 2.766051220029901, "learning_rate": 6.9729850697061256e-06, "loss": 1.4906, "step": 25815 }, { "epoch": 0.7613150523367241, "grad_norm": 2.856633633354801, "learning_rate": 6.972656180129433e-06, "loss": 1.4823, "step": 25820 }, { "epoch": 0.7614624797287336, "grad_norm": 2.8922254610476035, "learning_rate": 6.972327195722403e-06, "loss": 1.5304, "step": 25825 }, { "epoch": 0.7616099071207431, "grad_norm": 2.7510086950337125, "learning_rate": 6.971998116494715e-06, "loss": 1.4856, "step": 25830 }, { "epoch": 0.7617573345127525, "grad_norm": 2.7565824716838723, "learning_rate": 6.9716689424560534e-06, "loss": 1.482, "step": 25835 }, { "epoch": 0.7619047619047619, "grad_norm": 2.9814652512973296, "learning_rate": 6.9713396736161045e-06, "loss": 1.5087, "step": 25840 }, { "epoch": 0.7620521892967713, "grad_norm": 2.7418994926107727, "learning_rate": 6.971010309984557e-06, "loss": 1.4277, "step": 25845 }, { "epoch": 0.7621996166887808, "grad_norm": 2.7841557748202352, "learning_rate": 6.970680851571104e-06, "loss": 1.4914, "step": 25850 }, { "epoch": 0.7623470440807902, "grad_norm": 2.8373583959346904, "learning_rate": 6.9703512983854385e-06, "loss": 1.469, "step": 25855 }, { "epoch": 0.7624944714727997, "grad_norm": 3.1551169773635572, "learning_rate": 6.970021650437259e-06, "loss": 1.4795, "step": 25860 }, { "epoch": 0.762641898864809, "grad_norm": 2.7717800391756753, "learning_rate": 6.9696919077362655e-06, "loss": 1.4344, "step": 25865 }, { "epoch": 0.7627893262568185, "grad_norm": 2.8110944794923114, "learning_rate": 6.9693620702921615e-06, "loss": 1.476, "step": 25870 }, { "epoch": 0.762936753648828, "grad_norm": 2.8075091808943404, "learning_rate": 6.969032138114653e-06, "loss": 1.5175, "step": 25875 }, { "epoch": 0.7630841810408374, "grad_norm": 2.950123806880235, "learning_rate": 6.968702111213446e-06, "loss": 1.4968, "step": 25880 }, { "epoch": 0.7632316084328469, "grad_norm": 2.8449718049830546, "learning_rate": 6.968371989598256e-06, "loss": 1.5234, "step": 25885 }, { "epoch": 0.7633790358248562, "grad_norm": 2.847964110638496, "learning_rate": 6.968041773278794e-06, "loss": 1.4796, "step": 25890 }, { "epoch": 0.7635264632168657, "grad_norm": 2.8724346093146096, "learning_rate": 6.967711462264779e-06, "loss": 1.4837, "step": 25895 }, { "epoch": 0.7636738906088751, "grad_norm": 2.8269333277520428, "learning_rate": 6.96738105656593e-06, "loss": 1.5153, "step": 25900 }, { "epoch": 0.7638213180008846, "grad_norm": 2.8578355734561347, "learning_rate": 6.9670505561919685e-06, "loss": 1.5341, "step": 25905 }, { "epoch": 0.7639687453928939, "grad_norm": 2.9913123066347773, "learning_rate": 6.966719961152622e-06, "loss": 1.5758, "step": 25910 }, { "epoch": 0.7641161727849034, "grad_norm": 2.7685009753203347, "learning_rate": 6.966389271457617e-06, "loss": 1.485, "step": 25915 }, { "epoch": 0.7642636001769129, "grad_norm": 3.014691320629421, "learning_rate": 6.9660584871166855e-06, "loss": 1.4968, "step": 25920 }, { "epoch": 0.7644110275689223, "grad_norm": 2.9026109769642336, "learning_rate": 6.96572760813956e-06, "loss": 1.5561, "step": 25925 }, { "epoch": 0.7645584549609318, "grad_norm": 2.84904750022899, "learning_rate": 6.965396634535978e-06, "loss": 1.4995, "step": 25930 }, { "epoch": 0.7647058823529411, "grad_norm": 2.824221929506519, "learning_rate": 6.9650655663156784e-06, "loss": 1.3997, "step": 25935 }, { "epoch": 0.7648533097449506, "grad_norm": 2.729113689106629, "learning_rate": 6.964734403488403e-06, "loss": 1.5453, "step": 25940 }, { "epoch": 0.76500073713696, "grad_norm": 2.893912225724627, "learning_rate": 6.9644031460638976e-06, "loss": 1.475, "step": 25945 }, { "epoch": 0.7651481645289695, "grad_norm": 2.9142254116118402, "learning_rate": 6.964071794051909e-06, "loss": 1.5023, "step": 25950 }, { "epoch": 0.765295591920979, "grad_norm": 2.8138583253690452, "learning_rate": 6.963740347462187e-06, "loss": 1.4821, "step": 25955 }, { "epoch": 0.7654430193129883, "grad_norm": 2.810018886403417, "learning_rate": 6.9634088063044856e-06, "loss": 1.5156, "step": 25960 }, { "epoch": 0.7655904467049978, "grad_norm": 2.8367486271243765, "learning_rate": 6.963077170588562e-06, "loss": 1.5027, "step": 25965 }, { "epoch": 0.7657378740970072, "grad_norm": 2.7981618742336956, "learning_rate": 6.962745440324172e-06, "loss": 1.4501, "step": 25970 }, { "epoch": 0.7658853014890167, "grad_norm": 2.7167378796940205, "learning_rate": 6.962413615521081e-06, "loss": 1.4576, "step": 25975 }, { "epoch": 0.7660327288810261, "grad_norm": 2.8364398187455495, "learning_rate": 6.96208169618905e-06, "loss": 1.4201, "step": 25980 }, { "epoch": 0.7661801562730355, "grad_norm": 3.046505982126823, "learning_rate": 6.961749682337846e-06, "loss": 1.4426, "step": 25985 }, { "epoch": 0.7663275836650449, "grad_norm": 2.9541216479367804, "learning_rate": 6.961417573977242e-06, "loss": 1.4849, "step": 25990 }, { "epoch": 0.7664750110570544, "grad_norm": 2.739124242075502, "learning_rate": 6.961085371117009e-06, "loss": 1.5056, "step": 25995 }, { "epoch": 0.7666224384490639, "grad_norm": 2.979079964485504, "learning_rate": 6.96075307376692e-06, "loss": 1.5201, "step": 26000 }, { "epoch": 0.7666224384490639, "eval_loss": 1.1585757732391357, "eval_runtime": 4.2883, "eval_samples_per_second": 92.345, "eval_steps_per_second": 3.032, "step": 26000 }, { "epoch": 0.7667698658410733, "grad_norm": 2.8201179311674336, "learning_rate": 6.9604206819367586e-06, "loss": 1.4632, "step": 26005 }, { "epoch": 0.7669172932330827, "grad_norm": 2.8060043636044525, "learning_rate": 6.960088195636301e-06, "loss": 1.499, "step": 26010 }, { "epoch": 0.7670647206250921, "grad_norm": 2.822684457155699, "learning_rate": 6.959755614875333e-06, "loss": 1.4523, "step": 26015 }, { "epoch": 0.7672121480171016, "grad_norm": 2.7557162036426814, "learning_rate": 6.95942293966364e-06, "loss": 1.4751, "step": 26020 }, { "epoch": 0.767359575409111, "grad_norm": 2.945129641105185, "learning_rate": 6.959090170011013e-06, "loss": 1.4885, "step": 26025 }, { "epoch": 0.7675070028011205, "grad_norm": 2.960396799745544, "learning_rate": 6.958757305927244e-06, "loss": 1.4523, "step": 26030 }, { "epoch": 0.7676544301931298, "grad_norm": 2.948680496129742, "learning_rate": 6.958424347422126e-06, "loss": 1.5083, "step": 26035 }, { "epoch": 0.7678018575851393, "grad_norm": 2.8093649361021327, "learning_rate": 6.958091294505459e-06, "loss": 1.4389, "step": 26040 }, { "epoch": 0.7679492849771488, "grad_norm": 2.863730802735602, "learning_rate": 6.957758147187041e-06, "loss": 1.4795, "step": 26045 }, { "epoch": 0.7680967123691582, "grad_norm": 2.8428232774786726, "learning_rate": 6.9574249054766785e-06, "loss": 1.4866, "step": 26050 }, { "epoch": 0.7682441397611677, "grad_norm": 2.8377202060762983, "learning_rate": 6.957091569384174e-06, "loss": 1.5022, "step": 26055 }, { "epoch": 0.768391567153177, "grad_norm": 2.9390691888479634, "learning_rate": 6.956758138919339e-06, "loss": 1.4796, "step": 26060 }, { "epoch": 0.7685389945451865, "grad_norm": 2.7636697873749614, "learning_rate": 6.956424614091984e-06, "loss": 1.4879, "step": 26065 }, { "epoch": 0.7686864219371959, "grad_norm": 2.5701116021130503, "learning_rate": 6.956090994911923e-06, "loss": 1.4527, "step": 26070 }, { "epoch": 0.7688338493292054, "grad_norm": 2.85167152140067, "learning_rate": 6.9557572813889735e-06, "loss": 1.4963, "step": 26075 }, { "epoch": 0.7689812767212149, "grad_norm": 2.848261111863682, "learning_rate": 6.955423473532955e-06, "loss": 1.5188, "step": 26080 }, { "epoch": 0.7691287041132242, "grad_norm": 2.9544619255425393, "learning_rate": 6.955089571353691e-06, "loss": 1.4823, "step": 26085 }, { "epoch": 0.7692761315052337, "grad_norm": 2.906154868498114, "learning_rate": 6.954755574861006e-06, "loss": 1.501, "step": 26090 }, { "epoch": 0.7694235588972431, "grad_norm": 2.7172714653393375, "learning_rate": 6.9544214840647305e-06, "loss": 1.462, "step": 26095 }, { "epoch": 0.7695709862892526, "grad_norm": 2.7792264737557617, "learning_rate": 6.954087298974693e-06, "loss": 1.4984, "step": 26100 }, { "epoch": 0.7697184136812619, "grad_norm": 2.858927183262815, "learning_rate": 6.953753019600727e-06, "loss": 1.5318, "step": 26105 }, { "epoch": 0.7698658410732714, "grad_norm": 2.7547801281268933, "learning_rate": 6.9534186459526715e-06, "loss": 1.4735, "step": 26110 }, { "epoch": 0.7700132684652808, "grad_norm": 2.9104967439278826, "learning_rate": 6.953084178040364e-06, "loss": 1.4471, "step": 26115 }, { "epoch": 0.7701606958572903, "grad_norm": 2.911310728055557, "learning_rate": 6.952749615873646e-06, "loss": 1.5422, "step": 26120 }, { "epoch": 0.7703081232492998, "grad_norm": 2.844660417707359, "learning_rate": 6.952414959462363e-06, "loss": 1.4608, "step": 26125 }, { "epoch": 0.7704555506413091, "grad_norm": 2.94916438691558, "learning_rate": 6.952080208816365e-06, "loss": 1.4729, "step": 26130 }, { "epoch": 0.7706029780333186, "grad_norm": 2.7276742946348467, "learning_rate": 6.951745363945501e-06, "loss": 1.4384, "step": 26135 }, { "epoch": 0.770750405425328, "grad_norm": 2.7275803426014753, "learning_rate": 6.951410424859621e-06, "loss": 1.4396, "step": 26140 }, { "epoch": 0.7708978328173375, "grad_norm": 2.8307285473493877, "learning_rate": 6.951075391568586e-06, "loss": 1.4346, "step": 26145 }, { "epoch": 0.7710452602093469, "grad_norm": 2.8127299368285894, "learning_rate": 6.95074026408225e-06, "loss": 1.5013, "step": 26150 }, { "epoch": 0.7711926876013563, "grad_norm": 2.7689783671269774, "learning_rate": 6.950405042410479e-06, "loss": 1.511, "step": 26155 }, { "epoch": 0.7713401149933657, "grad_norm": 2.792287508999394, "learning_rate": 6.950069726563134e-06, "loss": 1.492, "step": 26160 }, { "epoch": 0.7714875423853752, "grad_norm": 2.8523044068696954, "learning_rate": 6.949734316550084e-06, "loss": 1.5192, "step": 26165 }, { "epoch": 0.7716349697773847, "grad_norm": 2.830697792904474, "learning_rate": 6.949398812381197e-06, "loss": 1.4209, "step": 26170 }, { "epoch": 0.7717823971693941, "grad_norm": 2.846722535369651, "learning_rate": 6.949063214066347e-06, "loss": 1.5548, "step": 26175 }, { "epoch": 0.7719298245614035, "grad_norm": 3.2343847798596435, "learning_rate": 6.948727521615409e-06, "loss": 1.4829, "step": 26180 }, { "epoch": 0.7720772519534129, "grad_norm": 2.8434247970075734, "learning_rate": 6.948391735038261e-06, "loss": 1.4984, "step": 26185 }, { "epoch": 0.7722246793454224, "grad_norm": 2.847889004753253, "learning_rate": 6.9480558543447845e-06, "loss": 1.4938, "step": 26190 }, { "epoch": 0.7723721067374318, "grad_norm": 2.916219682066267, "learning_rate": 6.947719879544863e-06, "loss": 1.4538, "step": 26195 }, { "epoch": 0.7725195341294413, "grad_norm": 2.8154962651518316, "learning_rate": 6.947383810648381e-06, "loss": 1.4727, "step": 26200 }, { "epoch": 0.7726669615214506, "grad_norm": 2.8253257468754267, "learning_rate": 6.947047647665231e-06, "loss": 1.4282, "step": 26205 }, { "epoch": 0.7728143889134601, "grad_norm": 2.972852906705654, "learning_rate": 6.946711390605302e-06, "loss": 1.5054, "step": 26210 }, { "epoch": 0.7729618163054696, "grad_norm": 2.8368337991277803, "learning_rate": 6.9463750394784905e-06, "loss": 1.4906, "step": 26215 }, { "epoch": 0.773109243697479, "grad_norm": 2.841103774868191, "learning_rate": 6.946038594294695e-06, "loss": 1.4715, "step": 26220 }, { "epoch": 0.7732566710894885, "grad_norm": 2.8449394807586246, "learning_rate": 6.9457020550638135e-06, "loss": 1.4675, "step": 26225 }, { "epoch": 0.7734040984814978, "grad_norm": 2.8408714424917503, "learning_rate": 6.945365421795749e-06, "loss": 1.5086, "step": 26230 }, { "epoch": 0.7735515258735073, "grad_norm": 2.7715575327113124, "learning_rate": 6.945028694500409e-06, "loss": 1.4818, "step": 26235 }, { "epoch": 0.7736989532655167, "grad_norm": 2.783345450938507, "learning_rate": 6.944691873187701e-06, "loss": 1.4631, "step": 26240 }, { "epoch": 0.7738463806575262, "grad_norm": 2.858593702698728, "learning_rate": 6.944354957867537e-06, "loss": 1.4866, "step": 26245 }, { "epoch": 0.7739938080495357, "grad_norm": 2.806450646558289, "learning_rate": 6.944017948549831e-06, "loss": 1.4407, "step": 26250 }, { "epoch": 0.774141235441545, "grad_norm": 2.7345915027365044, "learning_rate": 6.9436808452444994e-06, "loss": 1.4998, "step": 26255 }, { "epoch": 0.7742886628335545, "grad_norm": 2.872877032081465, "learning_rate": 6.943343647961462e-06, "loss": 1.52, "step": 26260 }, { "epoch": 0.7744360902255639, "grad_norm": 2.814774327735538, "learning_rate": 6.943006356710641e-06, "loss": 1.5098, "step": 26265 }, { "epoch": 0.7745835176175734, "grad_norm": 2.77862299431561, "learning_rate": 6.942668971501962e-06, "loss": 1.4488, "step": 26270 }, { "epoch": 0.7747309450095827, "grad_norm": 2.811282243037782, "learning_rate": 6.942331492345354e-06, "loss": 1.4857, "step": 26275 }, { "epoch": 0.7748783724015922, "grad_norm": 2.9598097278418543, "learning_rate": 6.941993919250746e-06, "loss": 1.4774, "step": 26280 }, { "epoch": 0.7750257997936016, "grad_norm": 2.873250202885555, "learning_rate": 6.941656252228071e-06, "loss": 1.4386, "step": 26285 }, { "epoch": 0.7751732271856111, "grad_norm": 2.9018688910609005, "learning_rate": 6.941318491287268e-06, "loss": 1.5142, "step": 26290 }, { "epoch": 0.7753206545776206, "grad_norm": 2.7801145253293233, "learning_rate": 6.940980636438272e-06, "loss": 1.4878, "step": 26295 }, { "epoch": 0.7754680819696299, "grad_norm": 2.7134814994025764, "learning_rate": 6.940642687691029e-06, "loss": 1.4794, "step": 26300 }, { "epoch": 0.7756155093616394, "grad_norm": 2.8738674116645933, "learning_rate": 6.940304645055482e-06, "loss": 1.4597, "step": 26305 }, { "epoch": 0.7757629367536488, "grad_norm": 2.894187888838818, "learning_rate": 6.939966508541577e-06, "loss": 1.5058, "step": 26310 }, { "epoch": 0.7759103641456583, "grad_norm": 2.9137575481378737, "learning_rate": 6.939628278159264e-06, "loss": 1.5429, "step": 26315 }, { "epoch": 0.7760577915376677, "grad_norm": 2.951802103282537, "learning_rate": 6.939289953918498e-06, "loss": 1.4972, "step": 26320 }, { "epoch": 0.7762052189296771, "grad_norm": 2.8139206124288094, "learning_rate": 6.938951535829233e-06, "loss": 1.4762, "step": 26325 }, { "epoch": 0.7763526463216865, "grad_norm": 2.761794323052463, "learning_rate": 6.938613023901427e-06, "loss": 1.4863, "step": 26330 }, { "epoch": 0.776500073713696, "grad_norm": 2.7233579540385753, "learning_rate": 6.938274418145043e-06, "loss": 1.468, "step": 26335 }, { "epoch": 0.7766475011057055, "grad_norm": 2.7667422703983204, "learning_rate": 6.937935718570043e-06, "loss": 1.3954, "step": 26340 }, { "epoch": 0.7767949284977149, "grad_norm": 2.955216186337151, "learning_rate": 6.937596925186394e-06, "loss": 1.5362, "step": 26345 }, { "epoch": 0.7769423558897243, "grad_norm": 2.78826101316421, "learning_rate": 6.937258038004066e-06, "loss": 1.4933, "step": 26350 }, { "epoch": 0.7770897832817337, "grad_norm": 2.745234699712971, "learning_rate": 6.9369190570330305e-06, "loss": 1.4743, "step": 26355 }, { "epoch": 0.7772372106737432, "grad_norm": 2.96350350794512, "learning_rate": 6.936579982283263e-06, "loss": 1.494, "step": 26360 }, { "epoch": 0.7773846380657526, "grad_norm": 2.756382544367565, "learning_rate": 6.93624081376474e-06, "loss": 1.4875, "step": 26365 }, { "epoch": 0.7775320654577621, "grad_norm": 2.7692031263790997, "learning_rate": 6.935901551487444e-06, "loss": 1.4846, "step": 26370 }, { "epoch": 0.7776794928497714, "grad_norm": 3.1779282580322503, "learning_rate": 6.935562195461356e-06, "loss": 1.4876, "step": 26375 }, { "epoch": 0.7778269202417809, "grad_norm": 2.8783012032366395, "learning_rate": 6.9352227456964634e-06, "loss": 1.4775, "step": 26380 }, { "epoch": 0.7779743476337904, "grad_norm": 2.861763015357297, "learning_rate": 6.9348832022027536e-06, "loss": 1.4858, "step": 26385 }, { "epoch": 0.7781217750257998, "grad_norm": 2.7700455570788844, "learning_rate": 6.934543564990219e-06, "loss": 1.4479, "step": 26390 }, { "epoch": 0.7782692024178093, "grad_norm": 2.752440512265569, "learning_rate": 6.934203834068854e-06, "loss": 1.5066, "step": 26395 }, { "epoch": 0.7784166298098186, "grad_norm": 2.7229400349232136, "learning_rate": 6.933864009448656e-06, "loss": 1.4717, "step": 26400 }, { "epoch": 0.7785640572018281, "grad_norm": 2.9556295399046886, "learning_rate": 6.933524091139622e-06, "loss": 1.4657, "step": 26405 }, { "epoch": 0.7787114845938375, "grad_norm": 2.7886883263143094, "learning_rate": 6.933184079151758e-06, "loss": 1.5014, "step": 26410 }, { "epoch": 0.778858911985847, "grad_norm": 2.887767409994536, "learning_rate": 6.932843973495067e-06, "loss": 1.5066, "step": 26415 }, { "epoch": 0.7790063393778565, "grad_norm": 2.9980893332337466, "learning_rate": 6.932503774179558e-06, "loss": 1.4741, "step": 26420 }, { "epoch": 0.7791537667698658, "grad_norm": 2.7911844181182355, "learning_rate": 6.9321634812152415e-06, "loss": 1.531, "step": 26425 }, { "epoch": 0.7793011941618753, "grad_norm": 2.7569731134355284, "learning_rate": 6.931823094612131e-06, "loss": 1.5257, "step": 26430 }, { "epoch": 0.7794486215538847, "grad_norm": 2.749412602320448, "learning_rate": 6.931482614380243e-06, "loss": 1.5141, "step": 26435 }, { "epoch": 0.7795960489458942, "grad_norm": 2.689396163455992, "learning_rate": 6.931142040529594e-06, "loss": 1.4705, "step": 26440 }, { "epoch": 0.7797434763379035, "grad_norm": 2.808166764349673, "learning_rate": 6.93080137307021e-06, "loss": 1.5185, "step": 26445 }, { "epoch": 0.779890903729913, "grad_norm": 2.834529044509999, "learning_rate": 6.930460612012113e-06, "loss": 1.498, "step": 26450 }, { "epoch": 0.7800383311219224, "grad_norm": 2.730410056665606, "learning_rate": 6.930119757365331e-06, "loss": 1.4441, "step": 26455 }, { "epoch": 0.7801857585139319, "grad_norm": 2.8346429485260334, "learning_rate": 6.929778809139893e-06, "loss": 1.48, "step": 26460 }, { "epoch": 0.7803331859059414, "grad_norm": 2.7942080885651386, "learning_rate": 6.929437767345833e-06, "loss": 1.4675, "step": 26465 }, { "epoch": 0.7804806132979507, "grad_norm": 2.915049479169063, "learning_rate": 6.929096631993187e-06, "loss": 1.5121, "step": 26470 }, { "epoch": 0.7806280406899602, "grad_norm": 2.8422806796205142, "learning_rate": 6.928755403091992e-06, "loss": 1.4545, "step": 26475 }, { "epoch": 0.7807754680819696, "grad_norm": 2.899624368930755, "learning_rate": 6.928414080652289e-06, "loss": 1.4608, "step": 26480 }, { "epoch": 0.7809228954739791, "grad_norm": 2.899283737040069, "learning_rate": 6.928072664684123e-06, "loss": 1.4999, "step": 26485 }, { "epoch": 0.7810703228659885, "grad_norm": 2.8086259249319205, "learning_rate": 6.92773115519754e-06, "loss": 1.4877, "step": 26490 }, { "epoch": 0.7812177502579979, "grad_norm": 2.9044670510479604, "learning_rate": 6.9273895522025876e-06, "loss": 1.4761, "step": 26495 }, { "epoch": 0.7813651776500073, "grad_norm": 3.1184290978436886, "learning_rate": 6.927047855709321e-06, "loss": 1.5511, "step": 26500 }, { "epoch": 0.7813651776500073, "eval_loss": 1.1568034887313843, "eval_runtime": 4.1787, "eval_samples_per_second": 94.767, "eval_steps_per_second": 3.111, "step": 26500 }, { "epoch": 0.7815126050420168, "grad_norm": 2.712033147073672, "learning_rate": 6.926706065727793e-06, "loss": 1.5111, "step": 26505 }, { "epoch": 0.7816600324340263, "grad_norm": 2.7886977572402625, "learning_rate": 6.9263641822680615e-06, "loss": 1.4941, "step": 26510 }, { "epoch": 0.7818074598260357, "grad_norm": 2.681782076583957, "learning_rate": 6.9260222053401865e-06, "loss": 1.448, "step": 26515 }, { "epoch": 0.7819548872180451, "grad_norm": 2.7907543577040173, "learning_rate": 6.925680134954232e-06, "loss": 1.4885, "step": 26520 }, { "epoch": 0.7821023146100545, "grad_norm": 2.813152180668154, "learning_rate": 6.925337971120264e-06, "loss": 1.4872, "step": 26525 }, { "epoch": 0.782249742002064, "grad_norm": 2.855385946711485, "learning_rate": 6.924995713848349e-06, "loss": 1.4892, "step": 26530 }, { "epoch": 0.7823971693940734, "grad_norm": 2.827464798544643, "learning_rate": 6.92465336314856e-06, "loss": 1.5171, "step": 26535 }, { "epoch": 0.7825445967860829, "grad_norm": 2.947036744892626, "learning_rate": 6.924310919030972e-06, "loss": 1.5104, "step": 26540 }, { "epoch": 0.7826920241780922, "grad_norm": 2.8534412401853873, "learning_rate": 6.923968381505658e-06, "loss": 1.4729, "step": 26545 }, { "epoch": 0.7828394515701017, "grad_norm": 3.040696041098435, "learning_rate": 6.9236257505827024e-06, "loss": 1.5131, "step": 26550 }, { "epoch": 0.7829868789621112, "grad_norm": 2.82064326556563, "learning_rate": 6.923283026272184e-06, "loss": 1.4942, "step": 26555 }, { "epoch": 0.7831343063541206, "grad_norm": 2.883332404001965, "learning_rate": 6.92294020858419e-06, "loss": 1.4866, "step": 26560 }, { "epoch": 0.7832817337461301, "grad_norm": 2.8751699216729345, "learning_rate": 6.922597297528806e-06, "loss": 1.5226, "step": 26565 }, { "epoch": 0.7834291611381394, "grad_norm": 2.8205219553606473, "learning_rate": 6.922254293116125e-06, "loss": 1.5412, "step": 26570 }, { "epoch": 0.7835765885301489, "grad_norm": 2.8055206759393614, "learning_rate": 6.921911195356238e-06, "loss": 1.5067, "step": 26575 }, { "epoch": 0.7837240159221583, "grad_norm": 2.984079572303096, "learning_rate": 6.921568004259242e-06, "loss": 1.4886, "step": 26580 }, { "epoch": 0.7838714433141678, "grad_norm": 2.739913651143059, "learning_rate": 6.9212247198352375e-06, "loss": 1.4675, "step": 26585 }, { "epoch": 0.7840188707061773, "grad_norm": 2.8157171520326374, "learning_rate": 6.920881342094323e-06, "loss": 1.5002, "step": 26590 }, { "epoch": 0.7841662980981866, "grad_norm": 2.7654345321971188, "learning_rate": 6.9205378710466045e-06, "loss": 1.4573, "step": 26595 }, { "epoch": 0.7843137254901961, "grad_norm": 2.8350724033690384, "learning_rate": 6.920194306702189e-06, "loss": 1.4543, "step": 26600 }, { "epoch": 0.7844611528822055, "grad_norm": 2.83070117312299, "learning_rate": 6.919850649071187e-06, "loss": 1.4926, "step": 26605 }, { "epoch": 0.784608580274215, "grad_norm": 2.8759425494214175, "learning_rate": 6.919506898163708e-06, "loss": 1.4984, "step": 26610 }, { "epoch": 0.7847560076662243, "grad_norm": 2.8800052316872655, "learning_rate": 6.919163053989871e-06, "loss": 1.5231, "step": 26615 }, { "epoch": 0.7849034350582338, "grad_norm": 2.7854231036136463, "learning_rate": 6.918819116559791e-06, "loss": 1.4776, "step": 26620 }, { "epoch": 0.7850508624502432, "grad_norm": 2.8035991505500983, "learning_rate": 6.918475085883592e-06, "loss": 1.5388, "step": 26625 }, { "epoch": 0.7851982898422527, "grad_norm": 2.8489396612976083, "learning_rate": 6.918130961971394e-06, "loss": 1.5054, "step": 26630 }, { "epoch": 0.7853457172342622, "grad_norm": 2.8314001491915697, "learning_rate": 6.917786744833326e-06, "loss": 1.5038, "step": 26635 }, { "epoch": 0.7854931446262715, "grad_norm": 3.4148671507174395, "learning_rate": 6.917442434479514e-06, "loss": 1.5172, "step": 26640 }, { "epoch": 0.785640572018281, "grad_norm": 2.7750696560286707, "learning_rate": 6.917098030920093e-06, "loss": 1.4945, "step": 26645 }, { "epoch": 0.7857879994102904, "grad_norm": 2.7322977856784654, "learning_rate": 6.916753534165195e-06, "loss": 1.5095, "step": 26650 }, { "epoch": 0.7859354268022999, "grad_norm": 3.0124154552787568, "learning_rate": 6.91640894422496e-06, "loss": 1.4814, "step": 26655 }, { "epoch": 0.7860828541943093, "grad_norm": 2.790463449219323, "learning_rate": 6.916064261109525e-06, "loss": 1.4907, "step": 26660 }, { "epoch": 0.7862302815863187, "grad_norm": 2.8115986960246357, "learning_rate": 6.915719484829033e-06, "loss": 1.5048, "step": 26665 }, { "epoch": 0.7863777089783281, "grad_norm": 2.733269493943593, "learning_rate": 6.915374615393631e-06, "loss": 1.4536, "step": 26670 }, { "epoch": 0.7865251363703376, "grad_norm": 2.819448158713884, "learning_rate": 6.915029652813467e-06, "loss": 1.4944, "step": 26675 }, { "epoch": 0.7866725637623471, "grad_norm": 2.9483560042275623, "learning_rate": 6.914684597098691e-06, "loss": 1.5034, "step": 26680 }, { "epoch": 0.7868199911543565, "grad_norm": 2.7051329446469383, "learning_rate": 6.914339448259457e-06, "loss": 1.5198, "step": 26685 }, { "epoch": 0.7869674185463659, "grad_norm": 2.7672194977041444, "learning_rate": 6.913994206305921e-06, "loss": 1.509, "step": 26690 }, { "epoch": 0.7871148459383753, "grad_norm": 2.7350525730644124, "learning_rate": 6.9136488712482434e-06, "loss": 1.4557, "step": 26695 }, { "epoch": 0.7872622733303848, "grad_norm": 2.9219088030260583, "learning_rate": 6.913303443096585e-06, "loss": 1.4715, "step": 26700 }, { "epoch": 0.7874097007223942, "grad_norm": 2.8307295891627673, "learning_rate": 6.91295792186111e-06, "loss": 1.4836, "step": 26705 }, { "epoch": 0.7875571281144037, "grad_norm": 2.8671993297974008, "learning_rate": 6.912612307551986e-06, "loss": 1.4736, "step": 26710 }, { "epoch": 0.787704555506413, "grad_norm": 2.9721501395514394, "learning_rate": 6.912266600179385e-06, "loss": 1.5534, "step": 26715 }, { "epoch": 0.7878519828984225, "grad_norm": 2.696748233323017, "learning_rate": 6.911920799753479e-06, "loss": 1.4433, "step": 26720 }, { "epoch": 0.787999410290432, "grad_norm": 2.7337776802580414, "learning_rate": 6.911574906284441e-06, "loss": 1.5011, "step": 26725 }, { "epoch": 0.7881468376824414, "grad_norm": 2.9414685874981545, "learning_rate": 6.911228919782452e-06, "loss": 1.4841, "step": 26730 }, { "epoch": 0.7882942650744509, "grad_norm": 2.919209776316744, "learning_rate": 6.910882840257693e-06, "loss": 1.4831, "step": 26735 }, { "epoch": 0.7884416924664602, "grad_norm": 2.8394224111936635, "learning_rate": 6.910536667720345e-06, "loss": 1.4754, "step": 26740 }, { "epoch": 0.7885891198584697, "grad_norm": 2.9357666839648062, "learning_rate": 6.910190402180599e-06, "loss": 1.4771, "step": 26745 }, { "epoch": 0.7887365472504791, "grad_norm": 2.8975370579247026, "learning_rate": 6.90984404364864e-06, "loss": 1.5047, "step": 26750 }, { "epoch": 0.7888839746424886, "grad_norm": 2.903154440727366, "learning_rate": 6.909497592134663e-06, "loss": 1.5424, "step": 26755 }, { "epoch": 0.7890314020344981, "grad_norm": 2.78371267786509, "learning_rate": 6.909151047648862e-06, "loss": 1.5055, "step": 26760 }, { "epoch": 0.7891788294265074, "grad_norm": 2.754492219636609, "learning_rate": 6.908804410201433e-06, "loss": 1.5518, "step": 26765 }, { "epoch": 0.7893262568185169, "grad_norm": 2.7940470356966256, "learning_rate": 6.908457679802578e-06, "loss": 1.4921, "step": 26770 }, { "epoch": 0.7894736842105263, "grad_norm": 2.8767476841802173, "learning_rate": 6.908110856462498e-06, "loss": 1.5022, "step": 26775 }, { "epoch": 0.7896211116025358, "grad_norm": 2.7839926237632913, "learning_rate": 6.9077639401914e-06, "loss": 1.4645, "step": 26780 }, { "epoch": 0.7897685389945451, "grad_norm": 2.753240979810096, "learning_rate": 6.907416930999494e-06, "loss": 1.4274, "step": 26785 }, { "epoch": 0.7899159663865546, "grad_norm": 2.70920899204006, "learning_rate": 6.907069828896988e-06, "loss": 1.4837, "step": 26790 }, { "epoch": 0.790063393778564, "grad_norm": 2.7328263981300704, "learning_rate": 6.906722633894097e-06, "loss": 1.4988, "step": 26795 }, { "epoch": 0.7902108211705735, "grad_norm": 2.914914015997364, "learning_rate": 6.906375346001039e-06, "loss": 1.4795, "step": 26800 }, { "epoch": 0.790358248562583, "grad_norm": 2.936733763968127, "learning_rate": 6.906027965228032e-06, "loss": 1.5209, "step": 26805 }, { "epoch": 0.7905056759545923, "grad_norm": 2.7430742321809958, "learning_rate": 6.905680491585298e-06, "loss": 1.3972, "step": 26810 }, { "epoch": 0.7906531033466018, "grad_norm": 2.748591855506758, "learning_rate": 6.905332925083062e-06, "loss": 1.4528, "step": 26815 }, { "epoch": 0.7908005307386112, "grad_norm": 2.7567512796333413, "learning_rate": 6.904985265731552e-06, "loss": 1.4762, "step": 26820 }, { "epoch": 0.7909479581306207, "grad_norm": 2.692857746517106, "learning_rate": 6.904637513540997e-06, "loss": 1.5169, "step": 26825 }, { "epoch": 0.7910953855226301, "grad_norm": 3.2590178838025183, "learning_rate": 6.904289668521632e-06, "loss": 1.4848, "step": 26830 }, { "epoch": 0.7912428129146395, "grad_norm": 2.7522869423816974, "learning_rate": 6.9039417306836924e-06, "loss": 1.4562, "step": 26835 }, { "epoch": 0.791390240306649, "grad_norm": 2.8284353902589054, "learning_rate": 6.903593700037414e-06, "loss": 1.4621, "step": 26840 }, { "epoch": 0.7915376676986584, "grad_norm": 2.7533852117951327, "learning_rate": 6.903245576593041e-06, "loss": 1.4602, "step": 26845 }, { "epoch": 0.7916850950906679, "grad_norm": 2.782903168353196, "learning_rate": 6.902897360360817e-06, "loss": 1.513, "step": 26850 }, { "epoch": 0.7918325224826773, "grad_norm": 2.9453867427912885, "learning_rate": 6.902549051350987e-06, "loss": 1.5105, "step": 26855 }, { "epoch": 0.7919799498746867, "grad_norm": 2.6901486104321384, "learning_rate": 6.9022006495738e-06, "loss": 1.504, "step": 26860 }, { "epoch": 0.7921273772666961, "grad_norm": 2.778037814283334, "learning_rate": 6.901852155039512e-06, "loss": 1.4874, "step": 26865 }, { "epoch": 0.7922748046587056, "grad_norm": 3.028620828757721, "learning_rate": 6.901503567758374e-06, "loss": 1.5296, "step": 26870 }, { "epoch": 0.792422232050715, "grad_norm": 2.821736684662704, "learning_rate": 6.901154887740645e-06, "loss": 1.476, "step": 26875 }, { "epoch": 0.7925696594427245, "grad_norm": 2.8753359961130625, "learning_rate": 6.900806114996586e-06, "loss": 1.4553, "step": 26880 }, { "epoch": 0.7927170868347339, "grad_norm": 2.6845248799699513, "learning_rate": 6.900457249536458e-06, "loss": 1.4444, "step": 26885 }, { "epoch": 0.7928645142267433, "grad_norm": 2.9978212273590468, "learning_rate": 6.9001082913705285e-06, "loss": 1.5113, "step": 26890 }, { "epoch": 0.7930119416187528, "grad_norm": 2.830998737925216, "learning_rate": 6.899759240509066e-06, "loss": 1.4669, "step": 26895 }, { "epoch": 0.7931593690107622, "grad_norm": 2.6839286683673143, "learning_rate": 6.89941009696234e-06, "loss": 1.4583, "step": 26900 }, { "epoch": 0.7933067964027717, "grad_norm": 3.009883817597026, "learning_rate": 6.899060860740627e-06, "loss": 1.4453, "step": 26905 }, { "epoch": 0.793454223794781, "grad_norm": 2.8889868649198758, "learning_rate": 6.898711531854201e-06, "loss": 1.4633, "step": 26910 }, { "epoch": 0.7936016511867905, "grad_norm": 2.798435066402666, "learning_rate": 6.898362110313344e-06, "loss": 1.4903, "step": 26915 }, { "epoch": 0.7937490785788, "grad_norm": 2.6650240777948473, "learning_rate": 6.898012596128335e-06, "loss": 1.4612, "step": 26920 }, { "epoch": 0.7938965059708094, "grad_norm": 2.8400522733820144, "learning_rate": 6.897662989309462e-06, "loss": 1.4599, "step": 26925 }, { "epoch": 0.7940439333628189, "grad_norm": 2.8168405252488826, "learning_rate": 6.897313289867012e-06, "loss": 1.4897, "step": 26930 }, { "epoch": 0.7941913607548282, "grad_norm": 2.834213676111169, "learning_rate": 6.896963497811273e-06, "loss": 1.4657, "step": 26935 }, { "epoch": 0.7943387881468377, "grad_norm": 2.9307306677588714, "learning_rate": 6.896613613152541e-06, "loss": 1.5069, "step": 26940 }, { "epoch": 0.7944862155388471, "grad_norm": 3.0404087165762497, "learning_rate": 6.89626363590111e-06, "loss": 1.5189, "step": 26945 }, { "epoch": 0.7946336429308566, "grad_norm": 2.741907508918266, "learning_rate": 6.895913566067279e-06, "loss": 1.5444, "step": 26950 }, { "epoch": 0.7947810703228659, "grad_norm": 2.7805807470975816, "learning_rate": 6.895563403661348e-06, "loss": 1.4698, "step": 26955 }, { "epoch": 0.7949284977148754, "grad_norm": 2.8233634995892727, "learning_rate": 6.895213148693623e-06, "loss": 1.4552, "step": 26960 }, { "epoch": 0.7950759251068849, "grad_norm": 2.7473973159143155, "learning_rate": 6.89486280117441e-06, "loss": 1.495, "step": 26965 }, { "epoch": 0.7952233524988943, "grad_norm": 2.7324769527293418, "learning_rate": 6.894512361114017e-06, "loss": 1.5028, "step": 26970 }, { "epoch": 0.7953707798909038, "grad_norm": 2.796920028015594, "learning_rate": 6.894161828522757e-06, "loss": 1.4743, "step": 26975 }, { "epoch": 0.7955182072829131, "grad_norm": 3.02137117726278, "learning_rate": 6.8938112034109466e-06, "loss": 1.5251, "step": 26980 }, { "epoch": 0.7956656346749226, "grad_norm": 2.736991263446103, "learning_rate": 6.8934604857889e-06, "loss": 1.4666, "step": 26985 }, { "epoch": 0.795813062066932, "grad_norm": 2.899885610302823, "learning_rate": 6.893109675666939e-06, "loss": 1.515, "step": 26990 }, { "epoch": 0.7959604894589415, "grad_norm": 2.8302551805432223, "learning_rate": 6.892758773055387e-06, "loss": 1.4554, "step": 26995 }, { "epoch": 0.796107916850951, "grad_norm": 2.8537011218474464, "learning_rate": 6.892407777964571e-06, "loss": 1.4773, "step": 27000 }, { "epoch": 0.796107916850951, "eval_loss": 1.155006766319275, "eval_runtime": 4.3128, "eval_samples_per_second": 91.819, "eval_steps_per_second": 3.014, "step": 27000 }, { "epoch": 0.7962553442429603, "grad_norm": 2.8864377593187864, "learning_rate": 6.892056690404816e-06, "loss": 1.5312, "step": 27005 }, { "epoch": 0.7964027716349698, "grad_norm": 2.7442102991617716, "learning_rate": 6.891705510386456e-06, "loss": 1.4558, "step": 27010 }, { "epoch": 0.7965501990269792, "grad_norm": 2.803754405074574, "learning_rate": 6.891354237919823e-06, "loss": 1.4953, "step": 27015 }, { "epoch": 0.7966976264189887, "grad_norm": 2.7893277706416035, "learning_rate": 6.891002873015256e-06, "loss": 1.4577, "step": 27020 }, { "epoch": 0.7968450538109981, "grad_norm": 2.728263214004544, "learning_rate": 6.890651415683092e-06, "loss": 1.4366, "step": 27025 }, { "epoch": 0.7969924812030075, "grad_norm": 2.8841350985457237, "learning_rate": 6.890299865933674e-06, "loss": 1.4878, "step": 27030 }, { "epoch": 0.7971399085950169, "grad_norm": 2.9723037730955446, "learning_rate": 6.889948223777347e-06, "loss": 1.4935, "step": 27035 }, { "epoch": 0.7972873359870264, "grad_norm": 2.8091097065985937, "learning_rate": 6.8895964892244565e-06, "loss": 1.4754, "step": 27040 }, { "epoch": 0.7974347633790358, "grad_norm": 2.6348807201691735, "learning_rate": 6.889244662285356e-06, "loss": 1.4366, "step": 27045 }, { "epoch": 0.7975821907710453, "grad_norm": 2.949523412639075, "learning_rate": 6.888892742970394e-06, "loss": 1.5266, "step": 27050 }, { "epoch": 0.7977296181630547, "grad_norm": 2.8141361773126774, "learning_rate": 6.888540731289932e-06, "loss": 1.4524, "step": 27055 }, { "epoch": 0.7978770455550641, "grad_norm": 2.8264375275942304, "learning_rate": 6.888188627254324e-06, "loss": 1.4816, "step": 27060 }, { "epoch": 0.7980244729470736, "grad_norm": 2.8573154077479694, "learning_rate": 6.887836430873932e-06, "loss": 1.4522, "step": 27065 }, { "epoch": 0.798171900339083, "grad_norm": 2.8848019313400872, "learning_rate": 6.887484142159119e-06, "loss": 1.5158, "step": 27070 }, { "epoch": 0.7983193277310925, "grad_norm": 2.990781942501596, "learning_rate": 6.887131761120255e-06, "loss": 1.4474, "step": 27075 }, { "epoch": 0.7984667551231018, "grad_norm": 2.7937415794540486, "learning_rate": 6.886779287767705e-06, "loss": 1.433, "step": 27080 }, { "epoch": 0.7986141825151113, "grad_norm": 2.8841494174780915, "learning_rate": 6.886426722111842e-06, "loss": 1.5213, "step": 27085 }, { "epoch": 0.7987616099071208, "grad_norm": 2.8579652224375587, "learning_rate": 6.886074064163043e-06, "loss": 1.49, "step": 27090 }, { "epoch": 0.7989090372991302, "grad_norm": 2.8307373323817173, "learning_rate": 6.885721313931682e-06, "loss": 1.4584, "step": 27095 }, { "epoch": 0.7990564646911397, "grad_norm": 2.760849284711027, "learning_rate": 6.885368471428142e-06, "loss": 1.5109, "step": 27100 }, { "epoch": 0.799203892083149, "grad_norm": 2.8566961624184466, "learning_rate": 6.885015536662804e-06, "loss": 1.5303, "step": 27105 }, { "epoch": 0.7993513194751585, "grad_norm": 2.622360150858746, "learning_rate": 6.884662509646055e-06, "loss": 1.5322, "step": 27110 }, { "epoch": 0.7994987468671679, "grad_norm": 2.7195175842498527, "learning_rate": 6.884309390388281e-06, "loss": 1.4832, "step": 27115 }, { "epoch": 0.7996461742591774, "grad_norm": 2.8423719754458654, "learning_rate": 6.883956178899875e-06, "loss": 1.5001, "step": 27120 }, { "epoch": 0.7997936016511867, "grad_norm": 3.093302264744295, "learning_rate": 6.883602875191229e-06, "loss": 1.4967, "step": 27125 }, { "epoch": 0.7999410290431962, "grad_norm": 2.806742269245726, "learning_rate": 6.88324947927274e-06, "loss": 1.5001, "step": 27130 }, { "epoch": 0.8000884564352057, "grad_norm": 2.7833495771031487, "learning_rate": 6.882895991154807e-06, "loss": 1.5002, "step": 27135 }, { "epoch": 0.8002358838272151, "grad_norm": 2.8060753902283464, "learning_rate": 6.8825424108478335e-06, "loss": 1.5275, "step": 27140 }, { "epoch": 0.8003833112192246, "grad_norm": 2.762961800015505, "learning_rate": 6.882188738362222e-06, "loss": 1.5101, "step": 27145 }, { "epoch": 0.8005307386112339, "grad_norm": 2.736101550784864, "learning_rate": 6.88183497370838e-06, "loss": 1.4844, "step": 27150 }, { "epoch": 0.8006781660032434, "grad_norm": 2.738965292740784, "learning_rate": 6.881481116896718e-06, "loss": 1.4323, "step": 27155 }, { "epoch": 0.8008255933952528, "grad_norm": 2.7906610934991924, "learning_rate": 6.8811271679376486e-06, "loss": 1.4516, "step": 27160 }, { "epoch": 0.8009730207872623, "grad_norm": 2.9476250866595746, "learning_rate": 6.880773126841587e-06, "loss": 1.5029, "step": 27165 }, { "epoch": 0.8011204481792717, "grad_norm": 2.8001317808609447, "learning_rate": 6.880418993618951e-06, "loss": 1.5262, "step": 27170 }, { "epoch": 0.8012678755712811, "grad_norm": 2.8383504014070207, "learning_rate": 6.880064768280161e-06, "loss": 1.4004, "step": 27175 }, { "epoch": 0.8014153029632906, "grad_norm": 2.7781300019861366, "learning_rate": 6.879710450835641e-06, "loss": 1.4927, "step": 27180 }, { "epoch": 0.8015627303553, "grad_norm": 2.7721903941519255, "learning_rate": 6.879356041295818e-06, "loss": 1.4555, "step": 27185 }, { "epoch": 0.8017101577473095, "grad_norm": 2.6883905992037085, "learning_rate": 6.879001539671119e-06, "loss": 1.477, "step": 27190 }, { "epoch": 0.8018575851393189, "grad_norm": 2.7054680033920375, "learning_rate": 6.878646945971977e-06, "loss": 1.482, "step": 27195 }, { "epoch": 0.8020050125313283, "grad_norm": 2.8991855415574554, "learning_rate": 6.878292260208828e-06, "loss": 1.441, "step": 27200 }, { "epoch": 0.8021524399233377, "grad_norm": 2.8804473482973183, "learning_rate": 6.877937482392105e-06, "loss": 1.4776, "step": 27205 }, { "epoch": 0.8022998673153472, "grad_norm": 2.9147014141935217, "learning_rate": 6.87758261253225e-06, "loss": 1.4647, "step": 27210 }, { "epoch": 0.8024472947073567, "grad_norm": 2.8547916931914874, "learning_rate": 6.8772276506397065e-06, "loss": 1.5316, "step": 27215 }, { "epoch": 0.8025947220993661, "grad_norm": 2.8933028131526344, "learning_rate": 6.876872596724917e-06, "loss": 1.4739, "step": 27220 }, { "epoch": 0.8027421494913755, "grad_norm": 2.8128933004481533, "learning_rate": 6.876517450798332e-06, "loss": 1.4688, "step": 27225 }, { "epoch": 0.8028895768833849, "grad_norm": 2.879008731914059, "learning_rate": 6.876162212870401e-06, "loss": 1.5305, "step": 27230 }, { "epoch": 0.8030370042753944, "grad_norm": 2.9381218171843146, "learning_rate": 6.875806882951576e-06, "loss": 1.4875, "step": 27235 }, { "epoch": 0.8031844316674038, "grad_norm": 2.6984960230606023, "learning_rate": 6.875451461052315e-06, "loss": 1.4361, "step": 27240 }, { "epoch": 0.8033318590594133, "grad_norm": 2.7501060612121777, "learning_rate": 6.875095947183076e-06, "loss": 1.4592, "step": 27245 }, { "epoch": 0.8034792864514226, "grad_norm": 2.885482315143422, "learning_rate": 6.87474034135432e-06, "loss": 1.5489, "step": 27250 }, { "epoch": 0.8036267138434321, "grad_norm": 2.839482006857361, "learning_rate": 6.874384643576512e-06, "loss": 1.4904, "step": 27255 }, { "epoch": 0.8037741412354416, "grad_norm": 2.8207232924364263, "learning_rate": 6.874028853860117e-06, "loss": 1.4879, "step": 27260 }, { "epoch": 0.803921568627451, "grad_norm": 2.840361419245746, "learning_rate": 6.873672972215607e-06, "loss": 1.4637, "step": 27265 }, { "epoch": 0.8040689960194605, "grad_norm": 2.767473097659488, "learning_rate": 6.873316998653451e-06, "loss": 1.4953, "step": 27270 }, { "epoch": 0.8042164234114698, "grad_norm": 2.8249190335716965, "learning_rate": 6.872960933184127e-06, "loss": 1.4782, "step": 27275 }, { "epoch": 0.8043638508034793, "grad_norm": 2.8914165845135855, "learning_rate": 6.872604775818113e-06, "loss": 1.4781, "step": 27280 }, { "epoch": 0.8045112781954887, "grad_norm": 2.8019283177346086, "learning_rate": 6.872248526565886e-06, "loss": 1.5504, "step": 27285 }, { "epoch": 0.8046587055874982, "grad_norm": 2.8643807603311675, "learning_rate": 6.8718921854379314e-06, "loss": 1.523, "step": 27290 }, { "epoch": 0.8048061329795075, "grad_norm": 2.7222950899694998, "learning_rate": 6.871535752444733e-06, "loss": 1.4903, "step": 27295 }, { "epoch": 0.804953560371517, "grad_norm": 2.7754608616254153, "learning_rate": 6.871179227596783e-06, "loss": 1.4414, "step": 27300 }, { "epoch": 0.8051009877635265, "grad_norm": 2.693911808826857, "learning_rate": 6.8708226109045685e-06, "loss": 1.4503, "step": 27305 }, { "epoch": 0.8052484151555359, "grad_norm": 2.972390175786614, "learning_rate": 6.870465902378586e-06, "loss": 1.497, "step": 27310 }, { "epoch": 0.8053958425475454, "grad_norm": 2.7646407238893116, "learning_rate": 6.870109102029331e-06, "loss": 1.4384, "step": 27315 }, { "epoch": 0.8055432699395547, "grad_norm": 2.8004445785432193, "learning_rate": 6.869752209867302e-06, "loss": 1.5155, "step": 27320 }, { "epoch": 0.8056906973315642, "grad_norm": 2.6653455540184203, "learning_rate": 6.869395225903003e-06, "loss": 1.4609, "step": 27325 }, { "epoch": 0.8058381247235736, "grad_norm": 2.761385408258114, "learning_rate": 6.869038150146937e-06, "loss": 1.5413, "step": 27330 }, { "epoch": 0.8059855521155831, "grad_norm": 2.7780233138333403, "learning_rate": 6.868680982609612e-06, "loss": 1.4805, "step": 27335 }, { "epoch": 0.8061329795075926, "grad_norm": 2.728247134261116, "learning_rate": 6.868323723301538e-06, "loss": 1.5123, "step": 27340 }, { "epoch": 0.8062804068996019, "grad_norm": 2.7836122516867263, "learning_rate": 6.867966372233228e-06, "loss": 1.4477, "step": 27345 }, { "epoch": 0.8064278342916114, "grad_norm": 2.7836495552857987, "learning_rate": 6.867608929415196e-06, "loss": 1.4485, "step": 27350 }, { "epoch": 0.8065752616836208, "grad_norm": 2.7819821728663587, "learning_rate": 6.867251394857962e-06, "loss": 1.4665, "step": 27355 }, { "epoch": 0.8067226890756303, "grad_norm": 2.6610630271395874, "learning_rate": 6.866893768572047e-06, "loss": 1.4483, "step": 27360 }, { "epoch": 0.8068701164676397, "grad_norm": 2.7127614789942447, "learning_rate": 6.866536050567972e-06, "loss": 1.5346, "step": 27365 }, { "epoch": 0.8070175438596491, "grad_norm": 2.738037305745627, "learning_rate": 6.866178240856266e-06, "loss": 1.4256, "step": 27370 }, { "epoch": 0.8071649712516585, "grad_norm": 2.9280554410864355, "learning_rate": 6.865820339447458e-06, "loss": 1.538, "step": 27375 }, { "epoch": 0.807312398643668, "grad_norm": 2.842842409600755, "learning_rate": 6.865462346352077e-06, "loss": 1.4231, "step": 27380 }, { "epoch": 0.8074598260356775, "grad_norm": 2.9543259436126, "learning_rate": 6.8651042615806595e-06, "loss": 1.5083, "step": 27385 }, { "epoch": 0.8076072534276869, "grad_norm": 2.712433024798304, "learning_rate": 6.864746085143742e-06, "loss": 1.4776, "step": 27390 }, { "epoch": 0.8077546808196963, "grad_norm": 2.88412913158941, "learning_rate": 6.864387817051864e-06, "loss": 1.5603, "step": 27395 }, { "epoch": 0.8079021082117057, "grad_norm": 2.9581608906710875, "learning_rate": 6.864029457315568e-06, "loss": 1.499, "step": 27400 }, { "epoch": 0.8080495356037152, "grad_norm": 2.7134395062464605, "learning_rate": 6.863671005945401e-06, "loss": 1.4463, "step": 27405 }, { "epoch": 0.8081969629957246, "grad_norm": 6.382996838182948, "learning_rate": 6.863312462951908e-06, "loss": 1.4849, "step": 27410 }, { "epoch": 0.8083443903877341, "grad_norm": 2.8283835300799027, "learning_rate": 6.86295382834564e-06, "loss": 1.4799, "step": 27415 }, { "epoch": 0.8084918177797434, "grad_norm": 2.8369913561646394, "learning_rate": 6.862595102137153e-06, "loss": 1.5086, "step": 27420 }, { "epoch": 0.8086392451717529, "grad_norm": 2.94121032749989, "learning_rate": 6.862236284336999e-06, "loss": 1.4941, "step": 27425 }, { "epoch": 0.8087866725637624, "grad_norm": 2.751825840930872, "learning_rate": 6.861877374955739e-06, "loss": 1.448, "step": 27430 }, { "epoch": 0.8089340999557718, "grad_norm": 2.7474489448192765, "learning_rate": 6.861518374003935e-06, "loss": 1.5664, "step": 27435 }, { "epoch": 0.8090815273477813, "grad_norm": 2.9728589763355027, "learning_rate": 6.861159281492148e-06, "loss": 1.5064, "step": 27440 }, { "epoch": 0.8092289547397906, "grad_norm": 2.7714763305240604, "learning_rate": 6.860800097430948e-06, "loss": 1.448, "step": 27445 }, { "epoch": 0.8093763821318001, "grad_norm": 2.774290000652714, "learning_rate": 6.860440821830902e-06, "loss": 1.4929, "step": 27450 }, { "epoch": 0.8095238095238095, "grad_norm": 3.0548996100971326, "learning_rate": 6.860081454702584e-06, "loss": 1.5007, "step": 27455 }, { "epoch": 0.809671236915819, "grad_norm": 2.8378079638659592, "learning_rate": 6.8597219960565686e-06, "loss": 1.5072, "step": 27460 }, { "epoch": 0.8098186643078283, "grad_norm": 2.6745020272886553, "learning_rate": 6.859362445903431e-06, "loss": 1.5111, "step": 27465 }, { "epoch": 0.8099660916998378, "grad_norm": 2.810357059874261, "learning_rate": 6.859002804253754e-06, "loss": 1.4799, "step": 27470 }, { "epoch": 0.8101135190918473, "grad_norm": 3.103077595960212, "learning_rate": 6.858643071118118e-06, "loss": 1.4907, "step": 27475 }, { "epoch": 0.8102609464838567, "grad_norm": 2.8647175853105535, "learning_rate": 6.85828324650711e-06, "loss": 1.4766, "step": 27480 }, { "epoch": 0.8104083738758662, "grad_norm": 2.771991523090709, "learning_rate": 6.857923330431319e-06, "loss": 1.4782, "step": 27485 }, { "epoch": 0.8105558012678755, "grad_norm": 2.815480722243949, "learning_rate": 6.857563322901335e-06, "loss": 1.4967, "step": 27490 }, { "epoch": 0.810703228659885, "grad_norm": 2.695956915212864, "learning_rate": 6.8572032239277515e-06, "loss": 1.4799, "step": 27495 }, { "epoch": 0.8108506560518944, "grad_norm": 2.9150913124481073, "learning_rate": 6.856843033521166e-06, "loss": 1.5146, "step": 27500 }, { "epoch": 0.8108506560518944, "eval_loss": 1.1532968282699585, "eval_runtime": 4.1486, "eval_samples_per_second": 95.454, "eval_steps_per_second": 3.134, "step": 27500 }, { "epoch": 0.8109980834439039, "grad_norm": 2.8580947234054914, "learning_rate": 6.856482751692176e-06, "loss": 1.5643, "step": 27505 }, { "epoch": 0.8111455108359134, "grad_norm": 2.7812584900468202, "learning_rate": 6.856122378451383e-06, "loss": 1.4447, "step": 27510 }, { "epoch": 0.8112929382279227, "grad_norm": 2.919532654144103, "learning_rate": 6.855761913809392e-06, "loss": 1.4658, "step": 27515 }, { "epoch": 0.8114403656199322, "grad_norm": 2.6902474707545534, "learning_rate": 6.8554013577768116e-06, "loss": 1.4817, "step": 27520 }, { "epoch": 0.8115877930119416, "grad_norm": 2.755509517742528, "learning_rate": 6.8550407103642505e-06, "loss": 1.4763, "step": 27525 }, { "epoch": 0.8117352204039511, "grad_norm": 3.1343582456127486, "learning_rate": 6.85467997158232e-06, "loss": 1.43, "step": 27530 }, { "epoch": 0.8118826477959605, "grad_norm": 2.841826696510273, "learning_rate": 6.854319141441637e-06, "loss": 1.4703, "step": 27535 }, { "epoch": 0.8120300751879699, "grad_norm": 2.9542895677193686, "learning_rate": 6.853958219952817e-06, "loss": 1.5143, "step": 27540 }, { "epoch": 0.8121775025799793, "grad_norm": 2.9928907880938422, "learning_rate": 6.853597207126483e-06, "loss": 1.5081, "step": 27545 }, { "epoch": 0.8123249299719888, "grad_norm": 2.7691136022146057, "learning_rate": 6.853236102973257e-06, "loss": 1.4817, "step": 27550 }, { "epoch": 0.8124723573639983, "grad_norm": 2.7936359209767616, "learning_rate": 6.852874907503765e-06, "loss": 1.4219, "step": 27555 }, { "epoch": 0.8126197847560077, "grad_norm": 2.847581311421575, "learning_rate": 6.852513620728636e-06, "loss": 1.4836, "step": 27560 }, { "epoch": 0.8127672121480171, "grad_norm": 2.763391323542618, "learning_rate": 6.852152242658502e-06, "loss": 1.4681, "step": 27565 }, { "epoch": 0.8129146395400265, "grad_norm": 2.7900489169092784, "learning_rate": 6.851790773303995e-06, "loss": 1.4795, "step": 27570 }, { "epoch": 0.813062066932036, "grad_norm": 2.7118514814528747, "learning_rate": 6.851429212675752e-06, "loss": 1.5363, "step": 27575 }, { "epoch": 0.8132094943240454, "grad_norm": 2.739945448257385, "learning_rate": 6.851067560784414e-06, "loss": 1.4939, "step": 27580 }, { "epoch": 0.8133569217160549, "grad_norm": 2.728135514547293, "learning_rate": 6.850705817640621e-06, "loss": 1.434, "step": 27585 }, { "epoch": 0.8135043491080642, "grad_norm": 2.8007247903649715, "learning_rate": 6.850343983255019e-06, "loss": 1.5071, "step": 27590 }, { "epoch": 0.8136517765000737, "grad_norm": 2.8330300585291965, "learning_rate": 6.8499820576382555e-06, "loss": 1.496, "step": 27595 }, { "epoch": 0.8137992038920832, "grad_norm": 2.9306696407712014, "learning_rate": 6.849620040800979e-06, "loss": 1.4642, "step": 27600 }, { "epoch": 0.8139466312840926, "grad_norm": 2.8071245125056237, "learning_rate": 6.849257932753846e-06, "loss": 1.4459, "step": 27605 }, { "epoch": 0.8140940586761021, "grad_norm": 2.876547967943527, "learning_rate": 6.848895733507507e-06, "loss": 1.4957, "step": 27610 }, { "epoch": 0.8142414860681114, "grad_norm": 2.789404013455096, "learning_rate": 6.848533443072623e-06, "loss": 1.4273, "step": 27615 }, { "epoch": 0.8143889134601209, "grad_norm": 2.832219332483086, "learning_rate": 6.848171061459855e-06, "loss": 1.5087, "step": 27620 }, { "epoch": 0.8145363408521303, "grad_norm": 2.8047497635312664, "learning_rate": 6.847808588679864e-06, "loss": 1.5178, "step": 27625 }, { "epoch": 0.8146837682441398, "grad_norm": 2.8062343336477653, "learning_rate": 6.8474460247433205e-06, "loss": 1.523, "step": 27630 }, { "epoch": 0.8148311956361491, "grad_norm": 2.8117140411642767, "learning_rate": 6.847083369660889e-06, "loss": 1.4737, "step": 27635 }, { "epoch": 0.8149786230281586, "grad_norm": 2.761476269127636, "learning_rate": 6.846720623443243e-06, "loss": 1.4813, "step": 27640 }, { "epoch": 0.8151260504201681, "grad_norm": 2.662466486128738, "learning_rate": 6.846357786101058e-06, "loss": 1.4631, "step": 27645 }, { "epoch": 0.8152734778121775, "grad_norm": 2.7156122713059596, "learning_rate": 6.845994857645008e-06, "loss": 1.4609, "step": 27650 }, { "epoch": 0.815420905204187, "grad_norm": 2.748233521819443, "learning_rate": 6.845631838085774e-06, "loss": 1.5165, "step": 27655 }, { "epoch": 0.8155683325961963, "grad_norm": 2.7459271491098747, "learning_rate": 6.845268727434039e-06, "loss": 1.4511, "step": 27660 }, { "epoch": 0.8157157599882058, "grad_norm": 2.7503708837177387, "learning_rate": 6.844905525700487e-06, "loss": 1.4803, "step": 27665 }, { "epoch": 0.8158631873802152, "grad_norm": 2.8793054327136227, "learning_rate": 6.844542232895806e-06, "loss": 1.4878, "step": 27670 }, { "epoch": 0.8160106147722247, "grad_norm": 2.862845514439448, "learning_rate": 6.844178849030686e-06, "loss": 1.4677, "step": 27675 }, { "epoch": 0.8161580421642342, "grad_norm": 2.690245110781486, "learning_rate": 6.84381537411582e-06, "loss": 1.425, "step": 27680 }, { "epoch": 0.8163054695562435, "grad_norm": 2.7648282176244927, "learning_rate": 6.843451808161905e-06, "loss": 1.4833, "step": 27685 }, { "epoch": 0.816452896948253, "grad_norm": 2.7452609788529934, "learning_rate": 6.843088151179637e-06, "loss": 1.4901, "step": 27690 }, { "epoch": 0.8166003243402624, "grad_norm": 3.018545252076124, "learning_rate": 6.8427244031797195e-06, "loss": 1.4621, "step": 27695 }, { "epoch": 0.8167477517322719, "grad_norm": 2.766440605555839, "learning_rate": 6.842360564172853e-06, "loss": 1.4882, "step": 27700 }, { "epoch": 0.8168951791242813, "grad_norm": 2.7967836969962008, "learning_rate": 6.841996634169747e-06, "loss": 1.4523, "step": 27705 }, { "epoch": 0.8170426065162907, "grad_norm": 2.8379392659010634, "learning_rate": 6.8416326131811095e-06, "loss": 1.4848, "step": 27710 }, { "epoch": 0.8171900339083001, "grad_norm": 2.754971624316428, "learning_rate": 6.841268501217652e-06, "loss": 1.4839, "step": 27715 }, { "epoch": 0.8173374613003096, "grad_norm": 2.6739262546570517, "learning_rate": 6.840904298290089e-06, "loss": 1.4332, "step": 27720 }, { "epoch": 0.8174848886923191, "grad_norm": 2.742282282471313, "learning_rate": 6.840540004409136e-06, "loss": 1.4754, "step": 27725 }, { "epoch": 0.8176323160843285, "grad_norm": 2.838241121673147, "learning_rate": 6.840175619585517e-06, "loss": 1.5388, "step": 27730 }, { "epoch": 0.8177797434763379, "grad_norm": 2.9443497912899894, "learning_rate": 6.83981114382995e-06, "loss": 1.4505, "step": 27735 }, { "epoch": 0.8179271708683473, "grad_norm": 2.868945111225859, "learning_rate": 6.839446577153161e-06, "loss": 1.4731, "step": 27740 }, { "epoch": 0.8180745982603568, "grad_norm": 2.6919747516780936, "learning_rate": 6.839081919565881e-06, "loss": 1.4815, "step": 27745 }, { "epoch": 0.8182220256523662, "grad_norm": 2.8554518462054777, "learning_rate": 6.8387171710788376e-06, "loss": 1.4696, "step": 27750 }, { "epoch": 0.8183694530443757, "grad_norm": 2.740669356971579, "learning_rate": 6.838352331702764e-06, "loss": 1.5153, "step": 27755 }, { "epoch": 0.818516880436385, "grad_norm": 2.6687113143590695, "learning_rate": 6.837987401448396e-06, "loss": 1.4996, "step": 27760 }, { "epoch": 0.8186643078283945, "grad_norm": 2.6828894553496987, "learning_rate": 6.837622380326474e-06, "loss": 1.4774, "step": 27765 }, { "epoch": 0.818811735220404, "grad_norm": 2.7809650405623527, "learning_rate": 6.837257268347737e-06, "loss": 1.4311, "step": 27770 }, { "epoch": 0.8189591626124134, "grad_norm": 2.5710929863236403, "learning_rate": 6.8368920655229285e-06, "loss": 1.4431, "step": 27775 }, { "epoch": 0.8191065900044229, "grad_norm": 2.6395073880844584, "learning_rate": 6.836526771862798e-06, "loss": 1.504, "step": 27780 }, { "epoch": 0.8192540173964322, "grad_norm": 2.7984179517265293, "learning_rate": 6.8361613873780915e-06, "loss": 1.5114, "step": 27785 }, { "epoch": 0.8194014447884417, "grad_norm": 2.771621717764096, "learning_rate": 6.835795912079563e-06, "loss": 1.4998, "step": 27790 }, { "epoch": 0.8195488721804511, "grad_norm": 2.7940748665406465, "learning_rate": 6.835430345977966e-06, "loss": 1.4998, "step": 27795 }, { "epoch": 0.8196962995724606, "grad_norm": 2.819597173114161, "learning_rate": 6.835064689084058e-06, "loss": 1.4666, "step": 27800 }, { "epoch": 0.8198437269644699, "grad_norm": 2.611484497460632, "learning_rate": 6.834698941408598e-06, "loss": 1.5443, "step": 27805 }, { "epoch": 0.8199911543564794, "grad_norm": 3.000199870026343, "learning_rate": 6.83433310296235e-06, "loss": 1.5227, "step": 27810 }, { "epoch": 0.8201385817484889, "grad_norm": 2.8966506546977966, "learning_rate": 6.833967173756077e-06, "loss": 1.5011, "step": 27815 }, { "epoch": 0.8202860091404983, "grad_norm": 2.6435034518619855, "learning_rate": 6.83360115380055e-06, "loss": 1.4604, "step": 27820 }, { "epoch": 0.8204334365325078, "grad_norm": 2.7661535851945214, "learning_rate": 6.833235043106538e-06, "loss": 1.4745, "step": 27825 }, { "epoch": 0.8205808639245171, "grad_norm": 2.685643968788733, "learning_rate": 6.832868841684814e-06, "loss": 1.426, "step": 27830 }, { "epoch": 0.8207282913165266, "grad_norm": 2.881185758033632, "learning_rate": 6.832502549546153e-06, "loss": 1.4795, "step": 27835 }, { "epoch": 0.820875718708536, "grad_norm": 2.8338416472607757, "learning_rate": 6.832136166701336e-06, "loss": 1.4522, "step": 27840 }, { "epoch": 0.8210231461005455, "grad_norm": 3.1020897066496484, "learning_rate": 6.8317696931611425e-06, "loss": 1.515, "step": 27845 }, { "epoch": 0.821170573492555, "grad_norm": 2.8031688849862904, "learning_rate": 6.831403128936357e-06, "loss": 1.4862, "step": 27850 }, { "epoch": 0.8213180008845643, "grad_norm": 2.6104560026658294, "learning_rate": 6.831036474037764e-06, "loss": 1.4929, "step": 27855 }, { "epoch": 0.8214654282765738, "grad_norm": 2.8364815447232106, "learning_rate": 6.830669728476156e-06, "loss": 1.4684, "step": 27860 }, { "epoch": 0.8216128556685832, "grad_norm": 2.729482253526484, "learning_rate": 6.830302892262324e-06, "loss": 1.4668, "step": 27865 }, { "epoch": 0.8217602830605927, "grad_norm": 2.8935879715169337, "learning_rate": 6.829935965407061e-06, "loss": 1.4799, "step": 27870 }, { "epoch": 0.8219077104526021, "grad_norm": 2.976864143686776, "learning_rate": 6.829568947921167e-06, "loss": 1.5363, "step": 27875 }, { "epoch": 0.8220551378446115, "grad_norm": 2.7588594529153148, "learning_rate": 6.829201839815438e-06, "loss": 1.5244, "step": 27880 }, { "epoch": 0.8222025652366209, "grad_norm": 2.7325291991833494, "learning_rate": 6.8288346411006795e-06, "loss": 1.4778, "step": 27885 }, { "epoch": 0.8223499926286304, "grad_norm": 2.7809782586142555, "learning_rate": 6.828467351787697e-06, "loss": 1.5294, "step": 27890 }, { "epoch": 0.8224974200206399, "grad_norm": 2.7768775072283, "learning_rate": 6.828099971887296e-06, "loss": 1.4953, "step": 27895 }, { "epoch": 0.8226448474126493, "grad_norm": 2.551935721834197, "learning_rate": 6.827732501410289e-06, "loss": 1.5331, "step": 27900 }, { "epoch": 0.8227922748046587, "grad_norm": 2.8738626973622696, "learning_rate": 6.82736494036749e-06, "loss": 1.4736, "step": 27905 }, { "epoch": 0.8229397021966681, "grad_norm": 2.75876153028951, "learning_rate": 6.826997288769713e-06, "loss": 1.4718, "step": 27910 }, { "epoch": 0.8230871295886776, "grad_norm": 2.849239822030883, "learning_rate": 6.826629546627776e-06, "loss": 1.4753, "step": 27915 }, { "epoch": 0.823234556980687, "grad_norm": 2.725926098781481, "learning_rate": 6.826261713952503e-06, "loss": 1.466, "step": 27920 }, { "epoch": 0.8233819843726965, "grad_norm": 2.947260321701852, "learning_rate": 6.8258937907547155e-06, "loss": 1.4547, "step": 27925 }, { "epoch": 0.8235294117647058, "grad_norm": 2.8076126356768594, "learning_rate": 6.825525777045241e-06, "loss": 1.501, "step": 27930 }, { "epoch": 0.8236768391567153, "grad_norm": 2.92304956496887, "learning_rate": 6.825157672834908e-06, "loss": 1.4951, "step": 27935 }, { "epoch": 0.8238242665487248, "grad_norm": 2.5901466547183025, "learning_rate": 6.824789478134551e-06, "loss": 1.4621, "step": 27940 }, { "epoch": 0.8239716939407342, "grad_norm": 2.889863508130479, "learning_rate": 6.824421192955001e-06, "loss": 1.5353, "step": 27945 }, { "epoch": 0.8241191213327437, "grad_norm": 2.9316949649143975, "learning_rate": 6.824052817307096e-06, "loss": 1.5213, "step": 27950 }, { "epoch": 0.824266548724753, "grad_norm": 2.778900010978734, "learning_rate": 6.823684351201679e-06, "loss": 1.4934, "step": 27955 }, { "epoch": 0.8244139761167625, "grad_norm": 2.721965711121034, "learning_rate": 6.823315794649589e-06, "loss": 1.4915, "step": 27960 }, { "epoch": 0.8245614035087719, "grad_norm": 2.7264061904252777, "learning_rate": 6.822947147661671e-06, "loss": 1.532, "step": 27965 }, { "epoch": 0.8247088309007814, "grad_norm": 2.882159735205823, "learning_rate": 6.822578410248776e-06, "loss": 1.4528, "step": 27970 }, { "epoch": 0.8248562582927909, "grad_norm": 2.739418618578723, "learning_rate": 6.822209582421752e-06, "loss": 1.4374, "step": 27975 }, { "epoch": 0.8250036856848002, "grad_norm": 2.8824908951465398, "learning_rate": 6.821840664191454e-06, "loss": 1.4392, "step": 27980 }, { "epoch": 0.8251511130768097, "grad_norm": 2.7092173701142928, "learning_rate": 6.821471655568735e-06, "loss": 1.4462, "step": 27985 }, { "epoch": 0.8252985404688191, "grad_norm": 2.9513789820164162, "learning_rate": 6.821102556564457e-06, "loss": 1.503, "step": 27990 }, { "epoch": 0.8254459678608286, "grad_norm": 2.7588882078622694, "learning_rate": 6.820733367189479e-06, "loss": 1.4747, "step": 27995 }, { "epoch": 0.8255933952528379, "grad_norm": 2.8434294678383343, "learning_rate": 6.820364087454664e-06, "loss": 1.4789, "step": 28000 }, { "epoch": 0.8255933952528379, "eval_loss": 1.1513309478759766, "eval_runtime": 4.2373, "eval_samples_per_second": 93.457, "eval_steps_per_second": 3.068, "step": 28000 }, { "epoch": 0.8257408226448474, "grad_norm": 2.787747582887755, "learning_rate": 6.819994717370881e-06, "loss": 1.4702, "step": 28005 }, { "epoch": 0.8258882500368568, "grad_norm": 2.7242826110601333, "learning_rate": 6.819625256948998e-06, "loss": 1.4831, "step": 28010 }, { "epoch": 0.8260356774288663, "grad_norm": 2.810665676925389, "learning_rate": 6.819255706199886e-06, "loss": 1.5244, "step": 28015 }, { "epoch": 0.8261831048208758, "grad_norm": 2.7800599115267555, "learning_rate": 6.81888606513442e-06, "loss": 1.5194, "step": 28020 }, { "epoch": 0.8263305322128851, "grad_norm": 2.7546189416784372, "learning_rate": 6.818516333763479e-06, "loss": 1.5046, "step": 28025 }, { "epoch": 0.8264779596048946, "grad_norm": 2.825377567142257, "learning_rate": 6.818146512097939e-06, "loss": 1.4832, "step": 28030 }, { "epoch": 0.826625386996904, "grad_norm": 2.8919705413621632, "learning_rate": 6.817776600148685e-06, "loss": 1.5041, "step": 28035 }, { "epoch": 0.8267728143889135, "grad_norm": 2.8744954910597307, "learning_rate": 6.817406597926602e-06, "loss": 1.4539, "step": 28040 }, { "epoch": 0.8269202417809229, "grad_norm": 2.876244197839902, "learning_rate": 6.817036505442576e-06, "loss": 1.4777, "step": 28045 }, { "epoch": 0.8270676691729323, "grad_norm": 2.6745289707942987, "learning_rate": 6.816666322707499e-06, "loss": 1.4674, "step": 28050 }, { "epoch": 0.8272150965649417, "grad_norm": 2.7596067197368885, "learning_rate": 6.816296049732265e-06, "loss": 1.4927, "step": 28055 }, { "epoch": 0.8273625239569512, "grad_norm": 2.7377382704432462, "learning_rate": 6.815925686527767e-06, "loss": 1.4589, "step": 28060 }, { "epoch": 0.8275099513489607, "grad_norm": 2.8739640404346467, "learning_rate": 6.815555233104905e-06, "loss": 1.4549, "step": 28065 }, { "epoch": 0.8276573787409701, "grad_norm": 2.8410069209610023, "learning_rate": 6.815184689474581e-06, "loss": 1.4441, "step": 28070 }, { "epoch": 0.8278048061329795, "grad_norm": 2.7543304972965585, "learning_rate": 6.814814055647696e-06, "loss": 1.4797, "step": 28075 }, { "epoch": 0.8279522335249889, "grad_norm": 2.7503802998718663, "learning_rate": 6.814443331635159e-06, "loss": 1.5356, "step": 28080 }, { "epoch": 0.8280996609169984, "grad_norm": 2.9758492112924007, "learning_rate": 6.814072517447876e-06, "loss": 1.4763, "step": 28085 }, { "epoch": 0.8282470883090078, "grad_norm": 2.975288719794484, "learning_rate": 6.813701613096762e-06, "loss": 1.5196, "step": 28090 }, { "epoch": 0.8283945157010173, "grad_norm": 2.802865590108347, "learning_rate": 6.8133306185927295e-06, "loss": 1.5075, "step": 28095 }, { "epoch": 0.8285419430930266, "grad_norm": 2.778693781911712, "learning_rate": 6.812959533946694e-06, "loss": 1.4338, "step": 28100 }, { "epoch": 0.8286893704850361, "grad_norm": 2.8727545449317473, "learning_rate": 6.812588359169578e-06, "loss": 1.4802, "step": 28105 }, { "epoch": 0.8288367978770456, "grad_norm": 2.672738120406488, "learning_rate": 6.812217094272303e-06, "loss": 1.5178, "step": 28110 }, { "epoch": 0.828984225269055, "grad_norm": 2.693860587580225, "learning_rate": 6.811845739265791e-06, "loss": 1.4603, "step": 28115 }, { "epoch": 0.8291316526610645, "grad_norm": 2.936644167673663, "learning_rate": 6.811474294160974e-06, "loss": 1.5214, "step": 28120 }, { "epoch": 0.8292790800530738, "grad_norm": 2.787900116930008, "learning_rate": 6.81110275896878e-06, "loss": 1.4676, "step": 28125 }, { "epoch": 0.8294265074450833, "grad_norm": 2.843114655376011, "learning_rate": 6.8107311337001405e-06, "loss": 1.5246, "step": 28130 }, { "epoch": 0.8295739348370927, "grad_norm": 2.785222126606889, "learning_rate": 6.810359418365994e-06, "loss": 1.4704, "step": 28135 }, { "epoch": 0.8297213622291022, "grad_norm": 2.889100577196347, "learning_rate": 6.8099876129772765e-06, "loss": 1.4771, "step": 28140 }, { "epoch": 0.8298687896211117, "grad_norm": 2.6295318091587117, "learning_rate": 6.809615717544929e-06, "loss": 1.4335, "step": 28145 }, { "epoch": 0.830016217013121, "grad_norm": 2.7483428526044738, "learning_rate": 6.809243732079895e-06, "loss": 1.4888, "step": 28150 }, { "epoch": 0.8301636444051305, "grad_norm": 2.8376852662271834, "learning_rate": 6.808871656593122e-06, "loss": 1.4679, "step": 28155 }, { "epoch": 0.8303110717971399, "grad_norm": 2.8695994353695555, "learning_rate": 6.808499491095556e-06, "loss": 1.4908, "step": 28160 }, { "epoch": 0.8304584991891494, "grad_norm": 2.6135244046964647, "learning_rate": 6.808127235598152e-06, "loss": 1.5106, "step": 28165 }, { "epoch": 0.8306059265811587, "grad_norm": 2.8894037670748838, "learning_rate": 6.80775489011186e-06, "loss": 1.5011, "step": 28170 }, { "epoch": 0.8307533539731682, "grad_norm": 2.977782941450261, "learning_rate": 6.80738245464764e-06, "loss": 1.5486, "step": 28175 }, { "epoch": 0.8309007813651776, "grad_norm": 2.626669833421307, "learning_rate": 6.807009929216451e-06, "loss": 1.4947, "step": 28180 }, { "epoch": 0.8310482087571871, "grad_norm": 2.7281924771211092, "learning_rate": 6.806637313829252e-06, "loss": 1.4313, "step": 28185 }, { "epoch": 0.8311956361491966, "grad_norm": 2.706289010312689, "learning_rate": 6.806264608497011e-06, "loss": 1.4736, "step": 28190 }, { "epoch": 0.8313430635412059, "grad_norm": 3.0282328369261577, "learning_rate": 6.805891813230694e-06, "loss": 1.4528, "step": 28195 }, { "epoch": 0.8314904909332154, "grad_norm": 2.854300404490295, "learning_rate": 6.805518928041271e-06, "loss": 1.5251, "step": 28200 }, { "epoch": 0.8316379183252248, "grad_norm": 2.8207019270178653, "learning_rate": 6.8051459529397146e-06, "loss": 1.5182, "step": 28205 }, { "epoch": 0.8317853457172343, "grad_norm": 2.813244135041866, "learning_rate": 6.804772887936999e-06, "loss": 1.4779, "step": 28210 }, { "epoch": 0.8319327731092437, "grad_norm": 2.753703328326344, "learning_rate": 6.8043997330441035e-06, "loss": 1.5174, "step": 28215 }, { "epoch": 0.8320802005012531, "grad_norm": 2.7967343507461218, "learning_rate": 6.804026488272008e-06, "loss": 1.5043, "step": 28220 }, { "epoch": 0.8322276278932625, "grad_norm": 2.89176631136122, "learning_rate": 6.8036531536316955e-06, "loss": 1.4483, "step": 28225 }, { "epoch": 0.832375055285272, "grad_norm": 2.803243342038803, "learning_rate": 6.803279729134153e-06, "loss": 1.4643, "step": 28230 }, { "epoch": 0.8325224826772815, "grad_norm": 2.8495277513552986, "learning_rate": 6.802906214790367e-06, "loss": 1.4464, "step": 28235 }, { "epoch": 0.8326699100692909, "grad_norm": 2.757732581958737, "learning_rate": 6.80253261061133e-06, "loss": 1.4356, "step": 28240 }, { "epoch": 0.8328173374613003, "grad_norm": 2.7470484026947837, "learning_rate": 6.802158916608036e-06, "loss": 1.4952, "step": 28245 }, { "epoch": 0.8329647648533097, "grad_norm": 2.63163287824343, "learning_rate": 6.80178513279148e-06, "loss": 1.4984, "step": 28250 }, { "epoch": 0.8331121922453192, "grad_norm": 2.6530287906203025, "learning_rate": 6.801411259172662e-06, "loss": 1.426, "step": 28255 }, { "epoch": 0.8332596196373286, "grad_norm": 2.8369659090464143, "learning_rate": 6.801037295762582e-06, "loss": 1.4294, "step": 28260 }, { "epoch": 0.8334070470293381, "grad_norm": 2.7395115116037947, "learning_rate": 6.800663242572247e-06, "loss": 1.5463, "step": 28265 }, { "epoch": 0.8335544744213474, "grad_norm": 2.7867365001184825, "learning_rate": 6.800289099612663e-06, "loss": 1.4306, "step": 28270 }, { "epoch": 0.8337019018133569, "grad_norm": 2.713244276109203, "learning_rate": 6.799914866894838e-06, "loss": 1.4744, "step": 28275 }, { "epoch": 0.8338493292053664, "grad_norm": 2.900470059316777, "learning_rate": 6.799540544429786e-06, "loss": 1.4704, "step": 28280 }, { "epoch": 0.8339967565973758, "grad_norm": 2.8060777433260564, "learning_rate": 6.7991661322285215e-06, "loss": 1.4669, "step": 28285 }, { "epoch": 0.8341441839893853, "grad_norm": 2.781970934620356, "learning_rate": 6.798791630302062e-06, "loss": 1.4632, "step": 28290 }, { "epoch": 0.8342916113813946, "grad_norm": 2.6193999305663103, "learning_rate": 6.798417038661425e-06, "loss": 1.4513, "step": 28295 }, { "epoch": 0.8344390387734041, "grad_norm": 2.8671888898187876, "learning_rate": 6.7980423573176376e-06, "loss": 1.4963, "step": 28300 }, { "epoch": 0.8345864661654135, "grad_norm": 2.8293933286296427, "learning_rate": 6.797667586281724e-06, "loss": 1.4438, "step": 28305 }, { "epoch": 0.834733893557423, "grad_norm": 2.7978506182889853, "learning_rate": 6.797292725564711e-06, "loss": 1.4957, "step": 28310 }, { "epoch": 0.8348813209494325, "grad_norm": 2.6748943643351772, "learning_rate": 6.79691777517763e-06, "loss": 1.4851, "step": 28315 }, { "epoch": 0.8350287483414418, "grad_norm": 4.375300587515841, "learning_rate": 6.7965427351315135e-06, "loss": 1.5142, "step": 28320 }, { "epoch": 0.8351761757334513, "grad_norm": 2.7330876556849177, "learning_rate": 6.796167605437398e-06, "loss": 1.4372, "step": 28325 }, { "epoch": 0.8353236031254607, "grad_norm": 2.7624951508328466, "learning_rate": 6.795792386106324e-06, "loss": 1.4629, "step": 28330 }, { "epoch": 0.8354710305174702, "grad_norm": 2.8766357848319504, "learning_rate": 6.79541707714933e-06, "loss": 1.4662, "step": 28335 }, { "epoch": 0.8356184579094795, "grad_norm": 2.7553478697934173, "learning_rate": 6.795041678577461e-06, "loss": 1.5164, "step": 28340 }, { "epoch": 0.835765885301489, "grad_norm": 2.828468193768396, "learning_rate": 6.7946661904017646e-06, "loss": 1.4752, "step": 28345 }, { "epoch": 0.8359133126934984, "grad_norm": 2.9037882757463347, "learning_rate": 6.794290612633287e-06, "loss": 1.4969, "step": 28350 }, { "epoch": 0.8360607400855079, "grad_norm": 2.8303952095141995, "learning_rate": 6.793914945283083e-06, "loss": 1.4412, "step": 28355 }, { "epoch": 0.8362081674775174, "grad_norm": 2.804206070030461, "learning_rate": 6.793539188362207e-06, "loss": 1.4476, "step": 28360 }, { "epoch": 0.8363555948695267, "grad_norm": 2.7305962037531395, "learning_rate": 6.793163341881713e-06, "loss": 1.4734, "step": 28365 }, { "epoch": 0.8365030222615362, "grad_norm": 2.923482494441567, "learning_rate": 6.792787405852664e-06, "loss": 1.5445, "step": 28370 }, { "epoch": 0.8366504496535456, "grad_norm": 2.847799088328211, "learning_rate": 6.792411380286121e-06, "loss": 1.5019, "step": 28375 }, { "epoch": 0.8367978770455551, "grad_norm": 2.8500979550876737, "learning_rate": 6.792035265193149e-06, "loss": 1.453, "step": 28380 }, { "epoch": 0.8369453044375645, "grad_norm": 2.7010949939347513, "learning_rate": 6.791659060584816e-06, "loss": 1.4894, "step": 28385 }, { "epoch": 0.8370927318295739, "grad_norm": 2.742407087385926, "learning_rate": 6.7912827664721904e-06, "loss": 1.4795, "step": 28390 }, { "epoch": 0.8372401592215833, "grad_norm": 2.872280548293151, "learning_rate": 6.790906382866348e-06, "loss": 1.4649, "step": 28395 }, { "epoch": 0.8373875866135928, "grad_norm": 2.7753748758046264, "learning_rate": 6.790529909778363e-06, "loss": 1.4526, "step": 28400 }, { "epoch": 0.8375350140056023, "grad_norm": 2.6952707183755615, "learning_rate": 6.790153347219313e-06, "loss": 1.4472, "step": 28405 }, { "epoch": 0.8376824413976117, "grad_norm": 2.6360122483731185, "learning_rate": 6.78977669520028e-06, "loss": 1.4828, "step": 28410 }, { "epoch": 0.8378298687896211, "grad_norm": 2.772949603618864, "learning_rate": 6.789399953732345e-06, "loss": 1.5172, "step": 28415 }, { "epoch": 0.8379772961816305, "grad_norm": 2.8217455541502123, "learning_rate": 6.789023122826597e-06, "loss": 1.4858, "step": 28420 }, { "epoch": 0.83812472357364, "grad_norm": 2.882603125971729, "learning_rate": 6.788646202494123e-06, "loss": 1.4781, "step": 28425 }, { "epoch": 0.8382721509656494, "grad_norm": 2.806868102389195, "learning_rate": 6.788269192746015e-06, "loss": 1.5244, "step": 28430 }, { "epoch": 0.8384195783576589, "grad_norm": 2.857210857022848, "learning_rate": 6.787892093593367e-06, "loss": 1.4851, "step": 28435 }, { "epoch": 0.8385670057496682, "grad_norm": 2.9009122913079857, "learning_rate": 6.7875149050472754e-06, "loss": 1.5177, "step": 28440 }, { "epoch": 0.8387144331416777, "grad_norm": 2.7147372369419998, "learning_rate": 6.787137627118839e-06, "loss": 1.4843, "step": 28445 }, { "epoch": 0.8388618605336872, "grad_norm": 2.8323339760021233, "learning_rate": 6.786760259819159e-06, "loss": 1.5483, "step": 28450 }, { "epoch": 0.8390092879256966, "grad_norm": 2.7073670482622147, "learning_rate": 6.786382803159341e-06, "loss": 1.4899, "step": 28455 }, { "epoch": 0.8391567153177061, "grad_norm": 2.8700393794166446, "learning_rate": 6.786005257150492e-06, "loss": 1.4769, "step": 28460 }, { "epoch": 0.8393041427097154, "grad_norm": 2.825807794833509, "learning_rate": 6.785627621803722e-06, "loss": 1.4898, "step": 28465 }, { "epoch": 0.8394515701017249, "grad_norm": 2.79053092444062, "learning_rate": 6.785249897130142e-06, "loss": 1.4787, "step": 28470 }, { "epoch": 0.8395989974937343, "grad_norm": 2.754942222930829, "learning_rate": 6.784872083140869e-06, "loss": 1.5163, "step": 28475 }, { "epoch": 0.8397464248857438, "grad_norm": 2.6556574779094033, "learning_rate": 6.784494179847019e-06, "loss": 1.4643, "step": 28480 }, { "epoch": 0.8398938522777533, "grad_norm": 2.6247882570885093, "learning_rate": 6.7841161872597125e-06, "loss": 1.4361, "step": 28485 }, { "epoch": 0.8400412796697626, "grad_norm": 2.787488870286433, "learning_rate": 6.783738105390072e-06, "loss": 1.51, "step": 28490 }, { "epoch": 0.8401887070617721, "grad_norm": 2.707819730801482, "learning_rate": 6.783359934249225e-06, "loss": 1.4877, "step": 28495 }, { "epoch": 0.8403361344537815, "grad_norm": 2.8864673328626163, "learning_rate": 6.782981673848298e-06, "loss": 1.4949, "step": 28500 }, { "epoch": 0.8403361344537815, "eval_loss": 1.148848295211792, "eval_runtime": 4.1824, "eval_samples_per_second": 94.681, "eval_steps_per_second": 3.108, "step": 28500 }, { "epoch": 0.840483561845791, "grad_norm": 2.811771559778442, "learning_rate": 6.782603324198421e-06, "loss": 1.4265, "step": 28505 }, { "epoch": 0.8406309892378003, "grad_norm": 2.8093374269146683, "learning_rate": 6.7822248853107285e-06, "loss": 1.4347, "step": 28510 }, { "epoch": 0.8407784166298098, "grad_norm": 2.8949534417337244, "learning_rate": 6.781846357196356e-06, "loss": 1.4978, "step": 28515 }, { "epoch": 0.8409258440218192, "grad_norm": 2.832151806941381, "learning_rate": 6.781467739866443e-06, "loss": 1.5372, "step": 28520 }, { "epoch": 0.8410732714138287, "grad_norm": 2.7062304859007704, "learning_rate": 6.7810890333321315e-06, "loss": 1.4949, "step": 28525 }, { "epoch": 0.8412206988058382, "grad_norm": 2.786269900011882, "learning_rate": 6.780710237604562e-06, "loss": 1.5052, "step": 28530 }, { "epoch": 0.8413681261978475, "grad_norm": 2.80346243487231, "learning_rate": 6.780331352694883e-06, "loss": 1.5213, "step": 28535 }, { "epoch": 0.841515553589857, "grad_norm": 2.7518613338276534, "learning_rate": 6.7799523786142456e-06, "loss": 1.4615, "step": 28540 }, { "epoch": 0.8416629809818664, "grad_norm": 2.801678227922289, "learning_rate": 6.779573315373798e-06, "loss": 1.4608, "step": 28545 }, { "epoch": 0.8418104083738759, "grad_norm": 2.858363937138339, "learning_rate": 6.779194162984697e-06, "loss": 1.4597, "step": 28550 }, { "epoch": 0.8419578357658853, "grad_norm": 2.90931459072737, "learning_rate": 6.778814921458099e-06, "loss": 1.5008, "step": 28555 }, { "epoch": 0.8421052631578947, "grad_norm": 2.8088437379810776, "learning_rate": 6.778435590805162e-06, "loss": 1.4979, "step": 28560 }, { "epoch": 0.8422526905499041, "grad_norm": 2.620503247866082, "learning_rate": 6.778056171037052e-06, "loss": 1.4731, "step": 28565 }, { "epoch": 0.8424001179419136, "grad_norm": 2.673882634871605, "learning_rate": 6.777676662164929e-06, "loss": 1.4955, "step": 28570 }, { "epoch": 0.8425475453339231, "grad_norm": 2.8325067190999276, "learning_rate": 6.777297064199964e-06, "loss": 1.4625, "step": 28575 }, { "epoch": 0.8426949727259325, "grad_norm": 2.7695005417988003, "learning_rate": 6.776917377153327e-06, "loss": 1.4496, "step": 28580 }, { "epoch": 0.8428424001179419, "grad_norm": 2.822400575352894, "learning_rate": 6.7765376010361875e-06, "loss": 1.4899, "step": 28585 }, { "epoch": 0.8429898275099513, "grad_norm": 2.6727593900913624, "learning_rate": 6.776157735859725e-06, "loss": 1.4491, "step": 28590 }, { "epoch": 0.8431372549019608, "grad_norm": 2.790736453846437, "learning_rate": 6.775777781635115e-06, "loss": 1.4515, "step": 28595 }, { "epoch": 0.8432846822939702, "grad_norm": 2.6569958612939755, "learning_rate": 6.775397738373539e-06, "loss": 1.4816, "step": 28600 }, { "epoch": 0.8434321096859797, "grad_norm": 2.7502876233260554, "learning_rate": 6.775017606086179e-06, "loss": 1.4738, "step": 28605 }, { "epoch": 0.843579537077989, "grad_norm": 2.7533086913514424, "learning_rate": 6.774637384784223e-06, "loss": 1.4928, "step": 28610 }, { "epoch": 0.8437269644699985, "grad_norm": 2.7764831452015963, "learning_rate": 6.774257074478858e-06, "loss": 1.4751, "step": 28615 }, { "epoch": 0.843874391862008, "grad_norm": 2.6279021361641606, "learning_rate": 6.773876675181275e-06, "loss": 1.4158, "step": 28620 }, { "epoch": 0.8440218192540174, "grad_norm": 2.841818872196098, "learning_rate": 6.773496186902668e-06, "loss": 1.5064, "step": 28625 }, { "epoch": 0.8441692466460269, "grad_norm": 2.7861300661294393, "learning_rate": 6.773115609654232e-06, "loss": 1.5097, "step": 28630 }, { "epoch": 0.8443166740380362, "grad_norm": 2.696310243874165, "learning_rate": 6.772734943447168e-06, "loss": 1.4803, "step": 28635 }, { "epoch": 0.8444641014300457, "grad_norm": 2.838414026409883, "learning_rate": 6.772354188292677e-06, "loss": 1.5104, "step": 28640 }, { "epoch": 0.8446115288220551, "grad_norm": 2.63704760733275, "learning_rate": 6.7719733442019636e-06, "loss": 1.4479, "step": 28645 }, { "epoch": 0.8447589562140646, "grad_norm": 2.9849856407433664, "learning_rate": 6.771592411186232e-06, "loss": 1.4976, "step": 28650 }, { "epoch": 0.8449063836060741, "grad_norm": 2.8542144583197944, "learning_rate": 6.771211389256694e-06, "loss": 1.4648, "step": 28655 }, { "epoch": 0.8450538109980834, "grad_norm": 2.8110829458212505, "learning_rate": 6.770830278424562e-06, "loss": 1.4842, "step": 28660 }, { "epoch": 0.8452012383900929, "grad_norm": 2.5451369179518974, "learning_rate": 6.770449078701049e-06, "loss": 1.4409, "step": 28665 }, { "epoch": 0.8453486657821023, "grad_norm": 2.771241932242007, "learning_rate": 6.770067790097373e-06, "loss": 1.4576, "step": 28670 }, { "epoch": 0.8454960931741118, "grad_norm": 2.6865547099680325, "learning_rate": 6.769686412624754e-06, "loss": 1.4725, "step": 28675 }, { "epoch": 0.8456435205661211, "grad_norm": 2.633649932375519, "learning_rate": 6.769304946294413e-06, "loss": 1.5009, "step": 28680 }, { "epoch": 0.8457909479581306, "grad_norm": 2.6082296336918036, "learning_rate": 6.768923391117577e-06, "loss": 1.4822, "step": 28685 }, { "epoch": 0.84593837535014, "grad_norm": 2.8860504184919336, "learning_rate": 6.768541747105473e-06, "loss": 1.5091, "step": 28690 }, { "epoch": 0.8460858027421495, "grad_norm": 2.815074215571163, "learning_rate": 6.768160014269332e-06, "loss": 1.4718, "step": 28695 }, { "epoch": 0.846233230134159, "grad_norm": 2.8443765287410523, "learning_rate": 6.767778192620385e-06, "loss": 1.5165, "step": 28700 }, { "epoch": 0.8463806575261683, "grad_norm": 2.768139467132749, "learning_rate": 6.767396282169869e-06, "loss": 1.4554, "step": 28705 }, { "epoch": 0.8465280849181778, "grad_norm": 2.745330113831668, "learning_rate": 6.767014282929022e-06, "loss": 1.4463, "step": 28710 }, { "epoch": 0.8466755123101872, "grad_norm": 2.706580544089951, "learning_rate": 6.766632194909085e-06, "loss": 1.5114, "step": 28715 }, { "epoch": 0.8468229397021967, "grad_norm": 2.7767379180263103, "learning_rate": 6.766250018121301e-06, "loss": 1.5061, "step": 28720 }, { "epoch": 0.8469703670942061, "grad_norm": 2.863391939260316, "learning_rate": 6.765867752576916e-06, "loss": 1.4932, "step": 28725 }, { "epoch": 0.8471177944862155, "grad_norm": 2.815813434711371, "learning_rate": 6.765485398287178e-06, "loss": 1.4707, "step": 28730 }, { "epoch": 0.847265221878225, "grad_norm": 2.787009377294747, "learning_rate": 6.76510295526334e-06, "loss": 1.4891, "step": 28735 }, { "epoch": 0.8474126492702344, "grad_norm": 2.846059093113844, "learning_rate": 6.764720423516653e-06, "loss": 1.4852, "step": 28740 }, { "epoch": 0.8475600766622439, "grad_norm": 2.7041433357355897, "learning_rate": 6.7643378030583764e-06, "loss": 1.4595, "step": 28745 }, { "epoch": 0.8477075040542533, "grad_norm": 2.794666279996827, "learning_rate": 6.763955093899768e-06, "loss": 1.5247, "step": 28750 }, { "epoch": 0.8478549314462627, "grad_norm": 2.654486053446756, "learning_rate": 6.763572296052088e-06, "loss": 1.4699, "step": 28755 }, { "epoch": 0.8480023588382721, "grad_norm": 2.8074595347517035, "learning_rate": 6.763189409526603e-06, "loss": 1.5224, "step": 28760 }, { "epoch": 0.8481497862302816, "grad_norm": 2.7566863319915695, "learning_rate": 6.762806434334579e-06, "loss": 1.4878, "step": 28765 }, { "epoch": 0.848297213622291, "grad_norm": 2.852345452670446, "learning_rate": 6.762423370487285e-06, "loss": 1.5165, "step": 28770 }, { "epoch": 0.8484446410143005, "grad_norm": 2.8203177848119214, "learning_rate": 6.762040217995994e-06, "loss": 1.491, "step": 28775 }, { "epoch": 0.8485920684063099, "grad_norm": 2.725510114955916, "learning_rate": 6.76165697687198e-06, "loss": 1.4906, "step": 28780 }, { "epoch": 0.8487394957983193, "grad_norm": 2.8282841462334787, "learning_rate": 6.76127364712652e-06, "loss": 1.4763, "step": 28785 }, { "epoch": 0.8488869231903288, "grad_norm": 2.6888302283507075, "learning_rate": 6.760890228770895e-06, "loss": 1.4655, "step": 28790 }, { "epoch": 0.8490343505823382, "grad_norm": 2.63907554769823, "learning_rate": 6.7605067218163865e-06, "loss": 1.4829, "step": 28795 }, { "epoch": 0.8491817779743477, "grad_norm": 2.8465528963857896, "learning_rate": 6.760123126274281e-06, "loss": 1.4728, "step": 28800 }, { "epoch": 0.849329205366357, "grad_norm": 2.9587642530912213, "learning_rate": 6.759739442155865e-06, "loss": 1.5003, "step": 28805 }, { "epoch": 0.8494766327583665, "grad_norm": 2.7578216599067673, "learning_rate": 6.759355669472429e-06, "loss": 1.4647, "step": 28810 }, { "epoch": 0.849624060150376, "grad_norm": 2.7459465429020526, "learning_rate": 6.758971808235266e-06, "loss": 1.497, "step": 28815 }, { "epoch": 0.8497714875423854, "grad_norm": 2.78384297707902, "learning_rate": 6.758587858455671e-06, "loss": 1.4995, "step": 28820 }, { "epoch": 0.8499189149343949, "grad_norm": 2.8056944866566687, "learning_rate": 6.758203820144943e-06, "loss": 1.5012, "step": 28825 }, { "epoch": 0.8500663423264042, "grad_norm": 2.7491514657338305, "learning_rate": 6.757819693314384e-06, "loss": 1.504, "step": 28830 }, { "epoch": 0.8502137697184137, "grad_norm": 2.688109224768402, "learning_rate": 6.7574354779752955e-06, "loss": 1.4767, "step": 28835 }, { "epoch": 0.8503611971104231, "grad_norm": 2.6200112274952194, "learning_rate": 6.757051174138983e-06, "loss": 1.4814, "step": 28840 }, { "epoch": 0.8505086245024326, "grad_norm": 2.643132911791756, "learning_rate": 6.756666781816756e-06, "loss": 1.4211, "step": 28845 }, { "epoch": 0.8506560518944419, "grad_norm": 2.803350787056499, "learning_rate": 6.7562823010199265e-06, "loss": 1.4429, "step": 28850 }, { "epoch": 0.8508034792864514, "grad_norm": 2.845483837020962, "learning_rate": 6.7558977317598075e-06, "loss": 1.5066, "step": 28855 }, { "epoch": 0.8509509066784608, "grad_norm": 2.835441944853993, "learning_rate": 6.755513074047715e-06, "loss": 1.4789, "step": 28860 }, { "epoch": 0.8510983340704703, "grad_norm": 2.7276143619138407, "learning_rate": 6.755128327894968e-06, "loss": 1.4929, "step": 28865 }, { "epoch": 0.8512457614624798, "grad_norm": 2.670535935205343, "learning_rate": 6.754743493312888e-06, "loss": 1.4918, "step": 28870 }, { "epoch": 0.8513931888544891, "grad_norm": 2.848572177494651, "learning_rate": 6.754358570312801e-06, "loss": 1.5224, "step": 28875 }, { "epoch": 0.8515406162464986, "grad_norm": 2.800130106136993, "learning_rate": 6.753973558906032e-06, "loss": 1.4156, "step": 28880 }, { "epoch": 0.851688043638508, "grad_norm": 2.748005686743621, "learning_rate": 6.753588459103911e-06, "loss": 1.4598, "step": 28885 }, { "epoch": 0.8518354710305175, "grad_norm": 2.818758154696885, "learning_rate": 6.75320327091777e-06, "loss": 1.4542, "step": 28890 }, { "epoch": 0.851982898422527, "grad_norm": 2.737511689815225, "learning_rate": 6.752817994358943e-06, "loss": 1.4859, "step": 28895 }, { "epoch": 0.8521303258145363, "grad_norm": 2.6838469413547594, "learning_rate": 6.752432629438768e-06, "loss": 1.4937, "step": 28900 }, { "epoch": 0.8522777532065458, "grad_norm": 2.70803869779448, "learning_rate": 6.752047176168584e-06, "loss": 1.4988, "step": 28905 }, { "epoch": 0.8524251805985552, "grad_norm": 2.86353843569273, "learning_rate": 6.751661634559733e-06, "loss": 1.4795, "step": 28910 }, { "epoch": 0.8525726079905647, "grad_norm": 2.774151949615425, "learning_rate": 6.751276004623563e-06, "loss": 1.4841, "step": 28915 }, { "epoch": 0.8527200353825741, "grad_norm": 2.8432796750204345, "learning_rate": 6.750890286371418e-06, "loss": 1.4923, "step": 28920 }, { "epoch": 0.8528674627745835, "grad_norm": 2.741955572762305, "learning_rate": 6.750504479814651e-06, "loss": 1.5014, "step": 28925 }, { "epoch": 0.8530148901665929, "grad_norm": 2.797857749023493, "learning_rate": 6.750118584964612e-06, "loss": 1.5554, "step": 28930 }, { "epoch": 0.8531623175586024, "grad_norm": 2.7159530255062125, "learning_rate": 6.749732601832658e-06, "loss": 1.4616, "step": 28935 }, { "epoch": 0.8533097449506118, "grad_norm": 2.594440327043459, "learning_rate": 6.749346530430147e-06, "loss": 1.4408, "step": 28940 }, { "epoch": 0.8534571723426213, "grad_norm": 2.702288602845998, "learning_rate": 6.74896037076844e-06, "loss": 1.5214, "step": 28945 }, { "epoch": 0.8536045997346307, "grad_norm": 2.661626215229027, "learning_rate": 6.748574122858899e-06, "loss": 1.4879, "step": 28950 }, { "epoch": 0.8537520271266401, "grad_norm": 2.7696076785603685, "learning_rate": 6.748187786712891e-06, "loss": 1.4789, "step": 28955 }, { "epoch": 0.8538994545186496, "grad_norm": 2.8015809424222518, "learning_rate": 6.7478013623417825e-06, "loss": 1.4738, "step": 28960 }, { "epoch": 0.854046881910659, "grad_norm": 2.7112137753287655, "learning_rate": 6.7474148497569476e-06, "loss": 1.4814, "step": 28965 }, { "epoch": 0.8541943093026685, "grad_norm": 2.8462707165835663, "learning_rate": 6.747028248969758e-06, "loss": 1.5165, "step": 28970 }, { "epoch": 0.8543417366946778, "grad_norm": 2.659661639722234, "learning_rate": 6.746641559991589e-06, "loss": 1.4684, "step": 28975 }, { "epoch": 0.8544891640866873, "grad_norm": 2.7319590419457604, "learning_rate": 6.746254782833821e-06, "loss": 1.4488, "step": 28980 }, { "epoch": 0.8546365914786967, "grad_norm": 2.784109298757818, "learning_rate": 6.745867917507833e-06, "loss": 1.4781, "step": 28985 }, { "epoch": 0.8547840188707062, "grad_norm": 2.7991343810529186, "learning_rate": 6.745480964025012e-06, "loss": 1.5211, "step": 28990 }, { "epoch": 0.8549314462627157, "grad_norm": 2.969322803140235, "learning_rate": 6.745093922396742e-06, "loss": 1.4948, "step": 28995 }, { "epoch": 0.855078873654725, "grad_norm": 2.947251603008531, "learning_rate": 6.744706792634415e-06, "loss": 1.5116, "step": 29000 }, { "epoch": 0.855078873654725, "eval_loss": 1.1471129655838013, "eval_runtime": 4.2622, "eval_samples_per_second": 92.909, "eval_steps_per_second": 3.05, "step": 29000 }, { "epoch": 0.8552263010467345, "grad_norm": 2.7806808044299878, "learning_rate": 6.74431957474942e-06, "loss": 1.5118, "step": 29005 }, { "epoch": 0.8553737284387439, "grad_norm": 2.7725307931382517, "learning_rate": 6.7439322687531526e-06, "loss": 1.4679, "step": 29010 }, { "epoch": 0.8555211558307534, "grad_norm": 2.782798705504379, "learning_rate": 6.743544874657009e-06, "loss": 1.4745, "step": 29015 }, { "epoch": 0.8556685832227627, "grad_norm": 2.41197243000018, "learning_rate": 6.743157392472391e-06, "loss": 1.4609, "step": 29020 }, { "epoch": 0.8558160106147722, "grad_norm": 2.880322927770116, "learning_rate": 6.742769822210696e-06, "loss": 1.5043, "step": 29025 }, { "epoch": 0.8559634380067817, "grad_norm": 2.8949164827042626, "learning_rate": 6.742382163883334e-06, "loss": 1.4989, "step": 29030 }, { "epoch": 0.8561108653987911, "grad_norm": 2.7649380153934295, "learning_rate": 6.741994417501709e-06, "loss": 1.5269, "step": 29035 }, { "epoch": 0.8562582927908006, "grad_norm": 2.857192478098807, "learning_rate": 6.741606583077232e-06, "loss": 1.4648, "step": 29040 }, { "epoch": 0.8564057201828099, "grad_norm": 2.671366109301639, "learning_rate": 6.741218660621315e-06, "loss": 1.4724, "step": 29045 }, { "epoch": 0.8565531475748194, "grad_norm": 2.845151777800988, "learning_rate": 6.740830650145374e-06, "loss": 1.4388, "step": 29050 }, { "epoch": 0.8567005749668288, "grad_norm": 2.81222908026333, "learning_rate": 6.740442551660825e-06, "loss": 1.5283, "step": 29055 }, { "epoch": 0.8568480023588383, "grad_norm": 2.7866317470078155, "learning_rate": 6.7400543651790895e-06, "loss": 1.4991, "step": 29060 }, { "epoch": 0.8569954297508477, "grad_norm": 2.6892740456885513, "learning_rate": 6.7396660907115895e-06, "loss": 1.486, "step": 29065 }, { "epoch": 0.8571428571428571, "grad_norm": 2.8102893534508895, "learning_rate": 6.739277728269751e-06, "loss": 1.4846, "step": 29070 }, { "epoch": 0.8572902845348666, "grad_norm": 2.6595813171738794, "learning_rate": 6.7388892778650035e-06, "loss": 1.4833, "step": 29075 }, { "epoch": 0.857437711926876, "grad_norm": 2.6142509440001698, "learning_rate": 6.738500739508774e-06, "loss": 1.4456, "step": 29080 }, { "epoch": 0.8575851393188855, "grad_norm": 2.8069894152025063, "learning_rate": 6.7381121132124995e-06, "loss": 1.4713, "step": 29085 }, { "epoch": 0.8577325667108949, "grad_norm": 2.7433368884687046, "learning_rate": 6.737723398987613e-06, "loss": 1.5432, "step": 29090 }, { "epoch": 0.8578799941029043, "grad_norm": 2.7323681672047795, "learning_rate": 6.737334596845553e-06, "loss": 1.4998, "step": 29095 }, { "epoch": 0.8580274214949137, "grad_norm": 2.9213962960411144, "learning_rate": 6.736945706797764e-06, "loss": 1.4261, "step": 29100 }, { "epoch": 0.8581748488869232, "grad_norm": 2.652934647879043, "learning_rate": 6.736556728855684e-06, "loss": 1.5194, "step": 29105 }, { "epoch": 0.8583222762789326, "grad_norm": 2.7019988494910274, "learning_rate": 6.7361676630307635e-06, "loss": 1.4649, "step": 29110 }, { "epoch": 0.8584697036709421, "grad_norm": 2.7888843726514634, "learning_rate": 6.735778509334449e-06, "loss": 1.4878, "step": 29115 }, { "epoch": 0.8586171310629515, "grad_norm": 2.7358829862316605, "learning_rate": 6.735389267778193e-06, "loss": 1.4935, "step": 29120 }, { "epoch": 0.8587645584549609, "grad_norm": 2.8269480481956677, "learning_rate": 6.734999938373449e-06, "loss": 1.5169, "step": 29125 }, { "epoch": 0.8589119858469704, "grad_norm": 2.7865047899709814, "learning_rate": 6.734610521131672e-06, "loss": 1.4994, "step": 29130 }, { "epoch": 0.8590594132389798, "grad_norm": 2.7486704136630826, "learning_rate": 6.734221016064323e-06, "loss": 1.5091, "step": 29135 }, { "epoch": 0.8592068406309893, "grad_norm": 2.810358748184867, "learning_rate": 6.733831423182864e-06, "loss": 1.4743, "step": 29140 }, { "epoch": 0.8593542680229986, "grad_norm": 2.7091981646825767, "learning_rate": 6.733441742498757e-06, "loss": 1.4772, "step": 29145 }, { "epoch": 0.8595016954150081, "grad_norm": 2.7636162722346898, "learning_rate": 6.733051974023472e-06, "loss": 1.4484, "step": 29150 }, { "epoch": 0.8596491228070176, "grad_norm": 3.1568609083339356, "learning_rate": 6.732662117768474e-06, "loss": 1.472, "step": 29155 }, { "epoch": 0.859796550199027, "grad_norm": 2.8493438378329308, "learning_rate": 6.732272173745238e-06, "loss": 1.5128, "step": 29160 }, { "epoch": 0.8599439775910365, "grad_norm": 2.5778656742884776, "learning_rate": 6.731882141965238e-06, "loss": 1.3891, "step": 29165 }, { "epoch": 0.8600914049830458, "grad_norm": 2.927145189667633, "learning_rate": 6.7314920224399505e-06, "loss": 1.511, "step": 29170 }, { "epoch": 0.8602388323750553, "grad_norm": 2.8144313490218344, "learning_rate": 6.731101815180856e-06, "loss": 1.4681, "step": 29175 }, { "epoch": 0.8603862597670647, "grad_norm": 2.7464478308649474, "learning_rate": 6.730711520199437e-06, "loss": 1.4742, "step": 29180 }, { "epoch": 0.8605336871590742, "grad_norm": 2.6222405238819797, "learning_rate": 6.7303211375071784e-06, "loss": 1.4349, "step": 29185 }, { "epoch": 0.8606811145510835, "grad_norm": 2.8775375231677236, "learning_rate": 6.729930667115567e-06, "loss": 1.5555, "step": 29190 }, { "epoch": 0.860828541943093, "grad_norm": 2.754781256630513, "learning_rate": 6.7295401090360915e-06, "loss": 1.4879, "step": 29195 }, { "epoch": 0.8609759693351025, "grad_norm": 2.787892252946212, "learning_rate": 6.729149463280248e-06, "loss": 1.508, "step": 29200 }, { "epoch": 0.8611233967271119, "grad_norm": 2.8011496350651104, "learning_rate": 6.728758729859528e-06, "loss": 1.5304, "step": 29205 }, { "epoch": 0.8612708241191214, "grad_norm": 2.8246945734200963, "learning_rate": 6.728367908785432e-06, "loss": 1.5285, "step": 29210 }, { "epoch": 0.8614182515111307, "grad_norm": 2.712564521586047, "learning_rate": 6.727977000069459e-06, "loss": 1.4611, "step": 29215 }, { "epoch": 0.8615656789031402, "grad_norm": 2.7704414344394874, "learning_rate": 6.727586003723112e-06, "loss": 1.5234, "step": 29220 }, { "epoch": 0.8617131062951496, "grad_norm": 2.820458137384376, "learning_rate": 6.727194919757897e-06, "loss": 1.4744, "step": 29225 }, { "epoch": 0.8618605336871591, "grad_norm": 2.6530308734087322, "learning_rate": 6.726803748185322e-06, "loss": 1.4274, "step": 29230 }, { "epoch": 0.8620079610791685, "grad_norm": 2.6528480101110263, "learning_rate": 6.726412489016897e-06, "loss": 1.4281, "step": 29235 }, { "epoch": 0.8621553884711779, "grad_norm": 2.808499705815119, "learning_rate": 6.7260211422641365e-06, "loss": 1.4536, "step": 29240 }, { "epoch": 0.8623028158631874, "grad_norm": 2.7879443508552955, "learning_rate": 6.7256297079385565e-06, "loss": 1.4651, "step": 29245 }, { "epoch": 0.8624502432551968, "grad_norm": 2.7446342784585025, "learning_rate": 6.725238186051672e-06, "loss": 1.4854, "step": 29250 }, { "epoch": 0.8625976706472063, "grad_norm": 2.7262424373087186, "learning_rate": 6.724846576615009e-06, "loss": 1.4484, "step": 29255 }, { "epoch": 0.8627450980392157, "grad_norm": 2.833480670782014, "learning_rate": 6.724454879640088e-06, "loss": 1.5272, "step": 29260 }, { "epoch": 0.8628925254312251, "grad_norm": 2.82467198234935, "learning_rate": 6.724063095138435e-06, "loss": 1.5058, "step": 29265 }, { "epoch": 0.8630399528232345, "grad_norm": 2.5929726682963903, "learning_rate": 6.7236712231215806e-06, "loss": 1.5216, "step": 29270 }, { "epoch": 0.863187380215244, "grad_norm": 2.8315839814198647, "learning_rate": 6.723279263601053e-06, "loss": 1.4036, "step": 29275 }, { "epoch": 0.8633348076072535, "grad_norm": 3.13935338077624, "learning_rate": 6.7228872165883896e-06, "loss": 1.4906, "step": 29280 }, { "epoch": 0.8634822349992629, "grad_norm": 2.703867766732534, "learning_rate": 6.7224950820951245e-06, "loss": 1.496, "step": 29285 }, { "epoch": 0.8636296623912723, "grad_norm": 2.8419427456902815, "learning_rate": 6.722102860132796e-06, "loss": 1.4882, "step": 29290 }, { "epoch": 0.8637770897832817, "grad_norm": 2.7704793648013646, "learning_rate": 6.721710550712949e-06, "loss": 1.4531, "step": 29295 }, { "epoch": 0.8639245171752912, "grad_norm": 2.6784532076366, "learning_rate": 6.721318153847125e-06, "loss": 1.4733, "step": 29300 }, { "epoch": 0.8640719445673006, "grad_norm": 2.7186247334373896, "learning_rate": 6.720925669546872e-06, "loss": 1.4801, "step": 29305 }, { "epoch": 0.8642193719593101, "grad_norm": 2.7775530125500745, "learning_rate": 6.720533097823738e-06, "loss": 1.4549, "step": 29310 }, { "epoch": 0.8643667993513194, "grad_norm": 2.679763103122838, "learning_rate": 6.720140438689275e-06, "loss": 1.4808, "step": 29315 }, { "epoch": 0.8645142267433289, "grad_norm": 2.6700854101541025, "learning_rate": 6.719747692155039e-06, "loss": 1.4645, "step": 29320 }, { "epoch": 0.8646616541353384, "grad_norm": 2.8330486705996063, "learning_rate": 6.719354858232587e-06, "loss": 1.5106, "step": 29325 }, { "epoch": 0.8648090815273478, "grad_norm": 2.714807866476365, "learning_rate": 6.7189619369334756e-06, "loss": 1.4443, "step": 29330 }, { "epoch": 0.8649565089193573, "grad_norm": 2.708894019778259, "learning_rate": 6.718568928269271e-06, "loss": 1.5275, "step": 29335 }, { "epoch": 0.8651039363113666, "grad_norm": 3.6502224656370332, "learning_rate": 6.718175832251535e-06, "loss": 1.4838, "step": 29340 }, { "epoch": 0.8652513637033761, "grad_norm": 2.8587749246338054, "learning_rate": 6.717782648891837e-06, "loss": 1.5296, "step": 29345 }, { "epoch": 0.8653987910953855, "grad_norm": 2.83962950936982, "learning_rate": 6.717389378201743e-06, "loss": 1.5012, "step": 29350 }, { "epoch": 0.865546218487395, "grad_norm": 2.7677562140684686, "learning_rate": 6.716996020192829e-06, "loss": 1.4738, "step": 29355 }, { "epoch": 0.8656936458794043, "grad_norm": 3.2457170365057495, "learning_rate": 6.716602574876671e-06, "loss": 1.435, "step": 29360 }, { "epoch": 0.8658410732714138, "grad_norm": 2.9809402866539134, "learning_rate": 6.716209042264843e-06, "loss": 1.4709, "step": 29365 }, { "epoch": 0.8659885006634233, "grad_norm": 2.677266308468535, "learning_rate": 6.715815422368928e-06, "loss": 1.4795, "step": 29370 }, { "epoch": 0.8661359280554327, "grad_norm": 2.774793113358527, "learning_rate": 6.715421715200507e-06, "loss": 1.4703, "step": 29375 }, { "epoch": 0.8662833554474422, "grad_norm": 2.8894539280578426, "learning_rate": 6.715027920771165e-06, "loss": 1.5076, "step": 29380 }, { "epoch": 0.8664307828394515, "grad_norm": 2.703257789025015, "learning_rate": 6.714634039092492e-06, "loss": 1.4501, "step": 29385 }, { "epoch": 0.866578210231461, "grad_norm": 2.7841424711068314, "learning_rate": 6.714240070176077e-06, "loss": 1.5221, "step": 29390 }, { "epoch": 0.8667256376234704, "grad_norm": 2.736125656823057, "learning_rate": 6.713846014033513e-06, "loss": 1.4897, "step": 29395 }, { "epoch": 0.8668730650154799, "grad_norm": 2.8493697419587773, "learning_rate": 6.713451870676395e-06, "loss": 1.4258, "step": 29400 }, { "epoch": 0.8670204924074894, "grad_norm": 2.8370239694909647, "learning_rate": 6.713057640116324e-06, "loss": 1.4994, "step": 29405 }, { "epoch": 0.8671679197994987, "grad_norm": 2.790074057017617, "learning_rate": 6.712663322364896e-06, "loss": 1.4558, "step": 29410 }, { "epoch": 0.8673153471915082, "grad_norm": 2.8478958136808323, "learning_rate": 6.712268917433717e-06, "loss": 1.4867, "step": 29415 }, { "epoch": 0.8674627745835176, "grad_norm": 2.7856776229350584, "learning_rate": 6.7118744253343924e-06, "loss": 1.4833, "step": 29420 }, { "epoch": 0.8676102019755271, "grad_norm": 2.7771689791791245, "learning_rate": 6.711479846078532e-06, "loss": 1.4716, "step": 29425 }, { "epoch": 0.8677576293675365, "grad_norm": 2.6515910205935307, "learning_rate": 6.711085179677745e-06, "loss": 1.4973, "step": 29430 }, { "epoch": 0.8679050567595459, "grad_norm": 2.720981787213404, "learning_rate": 6.7106904261436455e-06, "loss": 1.506, "step": 29435 }, { "epoch": 0.8680524841515553, "grad_norm": 2.6544782719680704, "learning_rate": 6.71029558548785e-06, "loss": 1.4922, "step": 29440 }, { "epoch": 0.8681999115435648, "grad_norm": 2.7068324043126406, "learning_rate": 6.709900657721975e-06, "loss": 1.4574, "step": 29445 }, { "epoch": 0.8683473389355743, "grad_norm": 2.710586132751457, "learning_rate": 6.709505642857645e-06, "loss": 1.4931, "step": 29450 }, { "epoch": 0.8684947663275837, "grad_norm": 2.7292141388362645, "learning_rate": 6.709110540906481e-06, "loss": 1.3861, "step": 29455 }, { "epoch": 0.8686421937195931, "grad_norm": 2.862113679944328, "learning_rate": 6.7087153518801106e-06, "loss": 1.4805, "step": 29460 }, { "epoch": 0.8687896211116025, "grad_norm": 2.621863839269502, "learning_rate": 6.708320075790163e-06, "loss": 1.4605, "step": 29465 }, { "epoch": 0.868937048503612, "grad_norm": 2.672917084074337, "learning_rate": 6.707924712648268e-06, "loss": 1.4269, "step": 29470 }, { "epoch": 0.8690844758956214, "grad_norm": 2.73946924377328, "learning_rate": 6.7075292624660615e-06, "loss": 1.4668, "step": 29475 }, { "epoch": 0.8692319032876309, "grad_norm": 5.596900218121523, "learning_rate": 6.707133725255179e-06, "loss": 1.4691, "step": 29480 }, { "epoch": 0.8693793306796402, "grad_norm": 2.738698634792988, "learning_rate": 6.70673810102726e-06, "loss": 1.4726, "step": 29485 }, { "epoch": 0.8695267580716497, "grad_norm": 2.744215785263468, "learning_rate": 6.706342389793946e-06, "loss": 1.4947, "step": 29490 }, { "epoch": 0.8696741854636592, "grad_norm": 2.711588287954726, "learning_rate": 6.705946591566882e-06, "loss": 1.4904, "step": 29495 }, { "epoch": 0.8698216128556686, "grad_norm": 2.7505004581330943, "learning_rate": 6.705550706357713e-06, "loss": 1.4338, "step": 29500 }, { "epoch": 0.8698216128556686, "eval_loss": 1.1452524662017822, "eval_runtime": 4.1549, "eval_samples_per_second": 95.31, "eval_steps_per_second": 3.129, "step": 29500 }, { "epoch": 0.8699690402476781, "grad_norm": 2.677223816436564, "learning_rate": 6.705154734178091e-06, "loss": 1.4551, "step": 29505 }, { "epoch": 0.8701164676396874, "grad_norm": 2.686718018773229, "learning_rate": 6.704758675039666e-06, "loss": 1.4133, "step": 29510 }, { "epoch": 0.8702638950316969, "grad_norm": 2.7364881889283326, "learning_rate": 6.704362528954093e-06, "loss": 1.4907, "step": 29515 }, { "epoch": 0.8704113224237063, "grad_norm": 3.0540382625435782, "learning_rate": 6.703966295933029e-06, "loss": 1.4967, "step": 29520 }, { "epoch": 0.8705587498157158, "grad_norm": 2.7064190306679974, "learning_rate": 6.703569975988134e-06, "loss": 1.4783, "step": 29525 }, { "epoch": 0.8707061772077251, "grad_norm": 2.716032232397542, "learning_rate": 6.703173569131069e-06, "loss": 1.5086, "step": 29530 }, { "epoch": 0.8708536045997346, "grad_norm": 2.703418823727683, "learning_rate": 6.7027770753734996e-06, "loss": 1.5066, "step": 29535 }, { "epoch": 0.8710010319917441, "grad_norm": 2.7333582356907145, "learning_rate": 6.702380494727094e-06, "loss": 1.4385, "step": 29540 }, { "epoch": 0.8711484593837535, "grad_norm": 2.8031046914271602, "learning_rate": 6.70198382720352e-06, "loss": 1.4353, "step": 29545 }, { "epoch": 0.871295886775763, "grad_norm": 3.079524251226106, "learning_rate": 6.701587072814452e-06, "loss": 1.4358, "step": 29550 }, { "epoch": 0.8714433141677723, "grad_norm": 2.7755590024746963, "learning_rate": 6.7011902315715636e-06, "loss": 1.5094, "step": 29555 }, { "epoch": 0.8715907415597818, "grad_norm": 2.8581716391529164, "learning_rate": 6.700793303486533e-06, "loss": 1.4965, "step": 29560 }, { "epoch": 0.8717381689517912, "grad_norm": 2.8451872025351888, "learning_rate": 6.70039628857104e-06, "loss": 1.4842, "step": 29565 }, { "epoch": 0.8718855963438007, "grad_norm": 2.6734289619129497, "learning_rate": 6.6999991868367665e-06, "loss": 1.4338, "step": 29570 }, { "epoch": 0.8720330237358102, "grad_norm": 2.6845460766020217, "learning_rate": 6.699601998295399e-06, "loss": 1.468, "step": 29575 }, { "epoch": 0.8721804511278195, "grad_norm": 2.7360635525744614, "learning_rate": 6.699204722958624e-06, "loss": 1.5187, "step": 29580 }, { "epoch": 0.872327878519829, "grad_norm": 2.665128523382891, "learning_rate": 6.698807360838133e-06, "loss": 1.4876, "step": 29585 }, { "epoch": 0.8724753059118384, "grad_norm": 2.6633127443977465, "learning_rate": 6.6984099119456184e-06, "loss": 1.4926, "step": 29590 }, { "epoch": 0.8726227333038479, "grad_norm": 2.821566746989529, "learning_rate": 6.698012376292776e-06, "loss": 1.511, "step": 29595 }, { "epoch": 0.8727701606958573, "grad_norm": 2.664637447390906, "learning_rate": 6.697614753891301e-06, "loss": 1.4672, "step": 29600 }, { "epoch": 0.8729175880878667, "grad_norm": 2.764008926858907, "learning_rate": 6.6972170447528975e-06, "loss": 1.4641, "step": 29605 }, { "epoch": 0.8730650154798761, "grad_norm": 2.8173878312866893, "learning_rate": 6.696819248889266e-06, "loss": 1.5092, "step": 29610 }, { "epoch": 0.8732124428718856, "grad_norm": 2.7128389330730727, "learning_rate": 6.696421366312115e-06, "loss": 1.4615, "step": 29615 }, { "epoch": 0.873359870263895, "grad_norm": 2.7954643896684583, "learning_rate": 6.6960233970331495e-06, "loss": 1.5262, "step": 29620 }, { "epoch": 0.8735072976559045, "grad_norm": 2.52030585439685, "learning_rate": 6.6956253410640825e-06, "loss": 1.465, "step": 29625 }, { "epoch": 0.8736547250479139, "grad_norm": 2.718285382347284, "learning_rate": 6.6952271984166255e-06, "loss": 1.4781, "step": 29630 }, { "epoch": 0.8738021524399233, "grad_norm": 2.757698721314055, "learning_rate": 6.694828969102496e-06, "loss": 1.4243, "step": 29635 }, { "epoch": 0.8739495798319328, "grad_norm": 2.819834248293249, "learning_rate": 6.69443065313341e-06, "loss": 1.48, "step": 29640 }, { "epoch": 0.8740970072239422, "grad_norm": 2.7440132671831496, "learning_rate": 6.694032250521091e-06, "loss": 1.4357, "step": 29645 }, { "epoch": 0.8742444346159517, "grad_norm": 2.681936839682589, "learning_rate": 6.69363376127726e-06, "loss": 1.4787, "step": 29650 }, { "epoch": 0.874391862007961, "grad_norm": 2.994287799490445, "learning_rate": 6.693235185413647e-06, "loss": 1.4893, "step": 29655 }, { "epoch": 0.8745392893999705, "grad_norm": 2.7600557374446995, "learning_rate": 6.692836522941975e-06, "loss": 1.4576, "step": 29660 }, { "epoch": 0.87468671679198, "grad_norm": 2.8261038992249494, "learning_rate": 6.692437773873979e-06, "loss": 1.4618, "step": 29665 }, { "epoch": 0.8748341441839894, "grad_norm": 2.7616644780909807, "learning_rate": 6.692038938221392e-06, "loss": 1.4642, "step": 29670 }, { "epoch": 0.8749815715759989, "grad_norm": 2.8059051668976265, "learning_rate": 6.6916400159959486e-06, "loss": 1.5106, "step": 29675 }, { "epoch": 0.8751289989680082, "grad_norm": 2.652011518454555, "learning_rate": 6.69124100720939e-06, "loss": 1.4717, "step": 29680 }, { "epoch": 0.8752764263600177, "grad_norm": 2.787892903239362, "learning_rate": 6.690841911873456e-06, "loss": 1.5356, "step": 29685 }, { "epoch": 0.8754238537520271, "grad_norm": 2.845700504983941, "learning_rate": 6.690442729999891e-06, "loss": 1.5187, "step": 29690 }, { "epoch": 0.8755712811440366, "grad_norm": 2.7064594865137273, "learning_rate": 6.69004346160044e-06, "loss": 1.4516, "step": 29695 }, { "epoch": 0.8757187085360459, "grad_norm": 2.6709758881071886, "learning_rate": 6.689644106686853e-06, "loss": 1.4411, "step": 29700 }, { "epoch": 0.8758661359280554, "grad_norm": 2.5759970673060995, "learning_rate": 6.689244665270881e-06, "loss": 1.4217, "step": 29705 }, { "epoch": 0.8760135633200649, "grad_norm": 2.800313316347636, "learning_rate": 6.688845137364279e-06, "loss": 1.4914, "step": 29710 }, { "epoch": 0.8761609907120743, "grad_norm": 2.6489969671172884, "learning_rate": 6.688445522978804e-06, "loss": 1.5054, "step": 29715 }, { "epoch": 0.8763084181040838, "grad_norm": 2.7693683003192415, "learning_rate": 6.688045822126213e-06, "loss": 1.4695, "step": 29720 }, { "epoch": 0.8764558454960931, "grad_norm": 2.637719587442039, "learning_rate": 6.6876460348182695e-06, "loss": 1.5077, "step": 29725 }, { "epoch": 0.8766032728881026, "grad_norm": 2.636773762348118, "learning_rate": 6.687246161066737e-06, "loss": 1.4781, "step": 29730 }, { "epoch": 0.876750700280112, "grad_norm": 2.7112891452716394, "learning_rate": 6.6868462008833825e-06, "loss": 1.5099, "step": 29735 }, { "epoch": 0.8768981276721215, "grad_norm": 2.774430691507205, "learning_rate": 6.686446154279974e-06, "loss": 1.4839, "step": 29740 }, { "epoch": 0.877045555064131, "grad_norm": 2.632808917387845, "learning_rate": 6.686046021268284e-06, "loss": 1.4342, "step": 29745 }, { "epoch": 0.8771929824561403, "grad_norm": 2.874309972636245, "learning_rate": 6.685645801860089e-06, "loss": 1.5297, "step": 29750 }, { "epoch": 0.8773404098481498, "grad_norm": 2.793162943939214, "learning_rate": 6.685245496067164e-06, "loss": 1.4877, "step": 29755 }, { "epoch": 0.8774878372401592, "grad_norm": 2.8471531782893402, "learning_rate": 6.684845103901287e-06, "loss": 1.4759, "step": 29760 }, { "epoch": 0.8776352646321687, "grad_norm": 2.8165883716667173, "learning_rate": 6.684444625374242e-06, "loss": 1.4645, "step": 29765 }, { "epoch": 0.8777826920241781, "grad_norm": 2.6980863462696405, "learning_rate": 6.684044060497813e-06, "loss": 1.462, "step": 29770 }, { "epoch": 0.8779301194161875, "grad_norm": 2.7903498691760573, "learning_rate": 6.6836434092837875e-06, "loss": 1.4445, "step": 29775 }, { "epoch": 0.8780775468081969, "grad_norm": 2.7515184804869195, "learning_rate": 6.683242671743955e-06, "loss": 1.4559, "step": 29780 }, { "epoch": 0.8782249742002064, "grad_norm": 2.660570619186508, "learning_rate": 6.682841847890107e-06, "loss": 1.436, "step": 29785 }, { "epoch": 0.8783724015922159, "grad_norm": 2.6110804371234475, "learning_rate": 6.682440937734039e-06, "loss": 1.421, "step": 29790 }, { "epoch": 0.8785198289842253, "grad_norm": 3.427889508347559, "learning_rate": 6.682039941287549e-06, "loss": 1.5016, "step": 29795 }, { "epoch": 0.8786672563762347, "grad_norm": 2.7945781731583277, "learning_rate": 6.681638858562434e-06, "loss": 1.5018, "step": 29800 }, { "epoch": 0.8788146837682441, "grad_norm": 2.7368834400534974, "learning_rate": 6.681237689570498e-06, "loss": 1.4842, "step": 29805 }, { "epoch": 0.8789621111602536, "grad_norm": 2.8281527533928874, "learning_rate": 6.680836434323547e-06, "loss": 1.4693, "step": 29810 }, { "epoch": 0.879109538552263, "grad_norm": 2.768714645535095, "learning_rate": 6.6804350928333875e-06, "loss": 1.5168, "step": 29815 }, { "epoch": 0.8792569659442725, "grad_norm": 2.7093828598899603, "learning_rate": 6.680033665111828e-06, "loss": 1.4695, "step": 29820 }, { "epoch": 0.8794043933362818, "grad_norm": 2.8229237578028563, "learning_rate": 6.679632151170682e-06, "loss": 1.4478, "step": 29825 }, { "epoch": 0.8795518207282913, "grad_norm": 2.755817244345813, "learning_rate": 6.679230551021766e-06, "loss": 1.4816, "step": 29830 }, { "epoch": 0.8796992481203008, "grad_norm": 2.8638487944361635, "learning_rate": 6.6788288646768945e-06, "loss": 1.4903, "step": 29835 }, { "epoch": 0.8798466755123102, "grad_norm": 2.759938284524547, "learning_rate": 6.678427092147891e-06, "loss": 1.5341, "step": 29840 }, { "epoch": 0.8799941029043197, "grad_norm": 2.793785469300115, "learning_rate": 6.678025233446576e-06, "loss": 1.4082, "step": 29845 }, { "epoch": 0.880141530296329, "grad_norm": 2.8301748542605227, "learning_rate": 6.677623288584774e-06, "loss": 1.4946, "step": 29850 }, { "epoch": 0.8802889576883385, "grad_norm": 2.642166712535684, "learning_rate": 6.677221257574314e-06, "loss": 1.4094, "step": 29855 }, { "epoch": 0.8804363850803479, "grad_norm": 2.789068439760013, "learning_rate": 6.676819140427027e-06, "loss": 1.5443, "step": 29860 }, { "epoch": 0.8805838124723574, "grad_norm": 2.754180173082548, "learning_rate": 6.676416937154744e-06, "loss": 1.4779, "step": 29865 }, { "epoch": 0.8807312398643667, "grad_norm": 2.7010456890373296, "learning_rate": 6.676014647769302e-06, "loss": 1.4194, "step": 29870 }, { "epoch": 0.8808786672563762, "grad_norm": 2.640203093855955, "learning_rate": 6.675612272282536e-06, "loss": 1.4537, "step": 29875 }, { "epoch": 0.8810260946483857, "grad_norm": 2.8007610183415395, "learning_rate": 6.67520981070629e-06, "loss": 1.4893, "step": 29880 }, { "epoch": 0.8811735220403951, "grad_norm": 2.5845451925673677, "learning_rate": 6.674807263052404e-06, "loss": 1.4312, "step": 29885 }, { "epoch": 0.8813209494324046, "grad_norm": 2.776799051026219, "learning_rate": 6.6744046293327244e-06, "loss": 1.4778, "step": 29890 }, { "epoch": 0.8814683768244139, "grad_norm": 2.665919055074278, "learning_rate": 6.674001909559099e-06, "loss": 1.4591, "step": 29895 }, { "epoch": 0.8816158042164234, "grad_norm": 2.765060893908511, "learning_rate": 6.67359910374338e-06, "loss": 1.4884, "step": 29900 }, { "epoch": 0.8817632316084328, "grad_norm": 2.6092740783004214, "learning_rate": 6.673196211897418e-06, "loss": 1.4764, "step": 29905 }, { "epoch": 0.8819106590004423, "grad_norm": 2.7151819014030845, "learning_rate": 6.6727932340330695e-06, "loss": 1.448, "step": 29910 }, { "epoch": 0.8820580863924518, "grad_norm": 2.6888677225854942, "learning_rate": 6.672390170162193e-06, "loss": 1.4707, "step": 29915 }, { "epoch": 0.8822055137844611, "grad_norm": 2.760051826989727, "learning_rate": 6.6719870202966485e-06, "loss": 1.5278, "step": 29920 }, { "epoch": 0.8823529411764706, "grad_norm": 2.785967050950732, "learning_rate": 6.671583784448299e-06, "loss": 1.4979, "step": 29925 }, { "epoch": 0.88250036856848, "grad_norm": 2.8819989313253247, "learning_rate": 6.671180462629011e-06, "loss": 1.5301, "step": 29930 }, { "epoch": 0.8826477959604895, "grad_norm": 2.7184200194009263, "learning_rate": 6.670777054850652e-06, "loss": 1.4836, "step": 29935 }, { "epoch": 0.8827952233524989, "grad_norm": 2.741429286098279, "learning_rate": 6.670373561125093e-06, "loss": 1.5028, "step": 29940 }, { "epoch": 0.8829426507445083, "grad_norm": 2.9647340275282366, "learning_rate": 6.669969981464208e-06, "loss": 1.4654, "step": 29945 }, { "epoch": 0.8830900781365177, "grad_norm": 2.7535210281227416, "learning_rate": 6.669566315879871e-06, "loss": 1.4903, "step": 29950 }, { "epoch": 0.8832375055285272, "grad_norm": 2.907406872516276, "learning_rate": 6.669162564383962e-06, "loss": 1.5086, "step": 29955 }, { "epoch": 0.8833849329205367, "grad_norm": 2.5401942434906526, "learning_rate": 6.66875872698836e-06, "loss": 1.4748, "step": 29960 }, { "epoch": 0.8835323603125461, "grad_norm": 2.6940645289394816, "learning_rate": 6.668354803704952e-06, "loss": 1.4763, "step": 29965 }, { "epoch": 0.8836797877045555, "grad_norm": 2.8280152432777745, "learning_rate": 6.667950794545619e-06, "loss": 1.4413, "step": 29970 }, { "epoch": 0.8838272150965649, "grad_norm": 2.8487113548507588, "learning_rate": 6.667546699522253e-06, "loss": 1.4167, "step": 29975 }, { "epoch": 0.8839746424885744, "grad_norm": 2.684996367284031, "learning_rate": 6.667142518646745e-06, "loss": 1.5095, "step": 29980 }, { "epoch": 0.8841220698805838, "grad_norm": 2.851623006877377, "learning_rate": 6.666738251930986e-06, "loss": 1.4443, "step": 29985 }, { "epoch": 0.8842694972725933, "grad_norm": 2.760948620487543, "learning_rate": 6.666333899386873e-06, "loss": 1.5169, "step": 29990 }, { "epoch": 0.8844169246646026, "grad_norm": 2.7243593261684733, "learning_rate": 6.6659294610263055e-06, "loss": 1.4801, "step": 29995 }, { "epoch": 0.8845643520566121, "grad_norm": 2.6679133515203493, "learning_rate": 6.665524936861184e-06, "loss": 1.4656, "step": 30000 }, { "epoch": 0.8845643520566121, "eval_loss": 1.1446033716201782, "eval_runtime": 4.2547, "eval_samples_per_second": 93.073, "eval_steps_per_second": 3.055, "step": 30000 }, { "epoch": 0.8847117794486216, "grad_norm": 2.7047784377905764, "learning_rate": 6.665120326903412e-06, "loss": 1.4717, "step": 30005 }, { "epoch": 0.884859206840631, "grad_norm": 2.612138906738102, "learning_rate": 6.664715631164896e-06, "loss": 1.5322, "step": 30010 }, { "epoch": 0.8850066342326405, "grad_norm": 2.705051761585367, "learning_rate": 6.664310849657543e-06, "loss": 1.4731, "step": 30015 }, { "epoch": 0.8851540616246498, "grad_norm": 2.677687710090679, "learning_rate": 6.663905982393268e-06, "loss": 1.47, "step": 30020 }, { "epoch": 0.8853014890166593, "grad_norm": 2.766358834746887, "learning_rate": 6.6635010293839796e-06, "loss": 1.4339, "step": 30025 }, { "epoch": 0.8854489164086687, "grad_norm": 2.8956653124939113, "learning_rate": 6.663095990641598e-06, "loss": 1.5061, "step": 30030 }, { "epoch": 0.8855963438006782, "grad_norm": 2.8710735282623245, "learning_rate": 6.66269086617804e-06, "loss": 1.4948, "step": 30035 }, { "epoch": 0.8857437711926877, "grad_norm": 2.781680574373452, "learning_rate": 6.662285656005226e-06, "loss": 1.5259, "step": 30040 }, { "epoch": 0.885891198584697, "grad_norm": 2.764311353767144, "learning_rate": 6.6618803601350836e-06, "loss": 1.5188, "step": 30045 }, { "epoch": 0.8860386259767065, "grad_norm": 2.770411436786608, "learning_rate": 6.661474978579535e-06, "loss": 1.44, "step": 30050 }, { "epoch": 0.8861860533687159, "grad_norm": 2.6435293478100395, "learning_rate": 6.661069511350511e-06, "loss": 1.4416, "step": 30055 }, { "epoch": 0.8863334807607254, "grad_norm": 2.732659585996625, "learning_rate": 6.660663958459943e-06, "loss": 1.5158, "step": 30060 }, { "epoch": 0.8864809081527347, "grad_norm": 2.7859017057053475, "learning_rate": 6.660258319919763e-06, "loss": 1.5117, "step": 30065 }, { "epoch": 0.8866283355447442, "grad_norm": 2.9351634891304594, "learning_rate": 6.65985259574191e-06, "loss": 1.509, "step": 30070 }, { "epoch": 0.8867757629367536, "grad_norm": 2.8659433016617823, "learning_rate": 6.659446785938321e-06, "loss": 1.4705, "step": 30075 }, { "epoch": 0.8869231903287631, "grad_norm": 2.8009462371673353, "learning_rate": 6.659040890520939e-06, "loss": 1.5, "step": 30080 }, { "epoch": 0.8870706177207726, "grad_norm": 2.7161308656635614, "learning_rate": 6.6586349095017065e-06, "loss": 1.4336, "step": 30085 }, { "epoch": 0.8872180451127819, "grad_norm": 2.602352418116552, "learning_rate": 6.6582288428925706e-06, "loss": 1.4209, "step": 30090 }, { "epoch": 0.8873654725047914, "grad_norm": 2.763486119352239, "learning_rate": 6.657822690705479e-06, "loss": 1.4462, "step": 30095 }, { "epoch": 0.8875128998968008, "grad_norm": 2.7442773703713326, "learning_rate": 6.657416452952386e-06, "loss": 1.4969, "step": 30100 }, { "epoch": 0.8876603272888103, "grad_norm": 2.786528789634313, "learning_rate": 6.657010129645242e-06, "loss": 1.4738, "step": 30105 }, { "epoch": 0.8878077546808197, "grad_norm": 2.836618457728195, "learning_rate": 6.6566037207960076e-06, "loss": 1.5007, "step": 30110 }, { "epoch": 0.8879551820728291, "grad_norm": 2.7242760498613907, "learning_rate": 6.656197226416639e-06, "loss": 1.4687, "step": 30115 }, { "epoch": 0.8881026094648385, "grad_norm": 2.655425779235449, "learning_rate": 6.655790646519099e-06, "loss": 1.4545, "step": 30120 }, { "epoch": 0.888250036856848, "grad_norm": 2.8153723773882167, "learning_rate": 6.655383981115351e-06, "loss": 1.4047, "step": 30125 }, { "epoch": 0.8883974642488575, "grad_norm": 2.805492171202333, "learning_rate": 6.654977230217361e-06, "loss": 1.3755, "step": 30130 }, { "epoch": 0.8885448916408669, "grad_norm": 2.644989760326553, "learning_rate": 6.6545703938370975e-06, "loss": 1.4383, "step": 30135 }, { "epoch": 0.8886923190328763, "grad_norm": 2.76372544568023, "learning_rate": 6.654163471986536e-06, "loss": 1.4907, "step": 30140 }, { "epoch": 0.8888397464248857, "grad_norm": 2.7741936434030983, "learning_rate": 6.653756464677647e-06, "loss": 1.4623, "step": 30145 }, { "epoch": 0.8889871738168952, "grad_norm": 2.784518331812464, "learning_rate": 6.653349371922409e-06, "loss": 1.4672, "step": 30150 }, { "epoch": 0.8891346012089046, "grad_norm": 2.8433504184499525, "learning_rate": 6.6529421937328e-06, "loss": 1.4259, "step": 30155 }, { "epoch": 0.8892820286009141, "grad_norm": 2.7723320023096214, "learning_rate": 6.652534930120801e-06, "loss": 1.4621, "step": 30160 }, { "epoch": 0.8894294559929234, "grad_norm": 2.5842569207439636, "learning_rate": 6.652127581098399e-06, "loss": 1.4795, "step": 30165 }, { "epoch": 0.8895768833849329, "grad_norm": 2.6884603228098864, "learning_rate": 6.651720146677578e-06, "loss": 1.4938, "step": 30170 }, { "epoch": 0.8897243107769424, "grad_norm": 2.7289181084575937, "learning_rate": 6.651312626870329e-06, "loss": 1.4796, "step": 30175 }, { "epoch": 0.8898717381689518, "grad_norm": 2.8124628462262815, "learning_rate": 6.650905021688643e-06, "loss": 1.4847, "step": 30180 }, { "epoch": 0.8900191655609613, "grad_norm": 2.719726875072872, "learning_rate": 6.650497331144514e-06, "loss": 1.43, "step": 30185 }, { "epoch": 0.8901665929529706, "grad_norm": 2.780621066234574, "learning_rate": 6.6500895552499395e-06, "loss": 1.4564, "step": 30190 }, { "epoch": 0.8903140203449801, "grad_norm": 2.7295479208954583, "learning_rate": 6.649681694016918e-06, "loss": 1.4554, "step": 30195 }, { "epoch": 0.8904614477369895, "grad_norm": 2.7722709430907098, "learning_rate": 6.649273747457451e-06, "loss": 1.4951, "step": 30200 }, { "epoch": 0.890608875128999, "grad_norm": 2.700211269163617, "learning_rate": 6.648865715583543e-06, "loss": 1.417, "step": 30205 }, { "epoch": 0.8907563025210085, "grad_norm": 2.78251459001166, "learning_rate": 6.648457598407202e-06, "loss": 1.5148, "step": 30210 }, { "epoch": 0.8909037299130178, "grad_norm": 2.8038480645603925, "learning_rate": 6.648049395940437e-06, "loss": 1.4256, "step": 30215 }, { "epoch": 0.8910511573050273, "grad_norm": 2.7392099865956308, "learning_rate": 6.647641108195258e-06, "loss": 1.5073, "step": 30220 }, { "epoch": 0.8911985846970367, "grad_norm": 2.8720275648839735, "learning_rate": 6.647232735183682e-06, "loss": 1.4498, "step": 30225 }, { "epoch": 0.8913460120890462, "grad_norm": 2.6461231842071067, "learning_rate": 6.646824276917724e-06, "loss": 1.4875, "step": 30230 }, { "epoch": 0.8914934394810555, "grad_norm": 2.7411024611037136, "learning_rate": 6.646415733409403e-06, "loss": 1.4691, "step": 30235 }, { "epoch": 0.891640866873065, "grad_norm": 2.7882492538342833, "learning_rate": 6.6460071046707424e-06, "loss": 1.4478, "step": 30240 }, { "epoch": 0.8917882942650744, "grad_norm": 2.6035802159399486, "learning_rate": 6.645598390713766e-06, "loss": 1.4263, "step": 30245 }, { "epoch": 0.8919357216570839, "grad_norm": 2.7702931516039855, "learning_rate": 6.6451895915505e-06, "loss": 1.4223, "step": 30250 }, { "epoch": 0.8920831490490934, "grad_norm": 2.92596375878587, "learning_rate": 6.644780707192974e-06, "loss": 1.4894, "step": 30255 }, { "epoch": 0.8922305764411027, "grad_norm": 2.687530654645311, "learning_rate": 6.6443717376532205e-06, "loss": 1.5286, "step": 30260 }, { "epoch": 0.8923780038331122, "grad_norm": 5.430138457137091, "learning_rate": 6.643962682943275e-06, "loss": 1.5064, "step": 30265 }, { "epoch": 0.8925254312251216, "grad_norm": 2.7039712970362197, "learning_rate": 6.643553543075172e-06, "loss": 1.4542, "step": 30270 }, { "epoch": 0.8926728586171311, "grad_norm": 2.749278384006499, "learning_rate": 6.643144318060952e-06, "loss": 1.4911, "step": 30275 }, { "epoch": 0.8928202860091405, "grad_norm": 2.829338661744897, "learning_rate": 6.6427350079126555e-06, "loss": 1.5275, "step": 30280 }, { "epoch": 0.8929677134011499, "grad_norm": 2.788449047640064, "learning_rate": 6.64232561264233e-06, "loss": 1.4531, "step": 30285 }, { "epoch": 0.8931151407931593, "grad_norm": 2.948314189143954, "learning_rate": 6.64191613226202e-06, "loss": 1.473, "step": 30290 }, { "epoch": 0.8932625681851688, "grad_norm": 2.723172789025148, "learning_rate": 6.641506566783775e-06, "loss": 1.4778, "step": 30295 }, { "epoch": 0.8934099955771783, "grad_norm": 2.6750158428314648, "learning_rate": 6.6410969162196476e-06, "loss": 1.4733, "step": 30300 }, { "epoch": 0.8935574229691877, "grad_norm": 2.7216166727332327, "learning_rate": 6.640687180581693e-06, "loss": 1.4861, "step": 30305 }, { "epoch": 0.8937048503611971, "grad_norm": 2.858865915738878, "learning_rate": 6.6402773598819675e-06, "loss": 1.4793, "step": 30310 }, { "epoch": 0.8938522777532065, "grad_norm": 2.8821604058522037, "learning_rate": 6.63986745413253e-06, "loss": 1.5248, "step": 30315 }, { "epoch": 0.893999705145216, "grad_norm": 2.84226040636102, "learning_rate": 6.639457463345442e-06, "loss": 1.5, "step": 30320 }, { "epoch": 0.8941471325372254, "grad_norm": 2.873525342308824, "learning_rate": 6.63904738753277e-06, "loss": 1.5008, "step": 30325 }, { "epoch": 0.8942945599292349, "grad_norm": 2.844087251937715, "learning_rate": 6.638637226706578e-06, "loss": 1.5162, "step": 30330 }, { "epoch": 0.8944419873212442, "grad_norm": 2.7920889973256857, "learning_rate": 6.638226980878938e-06, "loss": 1.4841, "step": 30335 }, { "epoch": 0.8945894147132537, "grad_norm": 2.7029824779255454, "learning_rate": 6.637816650061922e-06, "loss": 1.4654, "step": 30340 }, { "epoch": 0.8947368421052632, "grad_norm": 2.708823469888502, "learning_rate": 6.637406234267602e-06, "loss": 1.4971, "step": 30345 }, { "epoch": 0.8948842694972726, "grad_norm": 2.806347609288753, "learning_rate": 6.636995733508057e-06, "loss": 1.4654, "step": 30350 }, { "epoch": 0.8950316968892821, "grad_norm": 2.9067326792044277, "learning_rate": 6.636585147795366e-06, "loss": 1.5137, "step": 30355 }, { "epoch": 0.8951791242812914, "grad_norm": 2.678089667028465, "learning_rate": 6.636174477141612e-06, "loss": 1.4571, "step": 30360 }, { "epoch": 0.8953265516733009, "grad_norm": 2.852586858068769, "learning_rate": 6.635763721558877e-06, "loss": 1.5175, "step": 30365 }, { "epoch": 0.8954739790653103, "grad_norm": 2.585957091151385, "learning_rate": 6.635352881059249e-06, "loss": 1.5045, "step": 30370 }, { "epoch": 0.8956214064573198, "grad_norm": 2.7227054138882756, "learning_rate": 6.634941955654819e-06, "loss": 1.472, "step": 30375 }, { "epoch": 0.8957688338493293, "grad_norm": 2.7849606104118316, "learning_rate": 6.634530945357677e-06, "loss": 1.4406, "step": 30380 }, { "epoch": 0.8959162612413386, "grad_norm": 2.6220231278823505, "learning_rate": 6.634119850179918e-06, "loss": 1.5128, "step": 30385 }, { "epoch": 0.8960636886333481, "grad_norm": 2.620609273519775, "learning_rate": 6.63370867013364e-06, "loss": 1.4772, "step": 30390 }, { "epoch": 0.8962111160253575, "grad_norm": 2.7000978843674135, "learning_rate": 6.63329740523094e-06, "loss": 1.4717, "step": 30395 }, { "epoch": 0.896358543417367, "grad_norm": 2.811683971499945, "learning_rate": 6.632886055483922e-06, "loss": 1.5244, "step": 30400 }, { "epoch": 0.8965059708093763, "grad_norm": 2.6564807850818477, "learning_rate": 6.632474620904691e-06, "loss": 1.4877, "step": 30405 }, { "epoch": 0.8966533982013858, "grad_norm": 2.682053655876183, "learning_rate": 6.632063101505352e-06, "loss": 1.5744, "step": 30410 }, { "epoch": 0.8968008255933952, "grad_norm": 2.634505309384117, "learning_rate": 6.631651497298015e-06, "loss": 1.471, "step": 30415 }, { "epoch": 0.8969482529854047, "grad_norm": 2.8473265289382628, "learning_rate": 6.631239808294793e-06, "loss": 1.468, "step": 30420 }, { "epoch": 0.8970956803774142, "grad_norm": 2.906436245529168, "learning_rate": 6.630828034507798e-06, "loss": 1.4837, "step": 30425 }, { "epoch": 0.8972431077694235, "grad_norm": 2.758171288451528, "learning_rate": 6.63041617594915e-06, "loss": 1.4996, "step": 30430 }, { "epoch": 0.897390535161433, "grad_norm": 2.6511199943168653, "learning_rate": 6.630004232630967e-06, "loss": 1.458, "step": 30435 }, { "epoch": 0.8975379625534424, "grad_norm": 2.706692146237565, "learning_rate": 6.629592204565371e-06, "loss": 1.4361, "step": 30440 }, { "epoch": 0.8976853899454519, "grad_norm": 2.5532270855069195, "learning_rate": 6.629180091764485e-06, "loss": 1.4351, "step": 30445 }, { "epoch": 0.8978328173374613, "grad_norm": 2.7724027946280874, "learning_rate": 6.628767894240438e-06, "loss": 1.4945, "step": 30450 }, { "epoch": 0.8979802447294707, "grad_norm": 2.5429385920514704, "learning_rate": 6.628355612005358e-06, "loss": 1.4515, "step": 30455 }, { "epoch": 0.8981276721214801, "grad_norm": 2.6647200489927183, "learning_rate": 6.6279432450713775e-06, "loss": 1.4264, "step": 30460 }, { "epoch": 0.8982750995134896, "grad_norm": 2.7346794477954024, "learning_rate": 6.627530793450631e-06, "loss": 1.4577, "step": 30465 }, { "epoch": 0.8984225269054991, "grad_norm": 2.7037886507339888, "learning_rate": 6.6271182571552545e-06, "loss": 1.4858, "step": 30470 }, { "epoch": 0.8985699542975085, "grad_norm": 2.6794827031793345, "learning_rate": 6.626705636197389e-06, "loss": 1.4062, "step": 30475 }, { "epoch": 0.8987173816895179, "grad_norm": 2.794641980058137, "learning_rate": 6.626292930589175e-06, "loss": 1.4711, "step": 30480 }, { "epoch": 0.8988648090815273, "grad_norm": 2.663643931731008, "learning_rate": 6.625880140342757e-06, "loss": 1.4428, "step": 30485 }, { "epoch": 0.8990122364735368, "grad_norm": 2.7428682494274303, "learning_rate": 6.625467265470282e-06, "loss": 1.5081, "step": 30490 }, { "epoch": 0.8991596638655462, "grad_norm": 2.5343290894843657, "learning_rate": 6.625054305983898e-06, "loss": 1.4882, "step": 30495 }, { "epoch": 0.8993070912575557, "grad_norm": 2.705045687303495, "learning_rate": 6.62464126189576e-06, "loss": 1.4542, "step": 30500 }, { "epoch": 0.8993070912575557, "eval_loss": 1.1427456140518188, "eval_runtime": 4.1825, "eval_samples_per_second": 94.68, "eval_steps_per_second": 3.108, "step": 30500 }, { "epoch": 0.899454518649565, "grad_norm": 2.6317721503593137, "learning_rate": 6.62422813321802e-06, "loss": 1.4827, "step": 30505 }, { "epoch": 0.8996019460415745, "grad_norm": 2.8102465555263008, "learning_rate": 6.623814919962834e-06, "loss": 1.3734, "step": 30510 }, { "epoch": 0.899749373433584, "grad_norm": 2.973096255286934, "learning_rate": 6.6234016221423635e-06, "loss": 1.4962, "step": 30515 }, { "epoch": 0.8998968008255934, "grad_norm": 2.8548734235072106, "learning_rate": 6.622988239768769e-06, "loss": 1.4772, "step": 30520 }, { "epoch": 0.9000442282176029, "grad_norm": 2.7439040244144906, "learning_rate": 6.622574772854214e-06, "loss": 1.496, "step": 30525 }, { "epoch": 0.9001916556096122, "grad_norm": 2.639133102627166, "learning_rate": 6.622161221410867e-06, "loss": 1.4306, "step": 30530 }, { "epoch": 0.9003390830016217, "grad_norm": 2.774452802680049, "learning_rate": 6.621747585450897e-06, "loss": 1.4315, "step": 30535 }, { "epoch": 0.9004865103936311, "grad_norm": 2.752202396424397, "learning_rate": 6.621333864986474e-06, "loss": 1.4738, "step": 30540 }, { "epoch": 0.9006339377856406, "grad_norm": 2.825931808144071, "learning_rate": 6.620920060029774e-06, "loss": 1.5037, "step": 30545 }, { "epoch": 0.9007813651776501, "grad_norm": 2.730299100662999, "learning_rate": 6.620506170592973e-06, "loss": 1.4538, "step": 30550 }, { "epoch": 0.9009287925696594, "grad_norm": 2.7297558256417322, "learning_rate": 6.6200921966882494e-06, "loss": 1.4927, "step": 30555 }, { "epoch": 0.9010762199616689, "grad_norm": 2.7677473100598893, "learning_rate": 6.619678138327786e-06, "loss": 1.4728, "step": 30560 }, { "epoch": 0.9012236473536783, "grad_norm": 2.756890505804404, "learning_rate": 6.619263995523767e-06, "loss": 1.4446, "step": 30565 }, { "epoch": 0.9013710747456878, "grad_norm": 2.7155385826589518, "learning_rate": 6.6188497682883785e-06, "loss": 1.4521, "step": 30570 }, { "epoch": 0.9015185021376971, "grad_norm": 2.703422861488423, "learning_rate": 6.618435456633809e-06, "loss": 1.399, "step": 30575 }, { "epoch": 0.9016659295297066, "grad_norm": 2.662749799848838, "learning_rate": 6.618021060572252e-06, "loss": 1.4426, "step": 30580 }, { "epoch": 0.901813356921716, "grad_norm": 2.8603977792588626, "learning_rate": 6.617606580115898e-06, "loss": 1.4787, "step": 30585 }, { "epoch": 0.9019607843137255, "grad_norm": 2.730213785260035, "learning_rate": 6.617192015276949e-06, "loss": 1.4515, "step": 30590 }, { "epoch": 0.902108211705735, "grad_norm": 2.736650795687391, "learning_rate": 6.616777366067599e-06, "loss": 1.506, "step": 30595 }, { "epoch": 0.9022556390977443, "grad_norm": 3.0166373667467226, "learning_rate": 6.616362632500052e-06, "loss": 1.4598, "step": 30600 }, { "epoch": 0.9024030664897538, "grad_norm": 2.6927226229503938, "learning_rate": 6.6159478145865115e-06, "loss": 1.5065, "step": 30605 }, { "epoch": 0.9025504938817632, "grad_norm": 2.7762275730883106, "learning_rate": 6.615532912339184e-06, "loss": 1.5646, "step": 30610 }, { "epoch": 0.9026979212737727, "grad_norm": 2.637833448639362, "learning_rate": 6.615117925770278e-06, "loss": 1.4635, "step": 30615 }, { "epoch": 0.9028453486657821, "grad_norm": 2.952319029041787, "learning_rate": 6.614702854892006e-06, "loss": 1.5095, "step": 30620 }, { "epoch": 0.9029927760577915, "grad_norm": 2.576218685048307, "learning_rate": 6.614287699716581e-06, "loss": 1.4765, "step": 30625 }, { "epoch": 0.903140203449801, "grad_norm": 2.8194796887973896, "learning_rate": 6.61387246025622e-06, "loss": 1.5184, "step": 30630 }, { "epoch": 0.9032876308418104, "grad_norm": 2.893890209440355, "learning_rate": 6.613457136523142e-06, "loss": 1.4098, "step": 30635 }, { "epoch": 0.9034350582338199, "grad_norm": 2.7476998100942254, "learning_rate": 6.613041728529567e-06, "loss": 1.5366, "step": 30640 }, { "epoch": 0.9035824856258293, "grad_norm": 2.7580973141933183, "learning_rate": 6.612626236287721e-06, "loss": 1.5022, "step": 30645 }, { "epoch": 0.9037299130178387, "grad_norm": 2.8918467445347305, "learning_rate": 6.6122106598098285e-06, "loss": 1.4888, "step": 30650 }, { "epoch": 0.9038773404098481, "grad_norm": 2.7598270228176194, "learning_rate": 6.61179499910812e-06, "loss": 1.4446, "step": 30655 }, { "epoch": 0.9040247678018576, "grad_norm": 2.7033205853821887, "learning_rate": 6.611379254194824e-06, "loss": 1.4511, "step": 30660 }, { "epoch": 0.904172195193867, "grad_norm": 2.740316039259821, "learning_rate": 6.610963425082177e-06, "loss": 1.4414, "step": 30665 }, { "epoch": 0.9043196225858765, "grad_norm": 3.613725392307988, "learning_rate": 6.610547511782415e-06, "loss": 1.5208, "step": 30670 }, { "epoch": 0.9044670499778859, "grad_norm": 2.9976884099613694, "learning_rate": 6.6101315143077756e-06, "loss": 1.4632, "step": 30675 }, { "epoch": 0.9046144773698953, "grad_norm": 2.649693701257072, "learning_rate": 6.609715432670499e-06, "loss": 1.451, "step": 30680 }, { "epoch": 0.9047619047619048, "grad_norm": 2.849942059484863, "learning_rate": 6.609299266882834e-06, "loss": 1.4667, "step": 30685 }, { "epoch": 0.9049093321539142, "grad_norm": 2.891960977593856, "learning_rate": 6.608883016957019e-06, "loss": 1.5169, "step": 30690 }, { "epoch": 0.9050567595459237, "grad_norm": 2.711367868330033, "learning_rate": 6.608466682905309e-06, "loss": 1.4921, "step": 30695 }, { "epoch": 0.905204186937933, "grad_norm": 2.7152217602776574, "learning_rate": 6.608050264739953e-06, "loss": 1.4963, "step": 30700 }, { "epoch": 0.9053516143299425, "grad_norm": 2.7712783875016562, "learning_rate": 6.607633762473203e-06, "loss": 1.479, "step": 30705 }, { "epoch": 0.905499041721952, "grad_norm": 2.6794969982486205, "learning_rate": 6.607217176117318e-06, "loss": 1.5173, "step": 30710 }, { "epoch": 0.9056464691139614, "grad_norm": 2.6099772813147815, "learning_rate": 6.606800505684554e-06, "loss": 1.499, "step": 30715 }, { "epoch": 0.9057938965059709, "grad_norm": 2.775196206843175, "learning_rate": 6.606383751187173e-06, "loss": 1.4647, "step": 30720 }, { "epoch": 0.9059413238979802, "grad_norm": 2.6418484957553376, "learning_rate": 6.605966912637439e-06, "loss": 1.4946, "step": 30725 }, { "epoch": 0.9060887512899897, "grad_norm": 2.688438071910545, "learning_rate": 6.605549990047617e-06, "loss": 1.4182, "step": 30730 }, { "epoch": 0.9062361786819991, "grad_norm": 2.7543313684544297, "learning_rate": 6.605132983429976e-06, "loss": 1.4845, "step": 30735 }, { "epoch": 0.9063836060740086, "grad_norm": 2.6658394204724485, "learning_rate": 6.604715892796787e-06, "loss": 1.4562, "step": 30740 }, { "epoch": 0.9065310334660179, "grad_norm": 2.700847494513932, "learning_rate": 6.604298718160323e-06, "loss": 1.4829, "step": 30745 }, { "epoch": 0.9066784608580274, "grad_norm": 2.7925393173395814, "learning_rate": 6.60388145953286e-06, "loss": 1.5082, "step": 30750 }, { "epoch": 0.9068258882500368, "grad_norm": 2.7301176599740926, "learning_rate": 6.603464116926676e-06, "loss": 1.5587, "step": 30755 }, { "epoch": 0.9069733156420463, "grad_norm": 2.5613203439539203, "learning_rate": 6.603046690354052e-06, "loss": 1.4414, "step": 30760 }, { "epoch": 0.9071207430340558, "grad_norm": 2.6501781743171064, "learning_rate": 6.602629179827271e-06, "loss": 1.5172, "step": 30765 }, { "epoch": 0.9072681704260651, "grad_norm": 2.8130869013390827, "learning_rate": 6.602211585358619e-06, "loss": 1.4677, "step": 30770 }, { "epoch": 0.9074155978180746, "grad_norm": 2.649175606573429, "learning_rate": 6.601793906960386e-06, "loss": 1.5017, "step": 30775 }, { "epoch": 0.907563025210084, "grad_norm": 2.724742128174748, "learning_rate": 6.601376144644859e-06, "loss": 1.4921, "step": 30780 }, { "epoch": 0.9077104526020935, "grad_norm": 2.842904219007049, "learning_rate": 6.600958298424333e-06, "loss": 1.4514, "step": 30785 }, { "epoch": 0.907857879994103, "grad_norm": 2.873562315243375, "learning_rate": 6.600540368311105e-06, "loss": 1.4444, "step": 30790 }, { "epoch": 0.9080053073861123, "grad_norm": 2.7662420663600984, "learning_rate": 6.600122354317471e-06, "loss": 1.4821, "step": 30795 }, { "epoch": 0.9081527347781218, "grad_norm": 2.6035610547605637, "learning_rate": 6.5997042564557315e-06, "loss": 1.3789, "step": 30800 }, { "epoch": 0.9083001621701312, "grad_norm": 2.7552801977294608, "learning_rate": 6.5992860747381915e-06, "loss": 1.4698, "step": 30805 }, { "epoch": 0.9084475895621407, "grad_norm": 2.7148045095491584, "learning_rate": 6.598867809177154e-06, "loss": 1.4633, "step": 30810 }, { "epoch": 0.9085950169541501, "grad_norm": 2.786380659466315, "learning_rate": 6.598449459784928e-06, "loss": 1.4545, "step": 30815 }, { "epoch": 0.9087424443461595, "grad_norm": 2.5836317900155046, "learning_rate": 6.598031026573826e-06, "loss": 1.4735, "step": 30820 }, { "epoch": 0.9088898717381689, "grad_norm": 2.6507359315909618, "learning_rate": 6.597612509556157e-06, "loss": 1.4479, "step": 30825 }, { "epoch": 0.9090372991301784, "grad_norm": 2.742391473218091, "learning_rate": 6.597193908744239e-06, "loss": 1.5272, "step": 30830 }, { "epoch": 0.9091847265221878, "grad_norm": 2.8672456067516996, "learning_rate": 6.596775224150389e-06, "loss": 1.5023, "step": 30835 }, { "epoch": 0.9093321539141973, "grad_norm": 2.791782267554934, "learning_rate": 6.596356455786927e-06, "loss": 1.4754, "step": 30840 }, { "epoch": 0.9094795813062067, "grad_norm": 2.7021894562525794, "learning_rate": 6.595937603666177e-06, "loss": 1.4942, "step": 30845 }, { "epoch": 0.9096270086982161, "grad_norm": 2.7870881597529715, "learning_rate": 6.595518667800462e-06, "loss": 1.4698, "step": 30850 }, { "epoch": 0.9097744360902256, "grad_norm": 2.7435845319297902, "learning_rate": 6.595099648202112e-06, "loss": 1.4678, "step": 30855 }, { "epoch": 0.909921863482235, "grad_norm": 2.729220328182142, "learning_rate": 6.594680544883456e-06, "loss": 1.4358, "step": 30860 }, { "epoch": 0.9100692908742445, "grad_norm": 2.682501625857525, "learning_rate": 6.594261357856826e-06, "loss": 1.4818, "step": 30865 }, { "epoch": 0.9102167182662538, "grad_norm": 2.739278669195351, "learning_rate": 6.59384208713456e-06, "loss": 1.5061, "step": 30870 }, { "epoch": 0.9103641456582633, "grad_norm": 2.8355354420944847, "learning_rate": 6.5934227327289906e-06, "loss": 1.4888, "step": 30875 }, { "epoch": 0.9105115730502727, "grad_norm": 2.7352030843512756, "learning_rate": 6.593003294652461e-06, "loss": 1.4803, "step": 30880 }, { "epoch": 0.9106590004422822, "grad_norm": 2.772323429743417, "learning_rate": 6.592583772917313e-06, "loss": 1.4483, "step": 30885 }, { "epoch": 0.9108064278342917, "grad_norm": 2.737758457251122, "learning_rate": 6.592164167535893e-06, "loss": 1.4606, "step": 30890 }, { "epoch": 0.910953855226301, "grad_norm": 2.6902294788370216, "learning_rate": 6.591744478520545e-06, "loss": 1.4948, "step": 30895 }, { "epoch": 0.9111012826183105, "grad_norm": 2.8022078960959633, "learning_rate": 6.591324705883623e-06, "loss": 1.4305, "step": 30900 }, { "epoch": 0.9112487100103199, "grad_norm": 2.618373962742329, "learning_rate": 6.590904849637475e-06, "loss": 1.4261, "step": 30905 }, { "epoch": 0.9113961374023294, "grad_norm": 2.8183814651083194, "learning_rate": 6.5904849097944585e-06, "loss": 1.489, "step": 30910 }, { "epoch": 0.9115435647943387, "grad_norm": 2.700852240136454, "learning_rate": 6.59006488636693e-06, "loss": 1.4649, "step": 30915 }, { "epoch": 0.9116909921863482, "grad_norm": 2.8478310351696163, "learning_rate": 6.58964477936725e-06, "loss": 1.4942, "step": 30920 }, { "epoch": 0.9118384195783577, "grad_norm": 2.7867660351839247, "learning_rate": 6.589224588807778e-06, "loss": 1.4896, "step": 30925 }, { "epoch": 0.9119858469703671, "grad_norm": 2.637046975243537, "learning_rate": 6.588804314700882e-06, "loss": 1.4564, "step": 30930 }, { "epoch": 0.9121332743623766, "grad_norm": 2.6651695546461602, "learning_rate": 6.588383957058928e-06, "loss": 1.4164, "step": 30935 }, { "epoch": 0.9122807017543859, "grad_norm": 2.7984743109869, "learning_rate": 6.587963515894284e-06, "loss": 1.4687, "step": 30940 }, { "epoch": 0.9124281291463954, "grad_norm": 2.77988876709717, "learning_rate": 6.587542991219324e-06, "loss": 1.4774, "step": 30945 }, { "epoch": 0.9125755565384048, "grad_norm": 2.822271245283891, "learning_rate": 6.5871223830464195e-06, "loss": 1.4646, "step": 30950 }, { "epoch": 0.9127229839304143, "grad_norm": 2.9816203741512295, "learning_rate": 6.58670169138795e-06, "loss": 1.4895, "step": 30955 }, { "epoch": 0.9128704113224237, "grad_norm": 2.732780025544252, "learning_rate": 6.586280916256294e-06, "loss": 1.4549, "step": 30960 }, { "epoch": 0.9130178387144331, "grad_norm": 2.7729276152601274, "learning_rate": 6.5858600576638335e-06, "loss": 1.4983, "step": 30965 }, { "epoch": 0.9131652661064426, "grad_norm": 2.6786461854620836, "learning_rate": 6.585439115622952e-06, "loss": 1.4203, "step": 30970 }, { "epoch": 0.913312693498452, "grad_norm": 2.931406206617935, "learning_rate": 6.585018090146038e-06, "loss": 1.463, "step": 30975 }, { "epoch": 0.9134601208904615, "grad_norm": 2.643935062639868, "learning_rate": 6.584596981245479e-06, "loss": 1.5209, "step": 30980 }, { "epoch": 0.9136075482824709, "grad_norm": 2.6503902910703796, "learning_rate": 6.584175788933667e-06, "loss": 1.4847, "step": 30985 }, { "epoch": 0.9137549756744803, "grad_norm": 2.627274026355356, "learning_rate": 6.583754513222996e-06, "loss": 1.4769, "step": 30990 }, { "epoch": 0.9139024030664897, "grad_norm": 2.6805840056764634, "learning_rate": 6.583333154125862e-06, "loss": 1.4923, "step": 30995 }, { "epoch": 0.9140498304584992, "grad_norm": 2.7688876049663675, "learning_rate": 6.582911711654666e-06, "loss": 1.5095, "step": 31000 }, { "epoch": 0.9140498304584992, "eval_loss": 1.1415408849716187, "eval_runtime": 4.2986, "eval_samples_per_second": 92.123, "eval_steps_per_second": 3.024, "step": 31000 }, { "epoch": 0.9141972578505086, "grad_norm": 2.6347114180752063, "learning_rate": 6.582490185821806e-06, "loss": 1.4963, "step": 31005 }, { "epoch": 0.9143446852425181, "grad_norm": 2.7986617327145633, "learning_rate": 6.58206857663969e-06, "loss": 1.5275, "step": 31010 }, { "epoch": 0.9144921126345275, "grad_norm": 2.9273962050491846, "learning_rate": 6.581646884120721e-06, "loss": 1.5302, "step": 31015 }, { "epoch": 0.9146395400265369, "grad_norm": 3.058501466465298, "learning_rate": 6.58122510827731e-06, "loss": 1.4643, "step": 31020 }, { "epoch": 0.9147869674185464, "grad_norm": 2.6630357359757806, "learning_rate": 6.580803249121867e-06, "loss": 1.4657, "step": 31025 }, { "epoch": 0.9149343948105558, "grad_norm": 2.572065796629405, "learning_rate": 6.580381306666805e-06, "loss": 1.4516, "step": 31030 }, { "epoch": 0.9150818222025653, "grad_norm": 2.63721071423369, "learning_rate": 6.579959280924542e-06, "loss": 1.446, "step": 31035 }, { "epoch": 0.9152292495945746, "grad_norm": 2.611195340311224, "learning_rate": 6.5795371719074956e-06, "loss": 1.4148, "step": 31040 }, { "epoch": 0.9153766769865841, "grad_norm": 2.6254852522131547, "learning_rate": 6.579114979628087e-06, "loss": 1.4501, "step": 31045 }, { "epoch": 0.9155241043785936, "grad_norm": 2.7550128522278428, "learning_rate": 6.5786927040987395e-06, "loss": 1.4837, "step": 31050 }, { "epoch": 0.915671531770603, "grad_norm": 2.7305824167872017, "learning_rate": 6.578270345331879e-06, "loss": 1.4488, "step": 31055 }, { "epoch": 0.9158189591626125, "grad_norm": 2.7686026667074928, "learning_rate": 6.5778479033399336e-06, "loss": 1.4928, "step": 31060 }, { "epoch": 0.9159663865546218, "grad_norm": 2.638136790503215, "learning_rate": 6.577425378135336e-06, "loss": 1.4993, "step": 31065 }, { "epoch": 0.9161138139466313, "grad_norm": 2.8001960547384366, "learning_rate": 6.577002769730517e-06, "loss": 1.5448, "step": 31070 }, { "epoch": 0.9162612413386407, "grad_norm": 2.7071904293087194, "learning_rate": 6.576580078137914e-06, "loss": 1.4738, "step": 31075 }, { "epoch": 0.9164086687306502, "grad_norm": 2.7617928158626794, "learning_rate": 6.576157303369963e-06, "loss": 1.4961, "step": 31080 }, { "epoch": 0.9165560961226595, "grad_norm": 2.7599654094262642, "learning_rate": 6.575734445439108e-06, "loss": 1.4733, "step": 31085 }, { "epoch": 0.916703523514669, "grad_norm": 2.716707291810026, "learning_rate": 6.57531150435779e-06, "loss": 1.4716, "step": 31090 }, { "epoch": 0.9168509509066785, "grad_norm": 2.751554371454452, "learning_rate": 6.574888480138454e-06, "loss": 1.4491, "step": 31095 }, { "epoch": 0.9169983782986879, "grad_norm": 2.860274253807769, "learning_rate": 6.57446537279355e-06, "loss": 1.4649, "step": 31100 }, { "epoch": 0.9171458056906974, "grad_norm": 2.7787891566799536, "learning_rate": 6.574042182335526e-06, "loss": 1.4653, "step": 31105 }, { "epoch": 0.9172932330827067, "grad_norm": 2.6197778662036244, "learning_rate": 6.573618908776836e-06, "loss": 1.4273, "step": 31110 }, { "epoch": 0.9174406604747162, "grad_norm": 2.763340463055901, "learning_rate": 6.5731955521299365e-06, "loss": 1.5385, "step": 31115 }, { "epoch": 0.9175880878667256, "grad_norm": 2.771761610849548, "learning_rate": 6.572772112407283e-06, "loss": 1.5165, "step": 31120 }, { "epoch": 0.9177355152587351, "grad_norm": 2.839951983476359, "learning_rate": 6.572348589621337e-06, "loss": 1.4801, "step": 31125 }, { "epoch": 0.9178829426507445, "grad_norm": 2.7970262724722943, "learning_rate": 6.571924983784562e-06, "loss": 1.5069, "step": 31130 }, { "epoch": 0.9180303700427539, "grad_norm": 2.6683505719523546, "learning_rate": 6.5715012949094215e-06, "loss": 1.4418, "step": 31135 }, { "epoch": 0.9181777974347634, "grad_norm": 2.7362691772375993, "learning_rate": 6.5710775230083835e-06, "loss": 1.5133, "step": 31140 }, { "epoch": 0.9183252248267728, "grad_norm": 2.7987109451013295, "learning_rate": 6.570653668093918e-06, "loss": 1.4973, "step": 31145 }, { "epoch": 0.9184726522187823, "grad_norm": 2.661483322293431, "learning_rate": 6.570229730178498e-06, "loss": 1.4179, "step": 31150 }, { "epoch": 0.9186200796107917, "grad_norm": 2.8442504495888214, "learning_rate": 6.569805709274596e-06, "loss": 1.4929, "step": 31155 }, { "epoch": 0.9187675070028011, "grad_norm": 2.6893433149039394, "learning_rate": 6.569381605394693e-06, "loss": 1.4419, "step": 31160 }, { "epoch": 0.9189149343948105, "grad_norm": 2.648895745337925, "learning_rate": 6.5689574185512664e-06, "loss": 1.4664, "step": 31165 }, { "epoch": 0.91906236178682, "grad_norm": 2.80091899472538, "learning_rate": 6.568533148756799e-06, "loss": 1.4842, "step": 31170 }, { "epoch": 0.9192097891788295, "grad_norm": 2.7206589022308476, "learning_rate": 6.5681087960237745e-06, "loss": 1.4714, "step": 31175 }, { "epoch": 0.9193572165708389, "grad_norm": 2.599018910503775, "learning_rate": 6.567684360364681e-06, "loss": 1.4173, "step": 31180 }, { "epoch": 0.9195046439628483, "grad_norm": 2.631208357647536, "learning_rate": 6.567259841792008e-06, "loss": 1.456, "step": 31185 }, { "epoch": 0.9196520713548577, "grad_norm": 2.5193180322166313, "learning_rate": 6.5668352403182475e-06, "loss": 1.439, "step": 31190 }, { "epoch": 0.9197994987468672, "grad_norm": 2.731275583843615, "learning_rate": 6.566410555955893e-06, "loss": 1.475, "step": 31195 }, { "epoch": 0.9199469261388766, "grad_norm": 2.813618718994313, "learning_rate": 6.565985788717442e-06, "loss": 1.5076, "step": 31200 }, { "epoch": 0.9200943535308861, "grad_norm": 2.615896551110342, "learning_rate": 6.565560938615395e-06, "loss": 1.4906, "step": 31205 }, { "epoch": 0.9202417809228954, "grad_norm": 2.6342369690068117, "learning_rate": 6.56513600566225e-06, "loss": 1.4698, "step": 31210 }, { "epoch": 0.9203892083149049, "grad_norm": 2.7591912808732335, "learning_rate": 6.564710989870515e-06, "loss": 1.5155, "step": 31215 }, { "epoch": 0.9205366357069144, "grad_norm": 2.7630315153141245, "learning_rate": 6.564285891252695e-06, "loss": 1.4472, "step": 31220 }, { "epoch": 0.9206840630989238, "grad_norm": 2.8819284730464165, "learning_rate": 6.563860709821299e-06, "loss": 1.4675, "step": 31225 }, { "epoch": 0.9208314904909333, "grad_norm": 2.6591016900512794, "learning_rate": 6.563435445588838e-06, "loss": 1.5072, "step": 31230 }, { "epoch": 0.9209789178829426, "grad_norm": 2.8969481429189545, "learning_rate": 6.563010098567827e-06, "loss": 1.4705, "step": 31235 }, { "epoch": 0.9211263452749521, "grad_norm": 2.961402979520067, "learning_rate": 6.562584668770782e-06, "loss": 1.477, "step": 31240 }, { "epoch": 0.9212737726669615, "grad_norm": 2.697836005091434, "learning_rate": 6.562159156210219e-06, "loss": 1.4636, "step": 31245 }, { "epoch": 0.921421200058971, "grad_norm": 2.681190338280176, "learning_rate": 6.561733560898664e-06, "loss": 1.4859, "step": 31250 }, { "epoch": 0.9215686274509803, "grad_norm": 2.6986597750276067, "learning_rate": 6.561307882848638e-06, "loss": 1.4478, "step": 31255 }, { "epoch": 0.9217160548429898, "grad_norm": 2.734395776716572, "learning_rate": 6.560882122072668e-06, "loss": 1.4502, "step": 31260 }, { "epoch": 0.9218634822349993, "grad_norm": 2.7865395073482127, "learning_rate": 6.5604562785832806e-06, "loss": 1.5404, "step": 31265 }, { "epoch": 0.9220109096270087, "grad_norm": 2.8381155485400154, "learning_rate": 6.560030352393009e-06, "loss": 1.4593, "step": 31270 }, { "epoch": 0.9221583370190182, "grad_norm": 2.7359273595270235, "learning_rate": 6.5596043435143845e-06, "loss": 1.4894, "step": 31275 }, { "epoch": 0.9223057644110275, "grad_norm": 2.7032846115384936, "learning_rate": 6.559178251959945e-06, "loss": 1.5151, "step": 31280 }, { "epoch": 0.922453191803037, "grad_norm": 2.715266866472794, "learning_rate": 6.558752077742227e-06, "loss": 1.5385, "step": 31285 }, { "epoch": 0.9226006191950464, "grad_norm": 4.561969786860043, "learning_rate": 6.558325820873772e-06, "loss": 1.4648, "step": 31290 }, { "epoch": 0.9227480465870559, "grad_norm": 2.6732205786812053, "learning_rate": 6.557899481367123e-06, "loss": 1.4891, "step": 31295 }, { "epoch": 0.9228954739790654, "grad_norm": 2.5986517962030646, "learning_rate": 6.557473059234826e-06, "loss": 1.4377, "step": 31300 }, { "epoch": 0.9230429013710747, "grad_norm": 2.763105461667207, "learning_rate": 6.557046554489428e-06, "loss": 1.4469, "step": 31305 }, { "epoch": 0.9231903287630842, "grad_norm": 2.898835062264812, "learning_rate": 6.556619967143479e-06, "loss": 1.4817, "step": 31310 }, { "epoch": 0.9233377561550936, "grad_norm": 2.681171326603284, "learning_rate": 6.556193297209534e-06, "loss": 1.4807, "step": 31315 }, { "epoch": 0.9234851835471031, "grad_norm": 2.899235591199323, "learning_rate": 6.555766544700146e-06, "loss": 1.4731, "step": 31320 }, { "epoch": 0.9236326109391125, "grad_norm": 2.6314405610016243, "learning_rate": 6.555339709627874e-06, "loss": 1.4571, "step": 31325 }, { "epoch": 0.9237800383311219, "grad_norm": 2.615446018964705, "learning_rate": 6.554912792005277e-06, "loss": 1.4439, "step": 31330 }, { "epoch": 0.9239274657231313, "grad_norm": 2.8047105836580313, "learning_rate": 6.554485791844919e-06, "loss": 1.4668, "step": 31335 }, { "epoch": 0.9240748931151408, "grad_norm": 2.9003849064399456, "learning_rate": 6.554058709159365e-06, "loss": 1.4963, "step": 31340 }, { "epoch": 0.9242223205071503, "grad_norm": 2.809625515683637, "learning_rate": 6.55363154396118e-06, "loss": 1.5039, "step": 31345 }, { "epoch": 0.9243697478991597, "grad_norm": 2.8625124867106675, "learning_rate": 6.553204296262936e-06, "loss": 1.472, "step": 31350 }, { "epoch": 0.9245171752911691, "grad_norm": 2.663536052519325, "learning_rate": 6.552776966077204e-06, "loss": 1.4671, "step": 31355 }, { "epoch": 0.9246646026831785, "grad_norm": 2.8768618606874496, "learning_rate": 6.55234955341656e-06, "loss": 1.5197, "step": 31360 }, { "epoch": 0.924812030075188, "grad_norm": 2.763871860535877, "learning_rate": 6.551922058293581e-06, "loss": 1.4895, "step": 31365 }, { "epoch": 0.9249594574671974, "grad_norm": 2.532545363411033, "learning_rate": 6.551494480720845e-06, "loss": 1.4437, "step": 31370 }, { "epoch": 0.9251068848592069, "grad_norm": 2.8161278579199434, "learning_rate": 6.551066820710936e-06, "loss": 1.4777, "step": 31375 }, { "epoch": 0.9252543122512162, "grad_norm": 2.7314526767277845, "learning_rate": 6.550639078276435e-06, "loss": 1.511, "step": 31380 }, { "epoch": 0.9254017396432257, "grad_norm": 2.518576889441839, "learning_rate": 6.550211253429934e-06, "loss": 1.4958, "step": 31385 }, { "epoch": 0.9255491670352352, "grad_norm": 2.6909513802794116, "learning_rate": 6.549783346184017e-06, "loss": 1.4192, "step": 31390 }, { "epoch": 0.9256965944272446, "grad_norm": 2.5446889942694915, "learning_rate": 6.54935535655128e-06, "loss": 1.3767, "step": 31395 }, { "epoch": 0.9258440218192541, "grad_norm": 2.7869871123336516, "learning_rate": 6.548927284544315e-06, "loss": 1.5257, "step": 31400 }, { "epoch": 0.9259914492112634, "grad_norm": 2.8276261023149973, "learning_rate": 6.548499130175717e-06, "loss": 1.5065, "step": 31405 }, { "epoch": 0.9261388766032729, "grad_norm": 2.7920406020351933, "learning_rate": 6.5480708934580875e-06, "loss": 1.4511, "step": 31410 }, { "epoch": 0.9262863039952823, "grad_norm": 2.711723232179744, "learning_rate": 6.5476425744040265e-06, "loss": 1.4754, "step": 31415 }, { "epoch": 0.9264337313872918, "grad_norm": 2.818192885492901, "learning_rate": 6.547214173026139e-06, "loss": 1.4845, "step": 31420 }, { "epoch": 0.9265811587793011, "grad_norm": 2.7380956484562704, "learning_rate": 6.546785689337029e-06, "loss": 1.5154, "step": 31425 }, { "epoch": 0.9267285861713106, "grad_norm": 2.6290041390838788, "learning_rate": 6.546357123349308e-06, "loss": 1.4726, "step": 31430 }, { "epoch": 0.9268760135633201, "grad_norm": 2.797415932617732, "learning_rate": 6.5459284750755845e-06, "loss": 1.4679, "step": 31435 }, { "epoch": 0.9270234409553295, "grad_norm": 2.631483125185206, "learning_rate": 6.545499744528473e-06, "loss": 1.4494, "step": 31440 }, { "epoch": 0.927170868347339, "grad_norm": 2.7132445468395914, "learning_rate": 6.545070931720591e-06, "loss": 1.4463, "step": 31445 }, { "epoch": 0.9273182957393483, "grad_norm": 2.7249026749020473, "learning_rate": 6.544642036664554e-06, "loss": 1.4362, "step": 31450 }, { "epoch": 0.9274657231313578, "grad_norm": 2.7101059966139984, "learning_rate": 6.544213059372983e-06, "loss": 1.4414, "step": 31455 }, { "epoch": 0.9276131505233672, "grad_norm": 2.7268361782427455, "learning_rate": 6.5437839998585035e-06, "loss": 1.4893, "step": 31460 }, { "epoch": 0.9277605779153767, "grad_norm": 2.667058145364752, "learning_rate": 6.543354858133739e-06, "loss": 1.5233, "step": 31465 }, { "epoch": 0.9279080053073862, "grad_norm": 2.8824854356309233, "learning_rate": 6.542925634211319e-06, "loss": 1.5042, "step": 31470 }, { "epoch": 0.9280554326993955, "grad_norm": 2.7290827072565, "learning_rate": 6.5424963281038715e-06, "loss": 1.5136, "step": 31475 }, { "epoch": 0.928202860091405, "grad_norm": 2.6592458339924145, "learning_rate": 6.542066939824031e-06, "loss": 1.4699, "step": 31480 }, { "epoch": 0.9283502874834144, "grad_norm": 2.6125878780557645, "learning_rate": 6.541637469384433e-06, "loss": 1.4776, "step": 31485 }, { "epoch": 0.9284977148754239, "grad_norm": 2.706614474258994, "learning_rate": 6.541207916797717e-06, "loss": 1.4672, "step": 31490 }, { "epoch": 0.9286451422674333, "grad_norm": 2.73411821999612, "learning_rate": 6.5407782820765175e-06, "loss": 1.4669, "step": 31495 }, { "epoch": 0.9287925696594427, "grad_norm": 2.842350544437298, "learning_rate": 6.540348565233481e-06, "loss": 1.5156, "step": 31500 }, { "epoch": 0.9287925696594427, "eval_loss": 1.1398552656173706, "eval_runtime": 4.1451, "eval_samples_per_second": 95.534, "eval_steps_per_second": 3.136, "step": 31500 }, { "epoch": 0.9289399970514521, "grad_norm": 2.657779539349887, "learning_rate": 6.5399187662812526e-06, "loss": 1.4428, "step": 31505 }, { "epoch": 0.9290874244434616, "grad_norm": 2.746935405639474, "learning_rate": 6.539488885232478e-06, "loss": 1.4835, "step": 31510 }, { "epoch": 0.929234851835471, "grad_norm": 2.699407291619094, "learning_rate": 6.539058922099809e-06, "loss": 1.473, "step": 31515 }, { "epoch": 0.9293822792274805, "grad_norm": 2.7074980646608253, "learning_rate": 6.538628876895895e-06, "loss": 1.4404, "step": 31520 }, { "epoch": 0.9295297066194899, "grad_norm": 2.792242246430335, "learning_rate": 6.538198749633393e-06, "loss": 1.5187, "step": 31525 }, { "epoch": 0.9296771340114993, "grad_norm": 2.7004139169211943, "learning_rate": 6.537768540324958e-06, "loss": 1.4844, "step": 31530 }, { "epoch": 0.9298245614035088, "grad_norm": 2.854277175156053, "learning_rate": 6.537338248983252e-06, "loss": 1.483, "step": 31535 }, { "epoch": 0.9299719887955182, "grad_norm": 2.75031303552277, "learning_rate": 6.536907875620934e-06, "loss": 1.5078, "step": 31540 }, { "epoch": 0.9301194161875277, "grad_norm": 2.6649014421665407, "learning_rate": 6.536477420250669e-06, "loss": 1.4822, "step": 31545 }, { "epoch": 0.930266843579537, "grad_norm": 2.5750415315488726, "learning_rate": 6.536046882885126e-06, "loss": 1.4625, "step": 31550 }, { "epoch": 0.9304142709715465, "grad_norm": 2.607585118182848, "learning_rate": 6.5356162635369706e-06, "loss": 1.4747, "step": 31555 }, { "epoch": 0.930561698363556, "grad_norm": 2.5306235561664314, "learning_rate": 6.5351855622188755e-06, "loss": 1.465, "step": 31560 }, { "epoch": 0.9307091257555654, "grad_norm": 3.129264560869618, "learning_rate": 6.534754778943516e-06, "loss": 1.432, "step": 31565 }, { "epoch": 0.9308565531475749, "grad_norm": 2.7042457945689766, "learning_rate": 6.534323913723567e-06, "loss": 1.4768, "step": 31570 }, { "epoch": 0.9310039805395842, "grad_norm": 2.7679092584587126, "learning_rate": 6.533892966571706e-06, "loss": 1.4343, "step": 31575 }, { "epoch": 0.9311514079315937, "grad_norm": 2.917929039536439, "learning_rate": 6.533461937500617e-06, "loss": 1.4276, "step": 31580 }, { "epoch": 0.9312988353236031, "grad_norm": 2.6276057793889223, "learning_rate": 6.5330308265229825e-06, "loss": 1.4948, "step": 31585 }, { "epoch": 0.9314462627156126, "grad_norm": 2.7216153928133604, "learning_rate": 6.5325996336514875e-06, "loss": 1.4437, "step": 31590 }, { "epoch": 0.9315936901076219, "grad_norm": 2.6224663438240254, "learning_rate": 6.532168358898821e-06, "loss": 1.4664, "step": 31595 }, { "epoch": 0.9317411174996314, "grad_norm": 2.834135774089675, "learning_rate": 6.531737002277673e-06, "loss": 1.4708, "step": 31600 }, { "epoch": 0.9318885448916409, "grad_norm": 2.672703689851467, "learning_rate": 6.531305563800738e-06, "loss": 1.4751, "step": 31605 }, { "epoch": 0.9320359722836503, "grad_norm": 2.8925116565107123, "learning_rate": 6.530874043480711e-06, "loss": 1.4869, "step": 31610 }, { "epoch": 0.9321833996756598, "grad_norm": 2.6850125643768594, "learning_rate": 6.53044244133029e-06, "loss": 1.4657, "step": 31615 }, { "epoch": 0.9323308270676691, "grad_norm": 2.8103384518287586, "learning_rate": 6.5300107573621745e-06, "loss": 1.496, "step": 31620 }, { "epoch": 0.9324782544596786, "grad_norm": 2.794871762786306, "learning_rate": 6.529578991589068e-06, "loss": 1.468, "step": 31625 }, { "epoch": 0.932625681851688, "grad_norm": 2.7858914698119883, "learning_rate": 6.529147144023676e-06, "loss": 1.5206, "step": 31630 }, { "epoch": 0.9327731092436975, "grad_norm": 2.859630124249304, "learning_rate": 6.528715214678706e-06, "loss": 1.5292, "step": 31635 }, { "epoch": 0.932920536635707, "grad_norm": 2.667723711249016, "learning_rate": 6.528283203566867e-06, "loss": 1.4086, "step": 31640 }, { "epoch": 0.9330679640277163, "grad_norm": 2.587073670354286, "learning_rate": 6.527851110700874e-06, "loss": 1.4737, "step": 31645 }, { "epoch": 0.9332153914197258, "grad_norm": 2.709481014409479, "learning_rate": 6.527418936093438e-06, "loss": 1.4119, "step": 31650 }, { "epoch": 0.9333628188117352, "grad_norm": 2.6743202439720135, "learning_rate": 6.5269866797572805e-06, "loss": 1.4744, "step": 31655 }, { "epoch": 0.9335102462037447, "grad_norm": 2.680049304799659, "learning_rate": 6.526554341705117e-06, "loss": 1.4914, "step": 31660 }, { "epoch": 0.9336576735957541, "grad_norm": 2.8006289798525175, "learning_rate": 6.5261219219496725e-06, "loss": 1.5003, "step": 31665 }, { "epoch": 0.9338051009877635, "grad_norm": 2.8123562583987782, "learning_rate": 6.52568942050367e-06, "loss": 1.478, "step": 31670 }, { "epoch": 0.9339525283797729, "grad_norm": 2.6624105863757044, "learning_rate": 6.525256837379837e-06, "loss": 1.4674, "step": 31675 }, { "epoch": 0.9340999557717824, "grad_norm": 2.7486112873407857, "learning_rate": 6.524824172590903e-06, "loss": 1.4443, "step": 31680 }, { "epoch": 0.9342473831637919, "grad_norm": 2.712411996131703, "learning_rate": 6.524391426149599e-06, "loss": 1.4969, "step": 31685 }, { "epoch": 0.9343948105558013, "grad_norm": 2.7027309492747653, "learning_rate": 6.5239585980686585e-06, "loss": 1.4598, "step": 31690 }, { "epoch": 0.9345422379478107, "grad_norm": 2.7387121908257885, "learning_rate": 6.523525688360819e-06, "loss": 1.5164, "step": 31695 }, { "epoch": 0.9346896653398201, "grad_norm": 2.6670850360916245, "learning_rate": 6.523092697038819e-06, "loss": 1.4769, "step": 31700 }, { "epoch": 0.9348370927318296, "grad_norm": 2.792930844928182, "learning_rate": 6.522659624115401e-06, "loss": 1.4911, "step": 31705 }, { "epoch": 0.934984520123839, "grad_norm": 2.6781707337331504, "learning_rate": 6.522226469603307e-06, "loss": 1.428, "step": 31710 }, { "epoch": 0.9351319475158485, "grad_norm": 2.6695571504356455, "learning_rate": 6.5217932335152835e-06, "loss": 1.4328, "step": 31715 }, { "epoch": 0.9352793749078578, "grad_norm": 2.692619637447434, "learning_rate": 6.521359915864078e-06, "loss": 1.4731, "step": 31720 }, { "epoch": 0.9354268022998673, "grad_norm": 2.6500638584666096, "learning_rate": 6.520926516662444e-06, "loss": 1.4232, "step": 31725 }, { "epoch": 0.9355742296918768, "grad_norm": 2.5915767510651015, "learning_rate": 6.520493035923133e-06, "loss": 1.4371, "step": 31730 }, { "epoch": 0.9357216570838862, "grad_norm": 2.6869320801519927, "learning_rate": 6.5200594736589e-06, "loss": 1.491, "step": 31735 }, { "epoch": 0.9358690844758957, "grad_norm": 2.7388086186099256, "learning_rate": 6.519625829882503e-06, "loss": 1.454, "step": 31740 }, { "epoch": 0.936016511867905, "grad_norm": 2.7429316215532222, "learning_rate": 6.5191921046067055e-06, "loss": 1.5247, "step": 31745 }, { "epoch": 0.9361639392599145, "grad_norm": 2.697278858066536, "learning_rate": 6.518758297844268e-06, "loss": 1.4394, "step": 31750 }, { "epoch": 0.9363113666519239, "grad_norm": 2.7522007652345817, "learning_rate": 6.518324409607955e-06, "loss": 1.4402, "step": 31755 }, { "epoch": 0.9364587940439334, "grad_norm": 2.6938443521449025, "learning_rate": 6.5178904399105355e-06, "loss": 1.4656, "step": 31760 }, { "epoch": 0.9366062214359427, "grad_norm": 2.7826176231287305, "learning_rate": 6.517456388764779e-06, "loss": 1.4797, "step": 31765 }, { "epoch": 0.9367536488279522, "grad_norm": 2.697145778965614, "learning_rate": 6.517022256183458e-06, "loss": 1.4276, "step": 31770 }, { "epoch": 0.9369010762199617, "grad_norm": 2.607170742264661, "learning_rate": 6.516588042179349e-06, "loss": 1.4663, "step": 31775 }, { "epoch": 0.9370485036119711, "grad_norm": 2.701389248336787, "learning_rate": 6.516153746765226e-06, "loss": 1.4381, "step": 31780 }, { "epoch": 0.9371959310039806, "grad_norm": 2.7166787457638732, "learning_rate": 6.5157193699538705e-06, "loss": 1.4993, "step": 31785 }, { "epoch": 0.9373433583959899, "grad_norm": 2.602032046704501, "learning_rate": 6.515284911758066e-06, "loss": 1.4829, "step": 31790 }, { "epoch": 0.9374907857879994, "grad_norm": 2.685804961530684, "learning_rate": 6.5148503721905935e-06, "loss": 1.4677, "step": 31795 }, { "epoch": 0.9376382131800088, "grad_norm": 2.688161776735967, "learning_rate": 6.514415751264243e-06, "loss": 1.4973, "step": 31800 }, { "epoch": 0.9377856405720183, "grad_norm": 2.667173554514235, "learning_rate": 6.5139810489918016e-06, "loss": 1.5117, "step": 31805 }, { "epoch": 0.9379330679640278, "grad_norm": 2.6846896361442942, "learning_rate": 6.5135462653860615e-06, "loss": 1.4666, "step": 31810 }, { "epoch": 0.9380804953560371, "grad_norm": 2.9671534354683415, "learning_rate": 6.513111400459817e-06, "loss": 1.5347, "step": 31815 }, { "epoch": 0.9382279227480466, "grad_norm": 2.8252107689215915, "learning_rate": 6.5126764542258646e-06, "loss": 1.4494, "step": 31820 }, { "epoch": 0.938375350140056, "grad_norm": 2.607106153505256, "learning_rate": 6.512241426697003e-06, "loss": 1.4545, "step": 31825 }, { "epoch": 0.9385227775320655, "grad_norm": 2.699405548072121, "learning_rate": 6.511806317886034e-06, "loss": 1.4686, "step": 31830 }, { "epoch": 0.9386702049240749, "grad_norm": 2.799118574118891, "learning_rate": 6.51137112780576e-06, "loss": 1.5056, "step": 31835 }, { "epoch": 0.9388176323160843, "grad_norm": 2.7874261075194733, "learning_rate": 6.510935856468987e-06, "loss": 1.4807, "step": 31840 }, { "epoch": 0.9389650597080937, "grad_norm": 2.5394005741854997, "learning_rate": 6.5105005038885236e-06, "loss": 1.5066, "step": 31845 }, { "epoch": 0.9391124871001032, "grad_norm": 2.651225563628626, "learning_rate": 6.510065070077181e-06, "loss": 1.4329, "step": 31850 }, { "epoch": 0.9392599144921127, "grad_norm": 2.794036723103147, "learning_rate": 6.509629555047773e-06, "loss": 1.4389, "step": 31855 }, { "epoch": 0.9394073418841221, "grad_norm": 2.7150252436838884, "learning_rate": 6.5091939588131124e-06, "loss": 1.4786, "step": 31860 }, { "epoch": 0.9395547692761315, "grad_norm": 3.0484361966782463, "learning_rate": 6.50875828138602e-06, "loss": 1.5603, "step": 31865 }, { "epoch": 0.9397021966681409, "grad_norm": 2.7522933294919856, "learning_rate": 6.5083225227793145e-06, "loss": 1.4889, "step": 31870 }, { "epoch": 0.9398496240601504, "grad_norm": 2.8563800319308483, "learning_rate": 6.50788668300582e-06, "loss": 1.4651, "step": 31875 }, { "epoch": 0.9399970514521598, "grad_norm": 2.5136339837249864, "learning_rate": 6.50745076207836e-06, "loss": 1.4511, "step": 31880 }, { "epoch": 0.9401444788441693, "grad_norm": 2.626763673010691, "learning_rate": 6.507014760009763e-06, "loss": 1.5065, "step": 31885 }, { "epoch": 0.9402919062361786, "grad_norm": 2.785581323262638, "learning_rate": 6.5065786768128575e-06, "loss": 1.4499, "step": 31890 }, { "epoch": 0.9404393336281881, "grad_norm": 2.6045072741387867, "learning_rate": 6.506142512500477e-06, "loss": 1.4761, "step": 31895 }, { "epoch": 0.9405867610201976, "grad_norm": 2.7321913894936154, "learning_rate": 6.5057062670854565e-06, "loss": 1.496, "step": 31900 }, { "epoch": 0.940734188412207, "grad_norm": 2.71083496127539, "learning_rate": 6.505269940580633e-06, "loss": 1.4471, "step": 31905 }, { "epoch": 0.9408816158042165, "grad_norm": 2.6534023019267665, "learning_rate": 6.504833532998843e-06, "loss": 1.4653, "step": 31910 }, { "epoch": 0.9410290431962258, "grad_norm": 2.7728988770653324, "learning_rate": 6.504397044352933e-06, "loss": 1.4938, "step": 31915 }, { "epoch": 0.9411764705882353, "grad_norm": 2.643654954615737, "learning_rate": 6.503960474655744e-06, "loss": 1.4768, "step": 31920 }, { "epoch": 0.9413238979802447, "grad_norm": 2.6146795643537732, "learning_rate": 6.503523823920124e-06, "loss": 1.4127, "step": 31925 }, { "epoch": 0.9414713253722542, "grad_norm": 2.7393645909942466, "learning_rate": 6.50308709215892e-06, "loss": 1.4374, "step": 31930 }, { "epoch": 0.9416187527642637, "grad_norm": 2.741822495905883, "learning_rate": 6.502650279384984e-06, "loss": 1.494, "step": 31935 }, { "epoch": 0.941766180156273, "grad_norm": 2.7400791044013406, "learning_rate": 6.5022133856111715e-06, "loss": 1.435, "step": 31940 }, { "epoch": 0.9419136075482825, "grad_norm": 2.8234104198830647, "learning_rate": 6.501776410850339e-06, "loss": 1.4631, "step": 31945 }, { "epoch": 0.9420610349402919, "grad_norm": 2.8271678639098816, "learning_rate": 6.50133935511534e-06, "loss": 1.4896, "step": 31950 }, { "epoch": 0.9422084623323014, "grad_norm": 2.6808500644032915, "learning_rate": 6.500902218419042e-06, "loss": 1.5029, "step": 31955 }, { "epoch": 0.9423558897243107, "grad_norm": 2.557674840183882, "learning_rate": 6.500465000774302e-06, "loss": 1.4625, "step": 31960 }, { "epoch": 0.9425033171163202, "grad_norm": 2.585704652731673, "learning_rate": 6.50002770219399e-06, "loss": 1.4954, "step": 31965 }, { "epoch": 0.9426507445083296, "grad_norm": 2.7955014203669024, "learning_rate": 6.499590322690974e-06, "loss": 1.4675, "step": 31970 }, { "epoch": 0.9427981719003391, "grad_norm": 2.7747767244647865, "learning_rate": 6.499152862278122e-06, "loss": 1.491, "step": 31975 }, { "epoch": 0.9429455992923486, "grad_norm": 2.7918053935703244, "learning_rate": 6.4987153209683075e-06, "loss": 1.5087, "step": 31980 }, { "epoch": 0.9430930266843579, "grad_norm": 2.6754192338965788, "learning_rate": 6.498277698774406e-06, "loss": 1.4433, "step": 31985 }, { "epoch": 0.9432404540763674, "grad_norm": 2.7793811972313645, "learning_rate": 6.4978399957092954e-06, "loss": 1.4783, "step": 31990 }, { "epoch": 0.9433878814683768, "grad_norm": 2.6992479236520404, "learning_rate": 6.497402211785856e-06, "loss": 1.4972, "step": 31995 }, { "epoch": 0.9435353088603863, "grad_norm": 2.7094108504314343, "learning_rate": 6.496964347016968e-06, "loss": 1.4379, "step": 32000 }, { "epoch": 0.9435353088603863, "eval_loss": 1.138964056968689, "eval_runtime": 4.2814, "eval_samples_per_second": 92.493, "eval_steps_per_second": 3.036, "step": 32000 }, { "epoch": 0.9436827362523957, "grad_norm": 2.7532586718858907, "learning_rate": 6.496526401415518e-06, "loss": 1.4345, "step": 32005 }, { "epoch": 0.9438301636444051, "grad_norm": 2.626979663019449, "learning_rate": 6.496088374994393e-06, "loss": 1.4114, "step": 32010 }, { "epoch": 0.9439775910364145, "grad_norm": 2.9790654258266054, "learning_rate": 6.4956502677664806e-06, "loss": 1.475, "step": 32015 }, { "epoch": 0.944125018428424, "grad_norm": 2.8056403623848816, "learning_rate": 6.495212079744674e-06, "loss": 1.515, "step": 32020 }, { "epoch": 0.9442724458204335, "grad_norm": 2.739706075778799, "learning_rate": 6.494773810941867e-06, "loss": 1.4616, "step": 32025 }, { "epoch": 0.9444198732124429, "grad_norm": 2.5598700678628896, "learning_rate": 6.494335461370958e-06, "loss": 1.4434, "step": 32030 }, { "epoch": 0.9445673006044523, "grad_norm": 2.7352571552193905, "learning_rate": 6.493897031044843e-06, "loss": 1.4431, "step": 32035 }, { "epoch": 0.9447147279964617, "grad_norm": 2.7108346961367396, "learning_rate": 6.4934585199764254e-06, "loss": 1.4936, "step": 32040 }, { "epoch": 0.9448621553884712, "grad_norm": 2.688437087845731, "learning_rate": 6.493019928178608e-06, "loss": 1.4941, "step": 32045 }, { "epoch": 0.9450095827804806, "grad_norm": 2.581299637723083, "learning_rate": 6.492581255664296e-06, "loss": 1.4981, "step": 32050 }, { "epoch": 0.9451570101724901, "grad_norm": 2.700264973439434, "learning_rate": 6.4921425024464e-06, "loss": 1.4822, "step": 32055 }, { "epoch": 0.9453044375644994, "grad_norm": 2.7112615496671695, "learning_rate": 6.49170366853783e-06, "loss": 1.4731, "step": 32060 }, { "epoch": 0.9454518649565089, "grad_norm": 2.842780736589007, "learning_rate": 6.491264753951497e-06, "loss": 1.4599, "step": 32065 }, { "epoch": 0.9455992923485184, "grad_norm": 2.6348885476847292, "learning_rate": 6.4908257587003194e-06, "loss": 1.4649, "step": 32070 }, { "epoch": 0.9457467197405278, "grad_norm": 2.6687505074593574, "learning_rate": 6.490386682797215e-06, "loss": 1.473, "step": 32075 }, { "epoch": 0.9458941471325373, "grad_norm": 2.700112469634198, "learning_rate": 6.489947526255102e-06, "loss": 1.4728, "step": 32080 }, { "epoch": 0.9460415745245466, "grad_norm": 2.6937618949903426, "learning_rate": 6.489508289086905e-06, "loss": 1.4932, "step": 32085 }, { "epoch": 0.9461890019165561, "grad_norm": 2.643384878285095, "learning_rate": 6.489068971305548e-06, "loss": 1.4404, "step": 32090 }, { "epoch": 0.9463364293085655, "grad_norm": 2.6625338680184316, "learning_rate": 6.488629572923961e-06, "loss": 1.4405, "step": 32095 }, { "epoch": 0.946483856700575, "grad_norm": 2.6985579485559237, "learning_rate": 6.488190093955069e-06, "loss": 1.4622, "step": 32100 }, { "epoch": 0.9466312840925845, "grad_norm": 2.836747781697268, "learning_rate": 6.487750534411808e-06, "loss": 1.4847, "step": 32105 }, { "epoch": 0.9467787114845938, "grad_norm": 2.6285000960422944, "learning_rate": 6.48731089430711e-06, "loss": 1.4198, "step": 32110 }, { "epoch": 0.9469261388766033, "grad_norm": 2.8025274684728783, "learning_rate": 6.4868711736539145e-06, "loss": 1.4875, "step": 32115 }, { "epoch": 0.9470735662686127, "grad_norm": 2.76749868564219, "learning_rate": 6.486431372465159e-06, "loss": 1.4485, "step": 32120 }, { "epoch": 0.9472209936606222, "grad_norm": 2.7287766046438144, "learning_rate": 6.485991490753786e-06, "loss": 1.4669, "step": 32125 }, { "epoch": 0.9473684210526315, "grad_norm": 2.746658064755782, "learning_rate": 6.485551528532739e-06, "loss": 1.4726, "step": 32130 }, { "epoch": 0.947515848444641, "grad_norm": 2.7596625696901316, "learning_rate": 6.485111485814965e-06, "loss": 1.4671, "step": 32135 }, { "epoch": 0.9476632758366504, "grad_norm": 2.7225711072211998, "learning_rate": 6.484671362613413e-06, "loss": 1.4326, "step": 32140 }, { "epoch": 0.9478107032286599, "grad_norm": 2.871798088683567, "learning_rate": 6.484231158941033e-06, "loss": 1.4766, "step": 32145 }, { "epoch": 0.9479581306206694, "grad_norm": 3.056870718937362, "learning_rate": 6.483790874810778e-06, "loss": 1.5351, "step": 32150 }, { "epoch": 0.9481055580126787, "grad_norm": 2.7365640106207607, "learning_rate": 6.483350510235605e-06, "loss": 1.4444, "step": 32155 }, { "epoch": 0.9482529854046882, "grad_norm": 2.69374592466503, "learning_rate": 6.4829100652284725e-06, "loss": 1.4873, "step": 32160 }, { "epoch": 0.9484004127966976, "grad_norm": 2.758501598897021, "learning_rate": 6.482469539802341e-06, "loss": 1.4425, "step": 32165 }, { "epoch": 0.9485478401887071, "grad_norm": 2.623063721149008, "learning_rate": 6.482028933970173e-06, "loss": 1.4551, "step": 32170 }, { "epoch": 0.9486952675807165, "grad_norm": 2.8284927032608693, "learning_rate": 6.481588247744934e-06, "loss": 1.4646, "step": 32175 }, { "epoch": 0.9488426949727259, "grad_norm": 2.7391129867699093, "learning_rate": 6.481147481139591e-06, "loss": 1.5166, "step": 32180 }, { "epoch": 0.9489901223647353, "grad_norm": 2.760912501816775, "learning_rate": 6.480706634167114e-06, "loss": 1.4692, "step": 32185 }, { "epoch": 0.9491375497567448, "grad_norm": 2.5829936351502027, "learning_rate": 6.480265706840478e-06, "loss": 1.4353, "step": 32190 }, { "epoch": 0.9492849771487543, "grad_norm": 2.7700143648337843, "learning_rate": 6.479824699172655e-06, "loss": 1.4839, "step": 32195 }, { "epoch": 0.9494324045407637, "grad_norm": 2.6527168391175455, "learning_rate": 6.4793836111766234e-06, "loss": 1.4611, "step": 32200 }, { "epoch": 0.9495798319327731, "grad_norm": 2.688151585537952, "learning_rate": 6.478942442865362e-06, "loss": 1.4572, "step": 32205 }, { "epoch": 0.9497272593247825, "grad_norm": 2.6297909336167673, "learning_rate": 6.478501194251855e-06, "loss": 1.4362, "step": 32210 }, { "epoch": 0.949874686716792, "grad_norm": 2.773007885562118, "learning_rate": 6.478059865349082e-06, "loss": 1.5083, "step": 32215 }, { "epoch": 0.9500221141088014, "grad_norm": 2.5933824630033255, "learning_rate": 6.477618456170034e-06, "loss": 1.4621, "step": 32220 }, { "epoch": 0.9501695415008109, "grad_norm": 2.862479898517299, "learning_rate": 6.477176966727698e-06, "loss": 1.5049, "step": 32225 }, { "epoch": 0.9503169688928202, "grad_norm": 2.73965116019624, "learning_rate": 6.476735397035066e-06, "loss": 1.4518, "step": 32230 }, { "epoch": 0.9504643962848297, "grad_norm": 2.757101324595459, "learning_rate": 6.476293747105131e-06, "loss": 1.5379, "step": 32235 }, { "epoch": 0.9506118236768392, "grad_norm": 2.719963386310087, "learning_rate": 6.475852016950888e-06, "loss": 1.4769, "step": 32240 }, { "epoch": 0.9507592510688486, "grad_norm": 2.716132812877031, "learning_rate": 6.475410206585339e-06, "loss": 1.4784, "step": 32245 }, { "epoch": 0.9509066784608581, "grad_norm": 2.907267222956067, "learning_rate": 6.474968316021481e-06, "loss": 1.4869, "step": 32250 }, { "epoch": 0.9510541058528674, "grad_norm": 2.868760435201068, "learning_rate": 6.47452634527232e-06, "loss": 1.489, "step": 32255 }, { "epoch": 0.9512015332448769, "grad_norm": 2.7138248155458298, "learning_rate": 6.474084294350859e-06, "loss": 1.416, "step": 32260 }, { "epoch": 0.9513489606368863, "grad_norm": 2.9142046116323184, "learning_rate": 6.473642163270108e-06, "loss": 1.563, "step": 32265 }, { "epoch": 0.9514963880288958, "grad_norm": 2.6720897449128285, "learning_rate": 6.473199952043077e-06, "loss": 1.4727, "step": 32270 }, { "epoch": 0.9516438154209053, "grad_norm": 3.067061600707822, "learning_rate": 6.472757660682776e-06, "loss": 1.4683, "step": 32275 }, { "epoch": 0.9517912428129146, "grad_norm": 2.5848266494349406, "learning_rate": 6.472315289202221e-06, "loss": 1.4305, "step": 32280 }, { "epoch": 0.9519386702049241, "grad_norm": 2.7115654185922318, "learning_rate": 6.471872837614432e-06, "loss": 1.4977, "step": 32285 }, { "epoch": 0.9520860975969335, "grad_norm": 2.8213781473958885, "learning_rate": 6.471430305932426e-06, "loss": 1.5001, "step": 32290 }, { "epoch": 0.952233524988943, "grad_norm": 2.664206364577819, "learning_rate": 6.470987694169227e-06, "loss": 1.4973, "step": 32295 }, { "epoch": 0.9523809523809523, "grad_norm": 2.8751710412317357, "learning_rate": 6.4705450023378566e-06, "loss": 1.4767, "step": 32300 }, { "epoch": 0.9525283797729618, "grad_norm": 2.8426898483280505, "learning_rate": 6.470102230451343e-06, "loss": 1.5101, "step": 32305 }, { "epoch": 0.9526758071649712, "grad_norm": 2.482813553403605, "learning_rate": 6.469659378522717e-06, "loss": 1.416, "step": 32310 }, { "epoch": 0.9528232345569807, "grad_norm": 2.9764360364434337, "learning_rate": 6.469216446565006e-06, "loss": 1.4671, "step": 32315 }, { "epoch": 0.9529706619489902, "grad_norm": 2.776206583927165, "learning_rate": 6.468773434591247e-06, "loss": 1.4959, "step": 32320 }, { "epoch": 0.9531180893409995, "grad_norm": 2.618712171800007, "learning_rate": 6.4683303426144755e-06, "loss": 1.504, "step": 32325 }, { "epoch": 0.953265516733009, "grad_norm": 2.747553125881928, "learning_rate": 6.467887170647729e-06, "loss": 1.4472, "step": 32330 }, { "epoch": 0.9534129441250184, "grad_norm": 2.825390892061219, "learning_rate": 6.467443918704049e-06, "loss": 1.5037, "step": 32335 }, { "epoch": 0.9535603715170279, "grad_norm": 2.6534659420418225, "learning_rate": 6.467000586796479e-06, "loss": 1.4722, "step": 32340 }, { "epoch": 0.9537077989090373, "grad_norm": 2.7087364631927775, "learning_rate": 6.4665571749380635e-06, "loss": 1.4445, "step": 32345 }, { "epoch": 0.9538552263010467, "grad_norm": 2.6383593410461432, "learning_rate": 6.466113683141853e-06, "loss": 1.4678, "step": 32350 }, { "epoch": 0.9540026536930561, "grad_norm": 2.766394327340637, "learning_rate": 6.4656701114208935e-06, "loss": 1.4542, "step": 32355 }, { "epoch": 0.9541500810850656, "grad_norm": 2.643185020682548, "learning_rate": 6.465226459788242e-06, "loss": 1.4416, "step": 32360 }, { "epoch": 0.9542975084770751, "grad_norm": 2.6961290192628575, "learning_rate": 6.46478272825695e-06, "loss": 1.4979, "step": 32365 }, { "epoch": 0.9544449358690845, "grad_norm": 2.739900984576174, "learning_rate": 6.464338916840077e-06, "loss": 1.5287, "step": 32370 }, { "epoch": 0.9545923632610939, "grad_norm": 2.7916531456634166, "learning_rate": 6.4638950255506825e-06, "loss": 1.47, "step": 32375 }, { "epoch": 0.9547397906531033, "grad_norm": 2.7519541794817926, "learning_rate": 6.463451054401827e-06, "loss": 1.4361, "step": 32380 }, { "epoch": 0.9548872180451128, "grad_norm": 2.907118777983705, "learning_rate": 6.4630070034065765e-06, "loss": 1.4476, "step": 32385 }, { "epoch": 0.9550346454371222, "grad_norm": 2.8194709803418023, "learning_rate": 6.462562872577996e-06, "loss": 1.4575, "step": 32390 }, { "epoch": 0.9551820728291317, "grad_norm": 2.7789396355292277, "learning_rate": 6.462118661929156e-06, "loss": 1.4653, "step": 32395 }, { "epoch": 0.955329500221141, "grad_norm": 2.738418832321225, "learning_rate": 6.461674371473129e-06, "loss": 1.4868, "step": 32400 }, { "epoch": 0.9554769276131505, "grad_norm": 2.8128265523862335, "learning_rate": 6.461230001222986e-06, "loss": 1.4529, "step": 32405 }, { "epoch": 0.95562435500516, "grad_norm": 2.6408920983096236, "learning_rate": 6.460785551191804e-06, "loss": 1.422, "step": 32410 }, { "epoch": 0.9557717823971694, "grad_norm": 2.8017138961101895, "learning_rate": 6.460341021392661e-06, "loss": 1.5081, "step": 32415 }, { "epoch": 0.9559192097891789, "grad_norm": 2.6390714651091063, "learning_rate": 6.459896411838639e-06, "loss": 1.4798, "step": 32420 }, { "epoch": 0.9560666371811882, "grad_norm": 2.6405694680043896, "learning_rate": 6.459451722542822e-06, "loss": 1.4405, "step": 32425 }, { "epoch": 0.9562140645731977, "grad_norm": 2.731918014719477, "learning_rate": 6.459006953518293e-06, "loss": 1.4572, "step": 32430 }, { "epoch": 0.9563614919652071, "grad_norm": 2.797718074377262, "learning_rate": 6.458562104778142e-06, "loss": 1.4937, "step": 32435 }, { "epoch": 0.9565089193572166, "grad_norm": 2.721129685348266, "learning_rate": 6.458117176335457e-06, "loss": 1.4817, "step": 32440 }, { "epoch": 0.9566563467492261, "grad_norm": 2.8334983214200795, "learning_rate": 6.457672168203333e-06, "loss": 1.4468, "step": 32445 }, { "epoch": 0.9568037741412354, "grad_norm": 2.577322691749729, "learning_rate": 6.457227080394863e-06, "loss": 1.4675, "step": 32450 }, { "epoch": 0.9569512015332449, "grad_norm": 2.640963616548547, "learning_rate": 6.456781912923144e-06, "loss": 1.436, "step": 32455 }, { "epoch": 0.9570986289252543, "grad_norm": 2.8116841784226345, "learning_rate": 6.456336665801278e-06, "loss": 1.4641, "step": 32460 }, { "epoch": 0.9572460563172638, "grad_norm": 2.654631429196071, "learning_rate": 6.455891339042364e-06, "loss": 1.4794, "step": 32465 }, { "epoch": 0.9573934837092731, "grad_norm": 2.9039929814073018, "learning_rate": 6.455445932659508e-06, "loss": 1.5377, "step": 32470 }, { "epoch": 0.9575409111012826, "grad_norm": 2.808432645658781, "learning_rate": 6.455000446665816e-06, "loss": 1.5107, "step": 32475 }, { "epoch": 0.957688338493292, "grad_norm": 2.9462253753395555, "learning_rate": 6.454554881074397e-06, "loss": 1.4464, "step": 32480 }, { "epoch": 0.9578357658853015, "grad_norm": 2.673571912525533, "learning_rate": 6.454109235898363e-06, "loss": 1.4622, "step": 32485 }, { "epoch": 0.957983193277311, "grad_norm": 2.6815359625821773, "learning_rate": 6.453663511150826e-06, "loss": 1.4523, "step": 32490 }, { "epoch": 0.9581306206693203, "grad_norm": 2.694007900312812, "learning_rate": 6.4532177068449035e-06, "loss": 1.4581, "step": 32495 }, { "epoch": 0.9582780480613298, "grad_norm": 2.6067947781814484, "learning_rate": 6.4527718229937136e-06, "loss": 1.4185, "step": 32500 }, { "epoch": 0.9582780480613298, "eval_loss": 1.1373131275177002, "eval_runtime": 4.1909, "eval_samples_per_second": 94.491, "eval_steps_per_second": 3.102, "step": 32500 }, { "epoch": 0.9584254754533392, "grad_norm": 2.6559211127776123, "learning_rate": 6.452325859610376e-06, "loss": 1.4819, "step": 32505 }, { "epoch": 0.9585729028453487, "grad_norm": 2.743250017782925, "learning_rate": 6.451879816708014e-06, "loss": 1.4841, "step": 32510 }, { "epoch": 0.9587203302373581, "grad_norm": 2.793815444758947, "learning_rate": 6.451433694299753e-06, "loss": 1.4958, "step": 32515 }, { "epoch": 0.9588677576293675, "grad_norm": 2.7157756950075096, "learning_rate": 6.450987492398723e-06, "loss": 1.4522, "step": 32520 }, { "epoch": 0.959015185021377, "grad_norm": 2.681472146020321, "learning_rate": 6.450541211018049e-06, "loss": 1.4585, "step": 32525 }, { "epoch": 0.9591626124133864, "grad_norm": 2.7216850070335616, "learning_rate": 6.450094850170869e-06, "loss": 1.4622, "step": 32530 }, { "epoch": 0.9593100398053959, "grad_norm": 2.811993962298179, "learning_rate": 6.449648409870313e-06, "loss": 1.4389, "step": 32535 }, { "epoch": 0.9594574671974053, "grad_norm": 2.7747347804104963, "learning_rate": 6.44920189012952e-06, "loss": 1.4638, "step": 32540 }, { "epoch": 0.9596048945894147, "grad_norm": 2.8013417296274414, "learning_rate": 6.448755290961628e-06, "loss": 1.4823, "step": 32545 }, { "epoch": 0.9597523219814241, "grad_norm": 2.622558288640992, "learning_rate": 6.4483086123797825e-06, "loss": 1.3816, "step": 32550 }, { "epoch": 0.9598997493734336, "grad_norm": 2.6234530400666616, "learning_rate": 6.447861854397123e-06, "loss": 1.4262, "step": 32555 }, { "epoch": 0.960047176765443, "grad_norm": 2.724170812014126, "learning_rate": 6.4474150170267985e-06, "loss": 1.448, "step": 32560 }, { "epoch": 0.9601946041574525, "grad_norm": 2.677696569469099, "learning_rate": 6.446968100281957e-06, "loss": 1.4919, "step": 32565 }, { "epoch": 0.9603420315494618, "grad_norm": 2.7728778013262745, "learning_rate": 6.4465211041757495e-06, "loss": 1.433, "step": 32570 }, { "epoch": 0.9604894589414713, "grad_norm": 2.8758781289683433, "learning_rate": 6.446074028721329e-06, "loss": 1.5, "step": 32575 }, { "epoch": 0.9606368863334808, "grad_norm": 2.534781167098419, "learning_rate": 6.4456268739318524e-06, "loss": 1.4676, "step": 32580 }, { "epoch": 0.9607843137254902, "grad_norm": 2.8477161066977423, "learning_rate": 6.445179639820477e-06, "loss": 1.4954, "step": 32585 }, { "epoch": 0.9609317411174997, "grad_norm": 2.758742379398708, "learning_rate": 6.444732326400363e-06, "loss": 1.4791, "step": 32590 }, { "epoch": 0.961079168509509, "grad_norm": 2.7948152971979985, "learning_rate": 6.444284933684673e-06, "loss": 1.4358, "step": 32595 }, { "epoch": 0.9612265959015185, "grad_norm": 2.739129594348815, "learning_rate": 6.443837461686571e-06, "loss": 1.4709, "step": 32600 }, { "epoch": 0.961374023293528, "grad_norm": 2.684189370031181, "learning_rate": 6.443389910419228e-06, "loss": 1.4684, "step": 32605 }, { "epoch": 0.9615214506855374, "grad_norm": 2.7320800480612886, "learning_rate": 6.44294227989581e-06, "loss": 1.491, "step": 32610 }, { "epoch": 0.9616688780775469, "grad_norm": 2.7303046247180935, "learning_rate": 6.44249457012949e-06, "loss": 1.4271, "step": 32615 }, { "epoch": 0.9618163054695562, "grad_norm": 2.772126513748823, "learning_rate": 6.442046781133444e-06, "loss": 1.4624, "step": 32620 }, { "epoch": 0.9619637328615657, "grad_norm": 2.839960403756425, "learning_rate": 6.4415989129208465e-06, "loss": 1.4852, "step": 32625 }, { "epoch": 0.9621111602535751, "grad_norm": 2.7538259013472053, "learning_rate": 6.441150965504878e-06, "loss": 1.5047, "step": 32630 }, { "epoch": 0.9622585876455846, "grad_norm": 2.646008045219146, "learning_rate": 6.440702938898719e-06, "loss": 1.4142, "step": 32635 }, { "epoch": 0.9624060150375939, "grad_norm": 2.5231226635047435, "learning_rate": 6.440254833115554e-06, "loss": 1.4103, "step": 32640 }, { "epoch": 0.9625534424296034, "grad_norm": 2.6849034594627215, "learning_rate": 6.439806648168568e-06, "loss": 1.4474, "step": 32645 }, { "epoch": 0.9627008698216128, "grad_norm": 2.598785859170896, "learning_rate": 6.43935838407095e-06, "loss": 1.4682, "step": 32650 }, { "epoch": 0.9628482972136223, "grad_norm": 2.92807621566113, "learning_rate": 6.4389100408358905e-06, "loss": 1.5049, "step": 32655 }, { "epoch": 0.9629957246056318, "grad_norm": 2.8048481994908685, "learning_rate": 6.4384616184765845e-06, "loss": 1.508, "step": 32660 }, { "epoch": 0.9631431519976411, "grad_norm": 2.6842431143807572, "learning_rate": 6.438013117006223e-06, "loss": 1.4332, "step": 32665 }, { "epoch": 0.9632905793896506, "grad_norm": 2.813046941411191, "learning_rate": 6.437564536438008e-06, "loss": 1.5157, "step": 32670 }, { "epoch": 0.96343800678166, "grad_norm": 2.6112121233151524, "learning_rate": 6.437115876785137e-06, "loss": 1.5096, "step": 32675 }, { "epoch": 0.9635854341736695, "grad_norm": 2.7320541999794883, "learning_rate": 6.4366671380608125e-06, "loss": 1.4494, "step": 32680 }, { "epoch": 0.9637328615656789, "grad_norm": 2.673898189087849, "learning_rate": 6.43621832027824e-06, "loss": 1.4767, "step": 32685 }, { "epoch": 0.9638802889576883, "grad_norm": 2.6525412128826953, "learning_rate": 6.435769423450626e-06, "loss": 1.4573, "step": 32690 }, { "epoch": 0.9640277163496977, "grad_norm": 2.929848091604548, "learning_rate": 6.43532044759118e-06, "loss": 1.4553, "step": 32695 }, { "epoch": 0.9641751437417072, "grad_norm": 2.657790477051324, "learning_rate": 6.434871392713113e-06, "loss": 1.4961, "step": 32700 }, { "epoch": 0.9643225711337167, "grad_norm": 2.709824963642813, "learning_rate": 6.43442225882964e-06, "loss": 1.4855, "step": 32705 }, { "epoch": 0.9644699985257261, "grad_norm": 2.796333761334423, "learning_rate": 6.433973045953978e-06, "loss": 1.4643, "step": 32710 }, { "epoch": 0.9646174259177355, "grad_norm": 2.719622377538956, "learning_rate": 6.433523754099342e-06, "loss": 1.4884, "step": 32715 }, { "epoch": 0.9647648533097449, "grad_norm": 2.7272577212274522, "learning_rate": 6.433074383278955e-06, "loss": 1.4818, "step": 32720 }, { "epoch": 0.9649122807017544, "grad_norm": 2.7375257626632306, "learning_rate": 6.4326249335060435e-06, "loss": 1.4474, "step": 32725 }, { "epoch": 0.9650597080937638, "grad_norm": 2.8243489120204717, "learning_rate": 6.4321754047938276e-06, "loss": 1.5194, "step": 32730 }, { "epoch": 0.9652071354857733, "grad_norm": 2.5806269370098422, "learning_rate": 6.431725797155538e-06, "loss": 1.4224, "step": 32735 }, { "epoch": 0.9653545628777827, "grad_norm": 2.8035329330186394, "learning_rate": 6.431276110604406e-06, "loss": 1.4676, "step": 32740 }, { "epoch": 0.9655019902697921, "grad_norm": 2.735351336493587, "learning_rate": 6.430826345153661e-06, "loss": 1.4677, "step": 32745 }, { "epoch": 0.9656494176618016, "grad_norm": 2.6165721343340382, "learning_rate": 6.430376500816541e-06, "loss": 1.5017, "step": 32750 }, { "epoch": 0.965796845053811, "grad_norm": 3.0276138943965716, "learning_rate": 6.429926577606281e-06, "loss": 1.408, "step": 32755 }, { "epoch": 0.9659442724458205, "grad_norm": 2.7361316878979123, "learning_rate": 6.429476575536121e-06, "loss": 1.5047, "step": 32760 }, { "epoch": 0.9660916998378298, "grad_norm": 2.8284406824434996, "learning_rate": 6.429026494619304e-06, "loss": 1.4455, "step": 32765 }, { "epoch": 0.9662391272298393, "grad_norm": 2.6925823712222643, "learning_rate": 6.428576334869072e-06, "loss": 1.4835, "step": 32770 }, { "epoch": 0.9663865546218487, "grad_norm": 2.834553589296862, "learning_rate": 6.428126096298674e-06, "loss": 1.4856, "step": 32775 }, { "epoch": 0.9665339820138582, "grad_norm": 2.6881535407810033, "learning_rate": 6.427675778921358e-06, "loss": 1.4827, "step": 32780 }, { "epoch": 0.9666814094058677, "grad_norm": 2.5354345365572475, "learning_rate": 6.427225382750373e-06, "loss": 1.448, "step": 32785 }, { "epoch": 0.966828836797877, "grad_norm": 2.765001278893001, "learning_rate": 6.4267749077989754e-06, "loss": 1.4524, "step": 32790 }, { "epoch": 0.9669762641898865, "grad_norm": 2.7487720456196025, "learning_rate": 6.42632435408042e-06, "loss": 1.4787, "step": 32795 }, { "epoch": 0.9671236915818959, "grad_norm": 2.665205278459923, "learning_rate": 6.425873721607962e-06, "loss": 1.5246, "step": 32800 }, { "epoch": 0.9672711189739054, "grad_norm": 2.677045754188893, "learning_rate": 6.425423010394866e-06, "loss": 1.4483, "step": 32805 }, { "epoch": 0.9674185463659147, "grad_norm": 2.743624888597425, "learning_rate": 6.4249722204543924e-06, "loss": 1.4339, "step": 32810 }, { "epoch": 0.9675659737579242, "grad_norm": 2.7163865152924753, "learning_rate": 6.424521351799807e-06, "loss": 1.4689, "step": 32815 }, { "epoch": 0.9677134011499336, "grad_norm": 2.7014658193042136, "learning_rate": 6.424070404444376e-06, "loss": 1.4361, "step": 32820 }, { "epoch": 0.9678608285419431, "grad_norm": 2.6624353775192033, "learning_rate": 6.423619378401371e-06, "loss": 1.4307, "step": 32825 }, { "epoch": 0.9680082559339526, "grad_norm": 2.783776663451777, "learning_rate": 6.423168273684062e-06, "loss": 1.4356, "step": 32830 }, { "epoch": 0.9681556833259619, "grad_norm": 2.643418750942177, "learning_rate": 6.422717090305725e-06, "loss": 1.4789, "step": 32835 }, { "epoch": 0.9683031107179714, "grad_norm": 2.7244038195395848, "learning_rate": 6.422265828279636e-06, "loss": 1.4407, "step": 32840 }, { "epoch": 0.9684505381099808, "grad_norm": 2.6785689523992717, "learning_rate": 6.4218144876190725e-06, "loss": 1.4902, "step": 32845 }, { "epoch": 0.9685979655019903, "grad_norm": 2.908902691789138, "learning_rate": 6.421363068337317e-06, "loss": 1.4465, "step": 32850 }, { "epoch": 0.9687453928939997, "grad_norm": 2.649008142255504, "learning_rate": 6.420911570447654e-06, "loss": 1.4317, "step": 32855 }, { "epoch": 0.9688928202860091, "grad_norm": 2.849961016988128, "learning_rate": 6.420459993963366e-06, "loss": 1.4904, "step": 32860 }, { "epoch": 0.9690402476780186, "grad_norm": 2.7355849418447082, "learning_rate": 6.420008338897745e-06, "loss": 1.4057, "step": 32865 }, { "epoch": 0.969187675070028, "grad_norm": 2.7510311650100365, "learning_rate": 6.419556605264079e-06, "loss": 1.4782, "step": 32870 }, { "epoch": 0.9693351024620375, "grad_norm": 2.76566104125002, "learning_rate": 6.419104793075661e-06, "loss": 1.4546, "step": 32875 }, { "epoch": 0.9694825298540469, "grad_norm": 2.614668401391008, "learning_rate": 6.418652902345788e-06, "loss": 1.4541, "step": 32880 }, { "epoch": 0.9696299572460563, "grad_norm": 2.9107004856167973, "learning_rate": 6.4182009330877565e-06, "loss": 1.4702, "step": 32885 }, { "epoch": 0.9697773846380657, "grad_norm": 2.76287549350715, "learning_rate": 6.417748885314864e-06, "loss": 1.435, "step": 32890 }, { "epoch": 0.9699248120300752, "grad_norm": 2.8372717115129316, "learning_rate": 6.4172967590404155e-06, "loss": 1.4954, "step": 32895 }, { "epoch": 0.9700722394220846, "grad_norm": 2.7232366374204995, "learning_rate": 6.416844554277714e-06, "loss": 1.5439, "step": 32900 }, { "epoch": 0.9702196668140941, "grad_norm": 2.753458555549338, "learning_rate": 6.416392271040065e-06, "loss": 1.4666, "step": 32905 }, { "epoch": 0.9703670942061035, "grad_norm": 2.4999578064045904, "learning_rate": 6.4159399093407805e-06, "loss": 1.5268, "step": 32910 }, { "epoch": 0.9705145215981129, "grad_norm": 2.7015365157699023, "learning_rate": 6.415487469193169e-06, "loss": 1.45, "step": 32915 }, { "epoch": 0.9706619489901224, "grad_norm": 2.662722426110706, "learning_rate": 6.415034950610545e-06, "loss": 1.4531, "step": 32920 }, { "epoch": 0.9708093763821318, "grad_norm": 2.6570710183896886, "learning_rate": 6.414582353606226e-06, "loss": 1.4394, "step": 32925 }, { "epoch": 0.9709568037741413, "grad_norm": 2.6600102041693767, "learning_rate": 6.414129678193526e-06, "loss": 1.4495, "step": 32930 }, { "epoch": 0.9711042311661506, "grad_norm": 2.6643788935481525, "learning_rate": 6.41367692438577e-06, "loss": 1.3925, "step": 32935 }, { "epoch": 0.9712516585581601, "grad_norm": 2.8257731960081762, "learning_rate": 6.413224092196278e-06, "loss": 1.4815, "step": 32940 }, { "epoch": 0.9713990859501695, "grad_norm": 2.718275584997157, "learning_rate": 6.412771181638377e-06, "loss": 1.4081, "step": 32945 }, { "epoch": 0.971546513342179, "grad_norm": 2.7752463051312275, "learning_rate": 6.4123181927253934e-06, "loss": 1.4644, "step": 32950 }, { "epoch": 0.9716939407341885, "grad_norm": 2.658734743543748, "learning_rate": 6.411865125470655e-06, "loss": 1.4841, "step": 32955 }, { "epoch": 0.9718413681261978, "grad_norm": 2.792063618134813, "learning_rate": 6.411411979887498e-06, "loss": 1.4945, "step": 32960 }, { "epoch": 0.9719887955182073, "grad_norm": 2.751518727359862, "learning_rate": 6.410958755989254e-06, "loss": 1.4739, "step": 32965 }, { "epoch": 0.9721362229102167, "grad_norm": 2.7913353320455956, "learning_rate": 6.410505453789259e-06, "loss": 1.4573, "step": 32970 }, { "epoch": 0.9722836503022262, "grad_norm": 2.5824258613914925, "learning_rate": 6.410052073300853e-06, "loss": 1.5174, "step": 32975 }, { "epoch": 0.9724310776942355, "grad_norm": 2.7237953242200312, "learning_rate": 6.409598614537378e-06, "loss": 1.421, "step": 32980 }, { "epoch": 0.972578505086245, "grad_norm": 2.5703271164230665, "learning_rate": 6.409145077512177e-06, "loss": 1.5026, "step": 32985 }, { "epoch": 0.9727259324782545, "grad_norm": 2.8051402886742456, "learning_rate": 6.4086914622385955e-06, "loss": 1.4556, "step": 32990 }, { "epoch": 0.9728733598702639, "grad_norm": 2.6674711020762483, "learning_rate": 6.408237768729982e-06, "loss": 1.4888, "step": 32995 }, { "epoch": 0.9730207872622734, "grad_norm": 2.629377058326987, "learning_rate": 6.407783996999687e-06, "loss": 1.4765, "step": 33000 }, { "epoch": 0.9730207872622734, "eval_loss": 1.135459303855896, "eval_runtime": 4.2823, "eval_samples_per_second": 92.474, "eval_steps_per_second": 3.036, "step": 33000 }, { "epoch": 0.9731682146542827, "grad_norm": 2.668815467043759, "learning_rate": 6.407330147061062e-06, "loss": 1.5042, "step": 33005 }, { "epoch": 0.9733156420462922, "grad_norm": 2.5551751244105843, "learning_rate": 6.406876218927464e-06, "loss": 1.4609, "step": 33010 }, { "epoch": 0.9734630694383016, "grad_norm": 2.6544530203707972, "learning_rate": 6.40642221261225e-06, "loss": 1.5188, "step": 33015 }, { "epoch": 0.9736104968303111, "grad_norm": 2.6955185334027094, "learning_rate": 6.405968128128777e-06, "loss": 1.4575, "step": 33020 }, { "epoch": 0.9737579242223205, "grad_norm": 2.7530231373352736, "learning_rate": 6.4055139654904105e-06, "loss": 1.4734, "step": 33025 }, { "epoch": 0.9739053516143299, "grad_norm": 2.739785556996425, "learning_rate": 6.405059724710513e-06, "loss": 1.4683, "step": 33030 }, { "epoch": 0.9740527790063394, "grad_norm": 2.531949349065621, "learning_rate": 6.4046054058024516e-06, "loss": 1.4733, "step": 33035 }, { "epoch": 0.9742002063983488, "grad_norm": 2.777549997157257, "learning_rate": 6.4041510087795945e-06, "loss": 1.4388, "step": 33040 }, { "epoch": 0.9743476337903583, "grad_norm": 2.7018625300442043, "learning_rate": 6.403696533655312e-06, "loss": 1.4792, "step": 33045 }, { "epoch": 0.9744950611823677, "grad_norm": 2.7461012504533464, "learning_rate": 6.403241980442981e-06, "loss": 1.4953, "step": 33050 }, { "epoch": 0.9746424885743771, "grad_norm": 2.551507755933375, "learning_rate": 6.402787349155974e-06, "loss": 1.4413, "step": 33055 }, { "epoch": 0.9747899159663865, "grad_norm": 2.783138637559236, "learning_rate": 6.4023326398076705e-06, "loss": 1.5337, "step": 33060 }, { "epoch": 0.974937343358396, "grad_norm": 2.5786870654066205, "learning_rate": 6.401877852411451e-06, "loss": 1.4403, "step": 33065 }, { "epoch": 0.9750847707504054, "grad_norm": 2.7013838439245013, "learning_rate": 6.401422986980696e-06, "loss": 1.5019, "step": 33070 }, { "epoch": 0.9752321981424149, "grad_norm": 2.7390558559253857, "learning_rate": 6.400968043528793e-06, "loss": 1.4672, "step": 33075 }, { "epoch": 0.9753796255344243, "grad_norm": 2.635259071558036, "learning_rate": 6.400513022069128e-06, "loss": 1.4542, "step": 33080 }, { "epoch": 0.9755270529264337, "grad_norm": 2.7499646048992443, "learning_rate": 6.40005792261509e-06, "loss": 1.5035, "step": 33085 }, { "epoch": 0.9756744803184432, "grad_norm": 2.8102282839609973, "learning_rate": 6.399602745180074e-06, "loss": 1.5349, "step": 33090 }, { "epoch": 0.9758219077104526, "grad_norm": 2.6359544700058457, "learning_rate": 6.39914748977747e-06, "loss": 1.475, "step": 33095 }, { "epoch": 0.9759693351024621, "grad_norm": 2.7486932838472504, "learning_rate": 6.398692156420675e-06, "loss": 1.4662, "step": 33100 }, { "epoch": 0.9761167624944714, "grad_norm": 2.9432790304823175, "learning_rate": 6.398236745123091e-06, "loss": 1.5667, "step": 33105 }, { "epoch": 0.9762641898864809, "grad_norm": 3.5558690515821096, "learning_rate": 6.397781255898114e-06, "loss": 1.4405, "step": 33110 }, { "epoch": 0.9764116172784904, "grad_norm": 2.7037251478794824, "learning_rate": 6.397325688759153e-06, "loss": 1.4845, "step": 33115 }, { "epoch": 0.9765590446704998, "grad_norm": 2.8015490363387747, "learning_rate": 6.396870043719609e-06, "loss": 1.4878, "step": 33120 }, { "epoch": 0.9767064720625093, "grad_norm": 2.80514682169168, "learning_rate": 6.396414320792893e-06, "loss": 1.5153, "step": 33125 }, { "epoch": 0.9768538994545186, "grad_norm": 2.659098849378103, "learning_rate": 6.395958519992411e-06, "loss": 1.4549, "step": 33130 }, { "epoch": 0.9770013268465281, "grad_norm": 2.7376200530930053, "learning_rate": 6.395502641331581e-06, "loss": 1.4405, "step": 33135 }, { "epoch": 0.9771487542385375, "grad_norm": 2.7079928887093994, "learning_rate": 6.395046684823813e-06, "loss": 1.5118, "step": 33140 }, { "epoch": 0.977296181630547, "grad_norm": 2.683659855148543, "learning_rate": 6.394590650482527e-06, "loss": 1.4462, "step": 33145 }, { "epoch": 0.9774436090225563, "grad_norm": 2.671592748393328, "learning_rate": 6.394134538321142e-06, "loss": 1.4964, "step": 33150 }, { "epoch": 0.9775910364145658, "grad_norm": 2.6113065103730824, "learning_rate": 6.393678348353078e-06, "loss": 1.462, "step": 33155 }, { "epoch": 0.9777384638065753, "grad_norm": 2.7975993958558147, "learning_rate": 6.39322208059176e-06, "loss": 1.4779, "step": 33160 }, { "epoch": 0.9778858911985847, "grad_norm": 2.720982828192534, "learning_rate": 6.3927657350506135e-06, "loss": 1.4977, "step": 33165 }, { "epoch": 0.9780333185905942, "grad_norm": 2.6652898696871614, "learning_rate": 6.392309311743068e-06, "loss": 1.4308, "step": 33170 }, { "epoch": 0.9781807459826035, "grad_norm": 2.737284617014193, "learning_rate": 6.391852810682555e-06, "loss": 1.4556, "step": 33175 }, { "epoch": 0.978328173374613, "grad_norm": 2.8129679880232925, "learning_rate": 6.391396231882505e-06, "loss": 1.4787, "step": 33180 }, { "epoch": 0.9784756007666224, "grad_norm": 2.5694964795770154, "learning_rate": 6.390939575356354e-06, "loss": 1.4454, "step": 33185 }, { "epoch": 0.9786230281586319, "grad_norm": 2.667460178521527, "learning_rate": 6.390482841117542e-06, "loss": 1.5015, "step": 33190 }, { "epoch": 0.9787704555506413, "grad_norm": 2.7241036257485893, "learning_rate": 6.390026029179506e-06, "loss": 1.5019, "step": 33195 }, { "epoch": 0.9789178829426507, "grad_norm": 2.7772616345009236, "learning_rate": 6.38956913955569e-06, "loss": 1.4608, "step": 33200 }, { "epoch": 0.9790653103346602, "grad_norm": 2.5976355788083847, "learning_rate": 6.389112172259539e-06, "loss": 1.4594, "step": 33205 }, { "epoch": 0.9792127377266696, "grad_norm": 2.6321948738482495, "learning_rate": 6.388655127304497e-06, "loss": 1.4504, "step": 33210 }, { "epoch": 0.9793601651186791, "grad_norm": 2.663230782554165, "learning_rate": 6.388198004704016e-06, "loss": 1.4724, "step": 33215 }, { "epoch": 0.9795075925106885, "grad_norm": 3.3828374679846895, "learning_rate": 6.387740804471546e-06, "loss": 1.4581, "step": 33220 }, { "epoch": 0.9796550199026979, "grad_norm": 2.69238391737682, "learning_rate": 6.38728352662054e-06, "loss": 1.4929, "step": 33225 }, { "epoch": 0.9798024472947073, "grad_norm": 2.8030508741638935, "learning_rate": 6.386826171164455e-06, "loss": 1.4394, "step": 33230 }, { "epoch": 0.9799498746867168, "grad_norm": 2.6894299441931495, "learning_rate": 6.38636873811675e-06, "loss": 1.4716, "step": 33235 }, { "epoch": 0.9800973020787263, "grad_norm": 2.8055737534934546, "learning_rate": 6.385911227490885e-06, "loss": 1.4841, "step": 33240 }, { "epoch": 0.9802447294707357, "grad_norm": 2.7492047237737616, "learning_rate": 6.385453639300321e-06, "loss": 1.4238, "step": 33245 }, { "epoch": 0.9803921568627451, "grad_norm": 2.954526350356859, "learning_rate": 6.384995973558524e-06, "loss": 1.4641, "step": 33250 }, { "epoch": 0.9805395842547545, "grad_norm": 2.7607973584711427, "learning_rate": 6.384538230278962e-06, "loss": 1.5, "step": 33255 }, { "epoch": 0.980687011646764, "grad_norm": 2.6542353091784974, "learning_rate": 6.384080409475104e-06, "loss": 1.4422, "step": 33260 }, { "epoch": 0.9808344390387734, "grad_norm": 2.5802084199267075, "learning_rate": 6.383622511160423e-06, "loss": 1.4544, "step": 33265 }, { "epoch": 0.9809818664307829, "grad_norm": 2.6569335557535045, "learning_rate": 6.3831645353483905e-06, "loss": 1.4528, "step": 33270 }, { "epoch": 0.9811292938227922, "grad_norm": 2.537455604957913, "learning_rate": 6.3827064820524876e-06, "loss": 1.5037, "step": 33275 }, { "epoch": 0.9812767212148017, "grad_norm": 2.708070941955833, "learning_rate": 6.382248351286188e-06, "loss": 1.4962, "step": 33280 }, { "epoch": 0.9814241486068112, "grad_norm": 2.6554889863314872, "learning_rate": 6.381790143062975e-06, "loss": 1.4381, "step": 33285 }, { "epoch": 0.9815715759988206, "grad_norm": 2.7592076725542327, "learning_rate": 6.381331857396333e-06, "loss": 1.4841, "step": 33290 }, { "epoch": 0.9817190033908301, "grad_norm": 2.471046472346933, "learning_rate": 6.380873494299746e-06, "loss": 1.3822, "step": 33295 }, { "epoch": 0.9818664307828394, "grad_norm": 2.6518341153999017, "learning_rate": 6.380415053786702e-06, "loss": 1.5342, "step": 33300 }, { "epoch": 0.9820138581748489, "grad_norm": 2.779557451173044, "learning_rate": 6.379956535870691e-06, "loss": 1.4501, "step": 33305 }, { "epoch": 0.9821612855668583, "grad_norm": 2.8070698343578195, "learning_rate": 6.3794979405652075e-06, "loss": 1.4996, "step": 33310 }, { "epoch": 0.9823087129588678, "grad_norm": 2.6506422687542943, "learning_rate": 6.379039267883743e-06, "loss": 1.4284, "step": 33315 }, { "epoch": 0.9824561403508771, "grad_norm": 2.7420704902680644, "learning_rate": 6.3785805178397965e-06, "loss": 1.4409, "step": 33320 }, { "epoch": 0.9826035677428866, "grad_norm": 2.714528413585445, "learning_rate": 6.378121690446867e-06, "loss": 1.4784, "step": 33325 }, { "epoch": 0.982750995134896, "grad_norm": 2.750123932391411, "learning_rate": 6.377662785718456e-06, "loss": 1.4702, "step": 33330 }, { "epoch": 0.9828984225269055, "grad_norm": 2.707850041596211, "learning_rate": 6.377203803668067e-06, "loss": 1.4304, "step": 33335 }, { "epoch": 0.983045849918915, "grad_norm": 2.6690184974157565, "learning_rate": 6.376744744309205e-06, "loss": 1.4774, "step": 33340 }, { "epoch": 0.9831932773109243, "grad_norm": 2.6871947617663143, "learning_rate": 6.37628560765538e-06, "loss": 1.4409, "step": 33345 }, { "epoch": 0.9833407047029338, "grad_norm": 2.830067422081824, "learning_rate": 6.375826393720102e-06, "loss": 1.5201, "step": 33350 }, { "epoch": 0.9834881320949432, "grad_norm": 2.7337869961918537, "learning_rate": 6.375367102516885e-06, "loss": 1.5023, "step": 33355 }, { "epoch": 0.9836355594869527, "grad_norm": 2.77216998141194, "learning_rate": 6.3749077340592415e-06, "loss": 1.4793, "step": 33360 }, { "epoch": 0.9837829868789622, "grad_norm": 2.894760041264678, "learning_rate": 6.374448288360692e-06, "loss": 1.492, "step": 33365 }, { "epoch": 0.9839304142709715, "grad_norm": 2.8167569413799303, "learning_rate": 6.373988765434754e-06, "loss": 1.4767, "step": 33370 }, { "epoch": 0.984077841662981, "grad_norm": 2.6317885577653843, "learning_rate": 6.373529165294951e-06, "loss": 1.4147, "step": 33375 }, { "epoch": 0.9842252690549904, "grad_norm": 2.781778761448688, "learning_rate": 6.373069487954806e-06, "loss": 1.4925, "step": 33380 }, { "epoch": 0.9843726964469999, "grad_norm": 2.8198727720203425, "learning_rate": 6.372609733427846e-06, "loss": 1.477, "step": 33385 }, { "epoch": 0.9845201238390093, "grad_norm": 2.7474985199383264, "learning_rate": 6.3721499017276005e-06, "loss": 1.518, "step": 33390 }, { "epoch": 0.9846675512310187, "grad_norm": 2.7420601162752583, "learning_rate": 6.3716899928676e-06, "loss": 1.4458, "step": 33395 }, { "epoch": 0.9848149786230281, "grad_norm": 2.7245000637070884, "learning_rate": 6.3712300068613765e-06, "loss": 1.4474, "step": 33400 }, { "epoch": 0.9849624060150376, "grad_norm": 2.8254923001539445, "learning_rate": 6.370769943722468e-06, "loss": 1.4461, "step": 33405 }, { "epoch": 0.985109833407047, "grad_norm": 2.7032281006321006, "learning_rate": 6.370309803464412e-06, "loss": 1.4716, "step": 33410 }, { "epoch": 0.9852572607990565, "grad_norm": 2.6004814665459284, "learning_rate": 6.3698495861007474e-06, "loss": 1.4886, "step": 33415 }, { "epoch": 0.9854046881910659, "grad_norm": 2.7390897073608262, "learning_rate": 6.369389291645016e-06, "loss": 1.4754, "step": 33420 }, { "epoch": 0.9855521155830753, "grad_norm": 2.6733067576242315, "learning_rate": 6.368928920110764e-06, "loss": 1.4582, "step": 33425 }, { "epoch": 0.9856995429750848, "grad_norm": 2.606546019493718, "learning_rate": 6.368468471511539e-06, "loss": 1.4757, "step": 33430 }, { "epoch": 0.9858469703670942, "grad_norm": 2.639617307784679, "learning_rate": 6.368007945860888e-06, "loss": 1.4549, "step": 33435 }, { "epoch": 0.9859943977591037, "grad_norm": 2.694119916185994, "learning_rate": 6.367547343172366e-06, "loss": 1.5081, "step": 33440 }, { "epoch": 0.986141825151113, "grad_norm": 2.87932573580105, "learning_rate": 6.367086663459522e-06, "loss": 1.4484, "step": 33445 }, { "epoch": 0.9862892525431225, "grad_norm": 2.597121819187356, "learning_rate": 6.366625906735916e-06, "loss": 1.4843, "step": 33450 }, { "epoch": 0.986436679935132, "grad_norm": 2.6805515435936216, "learning_rate": 6.366165073015104e-06, "loss": 1.445, "step": 33455 }, { "epoch": 0.9865841073271414, "grad_norm": 2.6471922933677665, "learning_rate": 6.365704162310648e-06, "loss": 1.4386, "step": 33460 }, { "epoch": 0.9867315347191509, "grad_norm": 2.731611587121345, "learning_rate": 6.36524317463611e-06, "loss": 1.492, "step": 33465 }, { "epoch": 0.9868789621111602, "grad_norm": 2.77669310682002, "learning_rate": 6.364782110005055e-06, "loss": 1.4937, "step": 33470 }, { "epoch": 0.9870263895031697, "grad_norm": 2.5526579365602866, "learning_rate": 6.364320968431051e-06, "loss": 1.4356, "step": 33475 }, { "epoch": 0.9871738168951791, "grad_norm": 2.789055026446641, "learning_rate": 6.363859749927667e-06, "loss": 1.4697, "step": 33480 }, { "epoch": 0.9873212442871886, "grad_norm": 2.8781451372776483, "learning_rate": 6.363398454508475e-06, "loss": 1.4425, "step": 33485 }, { "epoch": 0.9874686716791979, "grad_norm": 2.578252889214316, "learning_rate": 6.36293708218705e-06, "loss": 1.4515, "step": 33490 }, { "epoch": 0.9876160990712074, "grad_norm": 2.9808030817627107, "learning_rate": 6.362475632976967e-06, "loss": 1.4883, "step": 33495 }, { "epoch": 0.9877635264632169, "grad_norm": 2.623460394780005, "learning_rate": 6.362014106891805e-06, "loss": 1.453, "step": 33500 }, { "epoch": 0.9877635264632169, "eval_loss": 1.1344670057296753, "eval_runtime": 4.2064, "eval_samples_per_second": 94.143, "eval_steps_per_second": 3.091, "step": 33500 }, { "epoch": 0.9879109538552263, "grad_norm": 2.7622369655189942, "learning_rate": 6.361552503945147e-06, "loss": 1.4905, "step": 33505 }, { "epoch": 0.9880583812472358, "grad_norm": 2.649114144581081, "learning_rate": 6.3610908241505725e-06, "loss": 1.4809, "step": 33510 }, { "epoch": 0.9882058086392451, "grad_norm": 2.6792236830615774, "learning_rate": 6.360629067521671e-06, "loss": 1.4455, "step": 33515 }, { "epoch": 0.9883532360312546, "grad_norm": 2.7492224635564058, "learning_rate": 6.360167234072027e-06, "loss": 1.4925, "step": 33520 }, { "epoch": 0.988500663423264, "grad_norm": 2.5571508458348893, "learning_rate": 6.359705323815231e-06, "loss": 1.4415, "step": 33525 }, { "epoch": 0.9886480908152735, "grad_norm": 2.6122495178292313, "learning_rate": 6.359243336764877e-06, "loss": 1.475, "step": 33530 }, { "epoch": 0.988795518207283, "grad_norm": 2.6643259556272687, "learning_rate": 6.3587812729345575e-06, "loss": 1.4043, "step": 33535 }, { "epoch": 0.9889429455992923, "grad_norm": 2.8149634825229466, "learning_rate": 6.3583191323378706e-06, "loss": 1.4126, "step": 33540 }, { "epoch": 0.9890903729913018, "grad_norm": 2.7839143478180017, "learning_rate": 6.357856914988415e-06, "loss": 1.4886, "step": 33545 }, { "epoch": 0.9892378003833112, "grad_norm": 2.7410057195601856, "learning_rate": 6.357394620899792e-06, "loss": 1.5031, "step": 33550 }, { "epoch": 0.9893852277753207, "grad_norm": 2.8898630163051924, "learning_rate": 6.3569322500856045e-06, "loss": 1.4824, "step": 33555 }, { "epoch": 0.9895326551673301, "grad_norm": 2.753726209516246, "learning_rate": 6.356469802559458e-06, "loss": 1.4813, "step": 33560 }, { "epoch": 0.9896800825593395, "grad_norm": 2.5156862535897644, "learning_rate": 6.356007278334963e-06, "loss": 1.4719, "step": 33565 }, { "epoch": 0.9898275099513489, "grad_norm": 2.8502210613138756, "learning_rate": 6.355544677425727e-06, "loss": 1.4795, "step": 33570 }, { "epoch": 0.9899749373433584, "grad_norm": 2.7204035280292467, "learning_rate": 6.355081999845363e-06, "loss": 1.516, "step": 33575 }, { "epoch": 0.9901223647353679, "grad_norm": 2.587371008535906, "learning_rate": 6.354619245607488e-06, "loss": 1.4366, "step": 33580 }, { "epoch": 0.9902697921273773, "grad_norm": 2.731214153085702, "learning_rate": 6.354156414725716e-06, "loss": 1.4923, "step": 33585 }, { "epoch": 0.9904172195193867, "grad_norm": 2.741305216439408, "learning_rate": 6.353693507213669e-06, "loss": 1.4513, "step": 33590 }, { "epoch": 0.9905646469113961, "grad_norm": 2.630544456973652, "learning_rate": 6.353230523084967e-06, "loss": 1.4781, "step": 33595 }, { "epoch": 0.9907120743034056, "grad_norm": 2.694472544967248, "learning_rate": 6.352767462353235e-06, "loss": 1.4844, "step": 33600 }, { "epoch": 0.990859501695415, "grad_norm": 2.539338434642883, "learning_rate": 6.352304325032098e-06, "loss": 1.4274, "step": 33605 }, { "epoch": 0.9910069290874245, "grad_norm": 2.6083536246295327, "learning_rate": 6.351841111135184e-06, "loss": 1.465, "step": 33610 }, { "epoch": 0.9911543564794338, "grad_norm": 2.740448855184074, "learning_rate": 6.351377820676125e-06, "loss": 1.5524, "step": 33615 }, { "epoch": 0.9913017838714433, "grad_norm": 2.642336423883237, "learning_rate": 6.350914453668553e-06, "loss": 1.4332, "step": 33620 }, { "epoch": 0.9914492112634528, "grad_norm": 2.7770011102163803, "learning_rate": 6.350451010126102e-06, "loss": 1.4891, "step": 33625 }, { "epoch": 0.9915966386554622, "grad_norm": 2.805250760624242, "learning_rate": 6.349987490062413e-06, "loss": 1.5004, "step": 33630 }, { "epoch": 0.9917440660474717, "grad_norm": 2.7778842143448377, "learning_rate": 6.349523893491122e-06, "loss": 1.4973, "step": 33635 }, { "epoch": 0.991891493439481, "grad_norm": 2.5751159197848223, "learning_rate": 6.349060220425871e-06, "loss": 1.4396, "step": 33640 }, { "epoch": 0.9920389208314905, "grad_norm": 2.7686928476005095, "learning_rate": 6.3485964708803076e-06, "loss": 1.466, "step": 33645 }, { "epoch": 0.9921863482234999, "grad_norm": 2.7197977196916554, "learning_rate": 6.348132644868074e-06, "loss": 1.4147, "step": 33650 }, { "epoch": 0.9923337756155094, "grad_norm": 2.742166394094242, "learning_rate": 6.3476687424028215e-06, "loss": 1.4504, "step": 33655 }, { "epoch": 0.9924812030075187, "grad_norm": 2.801653892511482, "learning_rate": 6.3472047634982e-06, "loss": 1.4627, "step": 33660 }, { "epoch": 0.9926286303995282, "grad_norm": 2.549397770281112, "learning_rate": 6.346740708167862e-06, "loss": 1.5112, "step": 33665 }, { "epoch": 0.9927760577915377, "grad_norm": 2.6245207915393363, "learning_rate": 6.346276576425463e-06, "loss": 1.4131, "step": 33670 }, { "epoch": 0.9929234851835471, "grad_norm": 2.724654773516289, "learning_rate": 6.345812368284663e-06, "loss": 1.431, "step": 33675 }, { "epoch": 0.9930709125755566, "grad_norm": 2.6879704081740368, "learning_rate": 6.345348083759119e-06, "loss": 1.4829, "step": 33680 }, { "epoch": 0.9932183399675659, "grad_norm": 2.7046108864600193, "learning_rate": 6.344883722862493e-06, "loss": 1.4806, "step": 33685 }, { "epoch": 0.9933657673595754, "grad_norm": 2.7177993230758277, "learning_rate": 6.344419285608451e-06, "loss": 1.4609, "step": 33690 }, { "epoch": 0.9935131947515848, "grad_norm": 2.832286325405044, "learning_rate": 6.3439547720106595e-06, "loss": 1.4764, "step": 33695 }, { "epoch": 0.9936606221435943, "grad_norm": 2.61449413644998, "learning_rate": 6.343490182082787e-06, "loss": 1.4313, "step": 33700 }, { "epoch": 0.9938080495356038, "grad_norm": 2.586938381236294, "learning_rate": 6.343025515838504e-06, "loss": 1.4167, "step": 33705 }, { "epoch": 0.9939554769276131, "grad_norm": 2.8642055640919737, "learning_rate": 6.3425607732914835e-06, "loss": 1.4503, "step": 33710 }, { "epoch": 0.9941029043196226, "grad_norm": 2.7310855793447475, "learning_rate": 6.342095954455403e-06, "loss": 1.5126, "step": 33715 }, { "epoch": 0.994250331711632, "grad_norm": 2.552644722638369, "learning_rate": 6.341631059343938e-06, "loss": 1.487, "step": 33720 }, { "epoch": 0.9943977591036415, "grad_norm": 2.526995728054609, "learning_rate": 6.341166087970772e-06, "loss": 1.4388, "step": 33725 }, { "epoch": 0.9945451864956509, "grad_norm": 2.8034316191002935, "learning_rate": 6.340701040349584e-06, "loss": 1.4851, "step": 33730 }, { "epoch": 0.9946926138876603, "grad_norm": 2.620862968207679, "learning_rate": 6.340235916494059e-06, "loss": 1.4458, "step": 33735 }, { "epoch": 0.9948400412796697, "grad_norm": 2.8761615152331164, "learning_rate": 6.339770716417885e-06, "loss": 1.5284, "step": 33740 }, { "epoch": 0.9949874686716792, "grad_norm": 2.8671390464543527, "learning_rate": 6.339305440134749e-06, "loss": 1.527, "step": 33745 }, { "epoch": 0.9951348960636887, "grad_norm": 2.710566616130394, "learning_rate": 6.338840087658346e-06, "loss": 1.5086, "step": 33750 }, { "epoch": 0.9952823234556981, "grad_norm": 2.750847337177647, "learning_rate": 6.338374659002366e-06, "loss": 1.4408, "step": 33755 }, { "epoch": 0.9954297508477075, "grad_norm": 2.6430166022209574, "learning_rate": 6.337909154180506e-06, "loss": 1.4711, "step": 33760 }, { "epoch": 0.9955771782397169, "grad_norm": 2.6665168748631545, "learning_rate": 6.337443573206464e-06, "loss": 1.5115, "step": 33765 }, { "epoch": 0.9957246056317264, "grad_norm": 2.636442229299097, "learning_rate": 6.3369779160939415e-06, "loss": 1.4168, "step": 33770 }, { "epoch": 0.9958720330237358, "grad_norm": 2.812788980344461, "learning_rate": 6.336512182856639e-06, "loss": 1.4691, "step": 33775 }, { "epoch": 0.9960194604157453, "grad_norm": 2.7796986798399246, "learning_rate": 6.336046373508262e-06, "loss": 1.4818, "step": 33780 }, { "epoch": 0.9961668878077546, "grad_norm": 2.6761692960054164, "learning_rate": 6.3355804880625164e-06, "loss": 1.4394, "step": 33785 }, { "epoch": 0.9963143151997641, "grad_norm": 2.66472243455189, "learning_rate": 6.335114526533114e-06, "loss": 1.4639, "step": 33790 }, { "epoch": 0.9964617425917736, "grad_norm": 2.6094983948434867, "learning_rate": 6.334648488933764e-06, "loss": 1.4218, "step": 33795 }, { "epoch": 0.996609169983783, "grad_norm": 2.6221589573271213, "learning_rate": 6.334182375278181e-06, "loss": 1.5091, "step": 33800 }, { "epoch": 0.9967565973757925, "grad_norm": 2.8229379339488974, "learning_rate": 6.333716185580081e-06, "loss": 1.492, "step": 33805 }, { "epoch": 0.9969040247678018, "grad_norm": 2.6209171648451792, "learning_rate": 6.3332499198531825e-06, "loss": 1.4583, "step": 33810 }, { "epoch": 0.9970514521598113, "grad_norm": 2.7352113683530765, "learning_rate": 6.3327835781112045e-06, "loss": 1.4298, "step": 33815 }, { "epoch": 0.9971988795518207, "grad_norm": 2.558298243847808, "learning_rate": 6.3323171603678706e-06, "loss": 1.4856, "step": 33820 }, { "epoch": 0.9973463069438302, "grad_norm": 2.6625594903813483, "learning_rate": 6.331850666636906e-06, "loss": 1.4767, "step": 33825 }, { "epoch": 0.9974937343358395, "grad_norm": 2.9322161558674704, "learning_rate": 6.331384096932038e-06, "loss": 1.4731, "step": 33830 }, { "epoch": 0.997641161727849, "grad_norm": 2.657561555836279, "learning_rate": 6.330917451266993e-06, "loss": 1.471, "step": 33835 }, { "epoch": 0.9977885891198585, "grad_norm": 2.7458117900922385, "learning_rate": 6.330450729655506e-06, "loss": 1.4329, "step": 33840 }, { "epoch": 0.9979360165118679, "grad_norm": 2.667458741568726, "learning_rate": 6.329983932111311e-06, "loss": 1.4528, "step": 33845 }, { "epoch": 0.9980834439038774, "grad_norm": 2.5559561186761215, "learning_rate": 6.329517058648142e-06, "loss": 1.5075, "step": 33850 }, { "epoch": 0.9982308712958867, "grad_norm": 2.726025771209421, "learning_rate": 6.329050109279737e-06, "loss": 1.4244, "step": 33855 }, { "epoch": 0.9983782986878962, "grad_norm": 2.8165608419843196, "learning_rate": 6.328583084019839e-06, "loss": 1.5231, "step": 33860 }, { "epoch": 0.9985257260799056, "grad_norm": 2.779310727521057, "learning_rate": 6.328115982882188e-06, "loss": 1.4684, "step": 33865 }, { "epoch": 0.9986731534719151, "grad_norm": 2.5553463546439827, "learning_rate": 6.327648805880532e-06, "loss": 1.44, "step": 33870 }, { "epoch": 0.9988205808639246, "grad_norm": 2.6833662638939018, "learning_rate": 6.327181553028615e-06, "loss": 1.4378, "step": 33875 }, { "epoch": 0.9989680082559339, "grad_norm": 2.7523360774536108, "learning_rate": 6.326714224340189e-06, "loss": 1.4561, "step": 33880 }, { "epoch": 0.9991154356479434, "grad_norm": 2.786784241772675, "learning_rate": 6.326246819829004e-06, "loss": 1.4877, "step": 33885 }, { "epoch": 0.9992628630399528, "grad_norm": 2.678153008009686, "learning_rate": 6.325779339508814e-06, "loss": 1.5017, "step": 33890 }, { "epoch": 0.9994102904319623, "grad_norm": 2.687343930217536, "learning_rate": 6.325311783393377e-06, "loss": 1.4595, "step": 33895 }, { "epoch": 0.9995577178239717, "grad_norm": 2.581864512717318, "learning_rate": 6.324844151496449e-06, "loss": 1.3916, "step": 33900 }, { "epoch": 0.9997051452159811, "grad_norm": 2.6294502230230954, "learning_rate": 6.3243764438317924e-06, "loss": 1.5058, "step": 33905 }, { "epoch": 0.9998525726079905, "grad_norm": 2.7277484405925536, "learning_rate": 6.323908660413169e-06, "loss": 1.473, "step": 33910 }, { "epoch": 1.0, "grad_norm": 2.7335261218859737, "learning_rate": 6.323440801254344e-06, "loss": 1.4521, "step": 33915 }, { "epoch": 1.0001474273920095, "grad_norm": 2.867157441702481, "learning_rate": 6.322972866369085e-06, "loss": 1.3362, "step": 33920 }, { "epoch": 1.000294854784019, "grad_norm": 2.881867013161956, "learning_rate": 6.322504855771162e-06, "loss": 1.3293, "step": 33925 }, { "epoch": 1.0004422821760284, "grad_norm": 3.0304356820604865, "learning_rate": 6.322036769474345e-06, "loss": 1.3501, "step": 33930 }, { "epoch": 1.0005897095680378, "grad_norm": 3.0591564369628763, "learning_rate": 6.321568607492411e-06, "loss": 1.3367, "step": 33935 }, { "epoch": 1.000737136960047, "grad_norm": 2.926141844818062, "learning_rate": 6.321100369839132e-06, "loss": 1.2983, "step": 33940 }, { "epoch": 1.0008845643520565, "grad_norm": 2.8481576498630448, "learning_rate": 6.32063205652829e-06, "loss": 1.3411, "step": 33945 }, { "epoch": 1.001031991744066, "grad_norm": 2.7499471580942054, "learning_rate": 6.320163667573664e-06, "loss": 1.3019, "step": 33950 }, { "epoch": 1.0011794191360754, "grad_norm": 2.9177314801802514, "learning_rate": 6.319695202989037e-06, "loss": 1.3785, "step": 33955 }, { "epoch": 1.001326846528085, "grad_norm": 2.9530852783768493, "learning_rate": 6.3192266627881954e-06, "loss": 1.3313, "step": 33960 }, { "epoch": 1.0014742739200944, "grad_norm": 3.180710680315285, "learning_rate": 6.318758046984925e-06, "loss": 1.3445, "step": 33965 }, { "epoch": 1.0016217013121038, "grad_norm": 2.81836671418679, "learning_rate": 6.318289355593015e-06, "loss": 1.3131, "step": 33970 }, { "epoch": 1.0017691287041133, "grad_norm": 2.872160826134343, "learning_rate": 6.3178205886262585e-06, "loss": 1.3264, "step": 33975 }, { "epoch": 1.0019165560961227, "grad_norm": 2.8543697123907004, "learning_rate": 6.31735174609845e-06, "loss": 1.3242, "step": 33980 }, { "epoch": 1.0020639834881322, "grad_norm": 2.896623729413617, "learning_rate": 6.3168828280233845e-06, "loss": 1.3571, "step": 33985 }, { "epoch": 1.0022114108801414, "grad_norm": 2.7350215204327673, "learning_rate": 6.31641383441486e-06, "loss": 1.309, "step": 33990 }, { "epoch": 1.0023588382721509, "grad_norm": 3.0099152295788567, "learning_rate": 6.315944765286678e-06, "loss": 1.3063, "step": 33995 }, { "epoch": 1.0025062656641603, "grad_norm": 2.948022261945124, "learning_rate": 6.3154756206526414e-06, "loss": 1.2859, "step": 34000 }, { "epoch": 1.0025062656641603, "eval_loss": 1.136983036994934, "eval_runtime": 4.2979, "eval_samples_per_second": 92.139, "eval_steps_per_second": 3.025, "step": 34000 }, { "epoch": 1.0026536930561698, "grad_norm": 2.89368931336358, "learning_rate": 6.3150064005265555e-06, "loss": 1.3182, "step": 34005 }, { "epoch": 1.0028011204481793, "grad_norm": 2.9320092894779055, "learning_rate": 6.314537104922227e-06, "loss": 1.2989, "step": 34010 }, { "epoch": 1.0029485478401887, "grad_norm": 3.0106013449173856, "learning_rate": 6.314067733853465e-06, "loss": 1.3273, "step": 34015 }, { "epoch": 1.0030959752321982, "grad_norm": 3.0487294956958446, "learning_rate": 6.313598287334084e-06, "loss": 1.341, "step": 34020 }, { "epoch": 1.0032434026242076, "grad_norm": 2.881029969567283, "learning_rate": 6.313128765377894e-06, "loss": 1.2822, "step": 34025 }, { "epoch": 1.003390830016217, "grad_norm": 2.9793424866840352, "learning_rate": 6.312659167998715e-06, "loss": 1.2738, "step": 34030 }, { "epoch": 1.0035382574082266, "grad_norm": 2.8571392598590233, "learning_rate": 6.312189495210362e-06, "loss": 1.2503, "step": 34035 }, { "epoch": 1.0036856848002358, "grad_norm": 3.0733433824274736, "learning_rate": 6.3117197470266585e-06, "loss": 1.2874, "step": 34040 }, { "epoch": 1.0038331121922452, "grad_norm": 2.80193507016588, "learning_rate": 6.311249923461427e-06, "loss": 1.2997, "step": 34045 }, { "epoch": 1.0039805395842547, "grad_norm": 2.9315569988854038, "learning_rate": 6.310780024528491e-06, "loss": 1.3547, "step": 34050 }, { "epoch": 1.0041279669762642, "grad_norm": 2.972797644732997, "learning_rate": 6.310310050241679e-06, "loss": 1.3221, "step": 34055 }, { "epoch": 1.0042753943682736, "grad_norm": 2.9100037961619183, "learning_rate": 6.309840000614819e-06, "loss": 1.2847, "step": 34060 }, { "epoch": 1.004422821760283, "grad_norm": 2.821194374150678, "learning_rate": 6.3093698756617455e-06, "loss": 1.2719, "step": 34065 }, { "epoch": 1.0045702491522925, "grad_norm": 2.81697870454194, "learning_rate": 6.30889967539629e-06, "loss": 1.2885, "step": 34070 }, { "epoch": 1.004717676544302, "grad_norm": 2.797459381300851, "learning_rate": 6.3084293998322895e-06, "loss": 1.3129, "step": 34075 }, { "epoch": 1.0048651039363115, "grad_norm": 2.848702016098145, "learning_rate": 6.307959048983584e-06, "loss": 1.2764, "step": 34080 }, { "epoch": 1.0050125313283207, "grad_norm": 2.8631815046742575, "learning_rate": 6.307488622864011e-06, "loss": 1.3155, "step": 34085 }, { "epoch": 1.0051599587203301, "grad_norm": 2.927520934052213, "learning_rate": 6.307018121487416e-06, "loss": 1.3141, "step": 34090 }, { "epoch": 1.0053073861123396, "grad_norm": 2.857758018153315, "learning_rate": 6.3065475448676415e-06, "loss": 1.2973, "step": 34095 }, { "epoch": 1.005454813504349, "grad_norm": 3.1926227737156188, "learning_rate": 6.306076893018538e-06, "loss": 1.3561, "step": 34100 }, { "epoch": 1.0056022408963585, "grad_norm": 2.9538059140071264, "learning_rate": 6.305606165953952e-06, "loss": 1.3239, "step": 34105 }, { "epoch": 1.005749668288368, "grad_norm": 2.9203530708239924, "learning_rate": 6.305135363687735e-06, "loss": 1.3065, "step": 34110 }, { "epoch": 1.0058970956803774, "grad_norm": 2.8190953752275596, "learning_rate": 6.304664486233746e-06, "loss": 1.2792, "step": 34115 }, { "epoch": 1.006044523072387, "grad_norm": 3.000168766098658, "learning_rate": 6.304193533605835e-06, "loss": 1.3586, "step": 34120 }, { "epoch": 1.0061919504643964, "grad_norm": 2.976325831588524, "learning_rate": 6.303722505817863e-06, "loss": 1.3061, "step": 34125 }, { "epoch": 1.0063393778564058, "grad_norm": 2.971212788001469, "learning_rate": 6.30325140288369e-06, "loss": 1.3433, "step": 34130 }, { "epoch": 1.006486805248415, "grad_norm": 2.9057519958565527, "learning_rate": 6.302780224817179e-06, "loss": 1.3152, "step": 34135 }, { "epoch": 1.0066342326404245, "grad_norm": 2.9414318345497596, "learning_rate": 6.302308971632194e-06, "loss": 1.3188, "step": 34140 }, { "epoch": 1.006781660032434, "grad_norm": 2.922473492112751, "learning_rate": 6.301837643342605e-06, "loss": 1.2994, "step": 34145 }, { "epoch": 1.0069290874244434, "grad_norm": 2.9652983822430277, "learning_rate": 6.301366239962278e-06, "loss": 1.2771, "step": 34150 }, { "epoch": 1.0070765148164529, "grad_norm": 3.032235426806272, "learning_rate": 6.3008947615050875e-06, "loss": 1.365, "step": 34155 }, { "epoch": 1.0072239422084623, "grad_norm": 2.7959760836232648, "learning_rate": 6.3004232079849035e-06, "loss": 1.2429, "step": 34160 }, { "epoch": 1.0073713696004718, "grad_norm": 2.941938772137105, "learning_rate": 6.299951579415607e-06, "loss": 1.2907, "step": 34165 }, { "epoch": 1.0075187969924813, "grad_norm": 2.873448601123642, "learning_rate": 6.2994798758110725e-06, "loss": 1.3321, "step": 34170 }, { "epoch": 1.0076662243844907, "grad_norm": 2.9286311485184884, "learning_rate": 6.29900809718518e-06, "loss": 1.313, "step": 34175 }, { "epoch": 1.0078136517765002, "grad_norm": 2.9539524081610353, "learning_rate": 6.298536243551816e-06, "loss": 1.2885, "step": 34180 }, { "epoch": 1.0079610791685094, "grad_norm": 2.8432012817457837, "learning_rate": 6.2980643149248615e-06, "loss": 1.29, "step": 34185 }, { "epoch": 1.0081085065605189, "grad_norm": 3.0954786706112367, "learning_rate": 6.297592311318205e-06, "loss": 1.3542, "step": 34190 }, { "epoch": 1.0082559339525283, "grad_norm": 3.34837395263456, "learning_rate": 6.297120232745736e-06, "loss": 1.3638, "step": 34195 }, { "epoch": 1.0084033613445378, "grad_norm": 2.969133150869551, "learning_rate": 6.296648079221346e-06, "loss": 1.298, "step": 34200 }, { "epoch": 1.0085507887365472, "grad_norm": 2.855291368273826, "learning_rate": 6.296175850758927e-06, "loss": 1.3273, "step": 34205 }, { "epoch": 1.0086982161285567, "grad_norm": 2.914352429397918, "learning_rate": 6.295703547372378e-06, "loss": 1.2991, "step": 34210 }, { "epoch": 1.0088456435205662, "grad_norm": 2.8734792220858987, "learning_rate": 6.295231169075593e-06, "loss": 1.3401, "step": 34215 }, { "epoch": 1.0089930709125756, "grad_norm": 2.9321200825627716, "learning_rate": 6.294758715882476e-06, "loss": 1.2948, "step": 34220 }, { "epoch": 1.009140498304585, "grad_norm": 2.816807017759125, "learning_rate": 6.294286187806928e-06, "loss": 1.3292, "step": 34225 }, { "epoch": 1.0092879256965945, "grad_norm": 2.9740259522037, "learning_rate": 6.293813584862853e-06, "loss": 1.3619, "step": 34230 }, { "epoch": 1.0094353530886038, "grad_norm": 2.91696786850233, "learning_rate": 6.293340907064159e-06, "loss": 1.3103, "step": 34235 }, { "epoch": 1.0095827804806132, "grad_norm": 3.1396144600010736, "learning_rate": 6.292868154424754e-06, "loss": 1.2932, "step": 34240 }, { "epoch": 1.0097302078726227, "grad_norm": 2.741137442595814, "learning_rate": 6.292395326958551e-06, "loss": 1.2918, "step": 34245 }, { "epoch": 1.0098776352646321, "grad_norm": 2.894259795581738, "learning_rate": 6.291922424679463e-06, "loss": 1.3395, "step": 34250 }, { "epoch": 1.0100250626566416, "grad_norm": 2.9892534606128884, "learning_rate": 6.291449447601403e-06, "loss": 1.365, "step": 34255 }, { "epoch": 1.010172490048651, "grad_norm": 2.9437732803355066, "learning_rate": 6.290976395738292e-06, "loss": 1.3091, "step": 34260 }, { "epoch": 1.0103199174406605, "grad_norm": 2.7627464487473055, "learning_rate": 6.29050326910405e-06, "loss": 1.299, "step": 34265 }, { "epoch": 1.01046734483267, "grad_norm": 2.8964709455777355, "learning_rate": 6.290030067712597e-06, "loss": 1.2998, "step": 34270 }, { "epoch": 1.0106147722246794, "grad_norm": 2.8506376126196282, "learning_rate": 6.289556791577859e-06, "loss": 1.3056, "step": 34275 }, { "epoch": 1.0107621996166887, "grad_norm": 2.8357999020634215, "learning_rate": 6.289083440713762e-06, "loss": 1.3033, "step": 34280 }, { "epoch": 1.0109096270086981, "grad_norm": 2.9220191092405052, "learning_rate": 6.288610015134236e-06, "loss": 1.2723, "step": 34285 }, { "epoch": 1.0110570544007076, "grad_norm": 2.9645860280286196, "learning_rate": 6.288136514853212e-06, "loss": 1.3043, "step": 34290 }, { "epoch": 1.011204481792717, "grad_norm": 2.924373970774071, "learning_rate": 6.287662939884622e-06, "loss": 1.275, "step": 34295 }, { "epoch": 1.0113519091847265, "grad_norm": 2.9804247508651325, "learning_rate": 6.287189290242403e-06, "loss": 1.2764, "step": 34300 }, { "epoch": 1.011499336576736, "grad_norm": 2.962161825978157, "learning_rate": 6.286715565940492e-06, "loss": 1.3033, "step": 34305 }, { "epoch": 1.0116467639687454, "grad_norm": 2.864695411900223, "learning_rate": 6.286241766992828e-06, "loss": 1.3079, "step": 34310 }, { "epoch": 1.0117941913607549, "grad_norm": 2.8291361331150116, "learning_rate": 6.285767893413355e-06, "loss": 1.2852, "step": 34315 }, { "epoch": 1.0119416187527643, "grad_norm": 3.1316367304814374, "learning_rate": 6.2852939452160156e-06, "loss": 1.3551, "step": 34320 }, { "epoch": 1.0120890461447738, "grad_norm": 2.9756163099627795, "learning_rate": 6.284819922414758e-06, "loss": 1.3356, "step": 34325 }, { "epoch": 1.012236473536783, "grad_norm": 2.968419516473641, "learning_rate": 6.284345825023527e-06, "loss": 1.292, "step": 34330 }, { "epoch": 1.0123839009287925, "grad_norm": 2.899577642115739, "learning_rate": 6.283871653056278e-06, "loss": 1.3091, "step": 34335 }, { "epoch": 1.012531328320802, "grad_norm": 2.9465101098060438, "learning_rate": 6.283397406526962e-06, "loss": 1.2848, "step": 34340 }, { "epoch": 1.0126787557128114, "grad_norm": 2.733450718905686, "learning_rate": 6.282923085449534e-06, "loss": 1.2509, "step": 34345 }, { "epoch": 1.0128261831048209, "grad_norm": 2.9255267938118577, "learning_rate": 6.282448689837954e-06, "loss": 1.2794, "step": 34350 }, { "epoch": 1.0129736104968303, "grad_norm": 2.904273362346717, "learning_rate": 6.281974219706177e-06, "loss": 1.3551, "step": 34355 }, { "epoch": 1.0131210378888398, "grad_norm": 2.9730382544518585, "learning_rate": 6.281499675068168e-06, "loss": 1.3073, "step": 34360 }, { "epoch": 1.0132684652808492, "grad_norm": 2.8145739018071767, "learning_rate": 6.28102505593789e-06, "loss": 1.3049, "step": 34365 }, { "epoch": 1.0134158926728587, "grad_norm": 2.8701911694193445, "learning_rate": 6.280550362329312e-06, "loss": 1.3344, "step": 34370 }, { "epoch": 1.0135633200648682, "grad_norm": 2.9231090963316015, "learning_rate": 6.280075594256397e-06, "loss": 1.2851, "step": 34375 }, { "epoch": 1.0137107474568774, "grad_norm": 2.993848580977987, "learning_rate": 6.279600751733121e-06, "loss": 1.3144, "step": 34380 }, { "epoch": 1.0138581748488869, "grad_norm": 2.9412434858386933, "learning_rate": 6.279125834773454e-06, "loss": 1.3382, "step": 34385 }, { "epoch": 1.0140056022408963, "grad_norm": 2.9759331406992984, "learning_rate": 6.27865084339137e-06, "loss": 1.3149, "step": 34390 }, { "epoch": 1.0141530296329058, "grad_norm": 2.9679099166512266, "learning_rate": 6.278175777600848e-06, "loss": 1.3193, "step": 34395 }, { "epoch": 1.0143004570249152, "grad_norm": 2.836546968932105, "learning_rate": 6.2777006374158675e-06, "loss": 1.2944, "step": 34400 }, { "epoch": 1.0144478844169247, "grad_norm": 2.7854399524086424, "learning_rate": 6.277225422850408e-06, "loss": 1.2893, "step": 34405 }, { "epoch": 1.0145953118089341, "grad_norm": 2.886801623605514, "learning_rate": 6.2767501339184565e-06, "loss": 1.2486, "step": 34410 }, { "epoch": 1.0147427392009436, "grad_norm": 2.9990949003706113, "learning_rate": 6.276274770633996e-06, "loss": 1.3505, "step": 34415 }, { "epoch": 1.014890166592953, "grad_norm": 2.8358841715789165, "learning_rate": 6.275799333011017e-06, "loss": 1.3206, "step": 34420 }, { "epoch": 1.0150375939849625, "grad_norm": 2.944717369973095, "learning_rate": 6.275323821063506e-06, "loss": 1.3323, "step": 34425 }, { "epoch": 1.0151850213769718, "grad_norm": 2.8814419748863376, "learning_rate": 6.27484823480546e-06, "loss": 1.2356, "step": 34430 }, { "epoch": 1.0153324487689812, "grad_norm": 2.8665534217139492, "learning_rate": 6.274372574250869e-06, "loss": 1.3, "step": 34435 }, { "epoch": 1.0154798761609907, "grad_norm": 2.9881207522284514, "learning_rate": 6.273896839413736e-06, "loss": 1.3453, "step": 34440 }, { "epoch": 1.0156273035530001, "grad_norm": 2.827043061194686, "learning_rate": 6.273421030308055e-06, "loss": 1.2799, "step": 34445 }, { "epoch": 1.0157747309450096, "grad_norm": 2.9810394321995966, "learning_rate": 6.272945146947828e-06, "loss": 1.3502, "step": 34450 }, { "epoch": 1.015922158337019, "grad_norm": 2.9919788051977703, "learning_rate": 6.2724691893470595e-06, "loss": 1.3703, "step": 34455 }, { "epoch": 1.0160695857290285, "grad_norm": 3.1225835675353406, "learning_rate": 6.271993157519755e-06, "loss": 1.3086, "step": 34460 }, { "epoch": 1.016217013121038, "grad_norm": 2.8748027129706757, "learning_rate": 6.271517051479922e-06, "loss": 1.3165, "step": 34465 }, { "epoch": 1.0163644405130474, "grad_norm": 2.9024609681908475, "learning_rate": 6.2710408712415716e-06, "loss": 1.3515, "step": 34470 }, { "epoch": 1.0165118679050567, "grad_norm": 3.0450113628778492, "learning_rate": 6.270564616818713e-06, "loss": 1.3469, "step": 34475 }, { "epoch": 1.0166592952970661, "grad_norm": 2.975365051291767, "learning_rate": 6.270088288225363e-06, "loss": 1.2783, "step": 34480 }, { "epoch": 1.0168067226890756, "grad_norm": 2.9429096174409266, "learning_rate": 6.269611885475538e-06, "loss": 1.3249, "step": 34485 }, { "epoch": 1.016954150081085, "grad_norm": 2.8356054785702915, "learning_rate": 6.269135408583255e-06, "loss": 1.3367, "step": 34490 }, { "epoch": 1.0171015774730945, "grad_norm": 2.948600566197034, "learning_rate": 6.268658857562537e-06, "loss": 1.3295, "step": 34495 }, { "epoch": 1.017249004865104, "grad_norm": 2.8498550770068283, "learning_rate": 6.2681822324274055e-06, "loss": 1.3039, "step": 34500 }, { "epoch": 1.017249004865104, "eval_loss": 1.1342129707336426, "eval_runtime": 4.206, "eval_samples_per_second": 94.151, "eval_steps_per_second": 3.091, "step": 34500 }, { "epoch": 1.0173964322571134, "grad_norm": 2.921789414714089, "learning_rate": 6.267705533191888e-06, "loss": 1.2956, "step": 34505 }, { "epoch": 1.0175438596491229, "grad_norm": 2.8539387917339627, "learning_rate": 6.267228759870009e-06, "loss": 1.2817, "step": 34510 }, { "epoch": 1.0176912870411323, "grad_norm": 2.8098679958555652, "learning_rate": 6.2667519124757995e-06, "loss": 1.3454, "step": 34515 }, { "epoch": 1.0178387144331418, "grad_norm": 3.0078879856597225, "learning_rate": 6.266274991023291e-06, "loss": 1.3013, "step": 34520 }, { "epoch": 1.017986141825151, "grad_norm": 3.0994916835768844, "learning_rate": 6.265797995526518e-06, "loss": 1.3489, "step": 34525 }, { "epoch": 1.0181335692171605, "grad_norm": 2.9831238908737365, "learning_rate": 6.265320925999515e-06, "loss": 1.3244, "step": 34530 }, { "epoch": 1.01828099660917, "grad_norm": 3.041943358992948, "learning_rate": 6.264843782456324e-06, "loss": 1.3263, "step": 34535 }, { "epoch": 1.0184284240011794, "grad_norm": 2.9334932152635043, "learning_rate": 6.26436656491098e-06, "loss": 1.2278, "step": 34540 }, { "epoch": 1.0185758513931888, "grad_norm": 2.8780993215493327, "learning_rate": 6.263889273377531e-06, "loss": 1.3086, "step": 34545 }, { "epoch": 1.0187232787851983, "grad_norm": 2.8926610425317434, "learning_rate": 6.263411907870019e-06, "loss": 1.3124, "step": 34550 }, { "epoch": 1.0188707061772078, "grad_norm": 2.8875914805296734, "learning_rate": 6.262934468402491e-06, "loss": 1.3059, "step": 34555 }, { "epoch": 1.0190181335692172, "grad_norm": 3.1272581857031567, "learning_rate": 6.2624569549889975e-06, "loss": 1.2839, "step": 34560 }, { "epoch": 1.0191655609612267, "grad_norm": 2.8359046763497866, "learning_rate": 6.261979367643588e-06, "loss": 1.2423, "step": 34565 }, { "epoch": 1.0193129883532361, "grad_norm": 2.8845469814966167, "learning_rate": 6.261501706380319e-06, "loss": 1.3103, "step": 34570 }, { "epoch": 1.0194604157452454, "grad_norm": 2.7221952690317215, "learning_rate": 6.261023971213244e-06, "loss": 1.2844, "step": 34575 }, { "epoch": 1.0196078431372548, "grad_norm": 2.9363916583704466, "learning_rate": 6.260546162156421e-06, "loss": 1.3775, "step": 34580 }, { "epoch": 1.0197552705292643, "grad_norm": 2.9683901522609846, "learning_rate": 6.26006827922391e-06, "loss": 1.2914, "step": 34585 }, { "epoch": 1.0199026979212737, "grad_norm": 2.8885021557105386, "learning_rate": 6.259590322429774e-06, "loss": 1.2432, "step": 34590 }, { "epoch": 1.0200501253132832, "grad_norm": 2.910067177933334, "learning_rate": 6.259112291788077e-06, "loss": 1.281, "step": 34595 }, { "epoch": 1.0201975527052927, "grad_norm": 2.924331446184098, "learning_rate": 6.258634187312886e-06, "loss": 1.3792, "step": 34600 }, { "epoch": 1.0203449800973021, "grad_norm": 2.860186041309543, "learning_rate": 6.258156009018269e-06, "loss": 1.3406, "step": 34605 }, { "epoch": 1.0204924074893116, "grad_norm": 2.9027194550193314, "learning_rate": 6.2576777569182985e-06, "loss": 1.3141, "step": 34610 }, { "epoch": 1.020639834881321, "grad_norm": 3.003263781131926, "learning_rate": 6.257199431027045e-06, "loss": 1.3145, "step": 34615 }, { "epoch": 1.0207872622733303, "grad_norm": 3.403487027999818, "learning_rate": 6.256721031358587e-06, "loss": 1.2713, "step": 34620 }, { "epoch": 1.0209346896653397, "grad_norm": 5.216145261913361, "learning_rate": 6.256242557926998e-06, "loss": 1.296, "step": 34625 }, { "epoch": 1.0210821170573492, "grad_norm": 2.988230156598401, "learning_rate": 6.255764010746362e-06, "loss": 1.3131, "step": 34630 }, { "epoch": 1.0212295444493587, "grad_norm": 3.0478234087434743, "learning_rate": 6.255285389830758e-06, "loss": 1.2928, "step": 34635 }, { "epoch": 1.021376971841368, "grad_norm": 3.1421310151217927, "learning_rate": 6.254806695194271e-06, "loss": 1.3238, "step": 34640 }, { "epoch": 1.0215243992333776, "grad_norm": 2.9508947912159615, "learning_rate": 6.254327926850986e-06, "loss": 1.2952, "step": 34645 }, { "epoch": 1.021671826625387, "grad_norm": 2.9736146608286167, "learning_rate": 6.253849084814994e-06, "loss": 1.3035, "step": 34650 }, { "epoch": 1.0218192540173965, "grad_norm": 3.123026639804486, "learning_rate": 6.253370169100382e-06, "loss": 1.344, "step": 34655 }, { "epoch": 1.021966681409406, "grad_norm": 2.920650006631779, "learning_rate": 6.252891179721243e-06, "loss": 1.358, "step": 34660 }, { "epoch": 1.0221141088014154, "grad_norm": 2.892542072555991, "learning_rate": 6.252412116691675e-06, "loss": 1.2932, "step": 34665 }, { "epoch": 1.0222615361934246, "grad_norm": 2.892015390801555, "learning_rate": 6.251932980025773e-06, "loss": 1.2792, "step": 34670 }, { "epoch": 1.022408963585434, "grad_norm": 3.1663833137085686, "learning_rate": 6.251453769737634e-06, "loss": 1.3002, "step": 34675 }, { "epoch": 1.0225563909774436, "grad_norm": 2.897217163483164, "learning_rate": 6.250974485841363e-06, "loss": 1.328, "step": 34680 }, { "epoch": 1.022703818369453, "grad_norm": 2.820184606079279, "learning_rate": 6.250495128351064e-06, "loss": 1.2912, "step": 34685 }, { "epoch": 1.0228512457614625, "grad_norm": 3.0799148232442293, "learning_rate": 6.250015697280838e-06, "loss": 1.3225, "step": 34690 }, { "epoch": 1.022998673153472, "grad_norm": 2.897319740117675, "learning_rate": 6.249536192644795e-06, "loss": 1.315, "step": 34695 }, { "epoch": 1.0231461005454814, "grad_norm": 2.7954019117437725, "learning_rate": 6.2490566144570475e-06, "loss": 1.316, "step": 34700 }, { "epoch": 1.0232935279374908, "grad_norm": 2.9301701949067986, "learning_rate": 6.248576962731704e-06, "loss": 1.2576, "step": 34705 }, { "epoch": 1.0234409553295003, "grad_norm": 3.092326303798403, "learning_rate": 6.24809723748288e-06, "loss": 1.3619, "step": 34710 }, { "epoch": 1.0235883827215098, "grad_norm": 2.9911640153231374, "learning_rate": 6.2476174387246935e-06, "loss": 1.2973, "step": 34715 }, { "epoch": 1.023735810113519, "grad_norm": 3.0588387947026208, "learning_rate": 6.247137566471261e-06, "loss": 1.2573, "step": 34720 }, { "epoch": 1.0238832375055285, "grad_norm": 3.050987383336779, "learning_rate": 6.246657620736704e-06, "loss": 1.3495, "step": 34725 }, { "epoch": 1.024030664897538, "grad_norm": 2.8484972770162433, "learning_rate": 6.246177601535147e-06, "loss": 1.3057, "step": 34730 }, { "epoch": 1.0241780922895474, "grad_norm": 2.889665082175699, "learning_rate": 6.245697508880712e-06, "loss": 1.2397, "step": 34735 }, { "epoch": 1.0243255196815568, "grad_norm": 3.0116802030845746, "learning_rate": 6.245217342787529e-06, "loss": 1.3123, "step": 34740 }, { "epoch": 1.0244729470735663, "grad_norm": 3.0401817675585447, "learning_rate": 6.244737103269727e-06, "loss": 1.3558, "step": 34745 }, { "epoch": 1.0246203744655757, "grad_norm": 2.909590595059404, "learning_rate": 6.244256790341436e-06, "loss": 1.2722, "step": 34750 }, { "epoch": 1.0247678018575852, "grad_norm": 2.950117632960741, "learning_rate": 6.2437764040167905e-06, "loss": 1.3307, "step": 34755 }, { "epoch": 1.0249152292495947, "grad_norm": 2.980578648553231, "learning_rate": 6.243295944309927e-06, "loss": 1.2859, "step": 34760 }, { "epoch": 1.025062656641604, "grad_norm": 2.845916525004383, "learning_rate": 6.242815411234984e-06, "loss": 1.2525, "step": 34765 }, { "epoch": 1.0252100840336134, "grad_norm": 2.995847895671329, "learning_rate": 6.242334804806101e-06, "loss": 1.3194, "step": 34770 }, { "epoch": 1.0253575114256228, "grad_norm": 2.8999710432389163, "learning_rate": 6.24185412503742e-06, "loss": 1.3077, "step": 34775 }, { "epoch": 1.0255049388176323, "grad_norm": 2.9684718513010653, "learning_rate": 6.2413733719430875e-06, "loss": 1.3218, "step": 34780 }, { "epoch": 1.0256523662096417, "grad_norm": 3.106151931348799, "learning_rate": 6.240892545537246e-06, "loss": 1.3352, "step": 34785 }, { "epoch": 1.0257997936016512, "grad_norm": 2.9944840502092775, "learning_rate": 6.240411645834049e-06, "loss": 1.2951, "step": 34790 }, { "epoch": 1.0259472209936606, "grad_norm": 3.146615270862534, "learning_rate": 6.239930672847644e-06, "loss": 1.3298, "step": 34795 }, { "epoch": 1.02609464838567, "grad_norm": 2.84466745630843, "learning_rate": 6.239449626592187e-06, "loss": 1.2937, "step": 34800 }, { "epoch": 1.0262420757776796, "grad_norm": 2.821382236447741, "learning_rate": 6.238968507081832e-06, "loss": 1.3054, "step": 34805 }, { "epoch": 1.026389503169689, "grad_norm": 2.8322544570798422, "learning_rate": 6.238487314330736e-06, "loss": 1.2755, "step": 34810 }, { "epoch": 1.0265369305616983, "grad_norm": 2.841020021204827, "learning_rate": 6.23800604835306e-06, "loss": 1.2755, "step": 34815 }, { "epoch": 1.0266843579537077, "grad_norm": 2.8561367215259157, "learning_rate": 6.237524709162963e-06, "loss": 1.2829, "step": 34820 }, { "epoch": 1.0268317853457172, "grad_norm": 2.9956533651886974, "learning_rate": 6.2370432967746134e-06, "loss": 1.3189, "step": 34825 }, { "epoch": 1.0269792127377266, "grad_norm": 3.0788353622572378, "learning_rate": 6.236561811202174e-06, "loss": 1.3401, "step": 34830 }, { "epoch": 1.027126640129736, "grad_norm": 2.9897990067536173, "learning_rate": 6.236080252459814e-06, "loss": 1.3481, "step": 34835 }, { "epoch": 1.0272740675217455, "grad_norm": 3.0222354090501504, "learning_rate": 6.235598620561704e-06, "loss": 1.337, "step": 34840 }, { "epoch": 1.027421494913755, "grad_norm": 2.925438382348764, "learning_rate": 6.235116915522016e-06, "loss": 1.295, "step": 34845 }, { "epoch": 1.0275689223057645, "grad_norm": 4.014536504687129, "learning_rate": 6.234635137354924e-06, "loss": 1.3071, "step": 34850 }, { "epoch": 1.027716349697774, "grad_norm": 2.845280098200574, "learning_rate": 6.234153286074608e-06, "loss": 1.2762, "step": 34855 }, { "epoch": 1.0278637770897834, "grad_norm": 2.9868420704183483, "learning_rate": 6.233671361695244e-06, "loss": 1.3293, "step": 34860 }, { "epoch": 1.0280112044817926, "grad_norm": 3.014118190986289, "learning_rate": 6.233189364231014e-06, "loss": 1.3186, "step": 34865 }, { "epoch": 1.028158631873802, "grad_norm": 3.0945518632096043, "learning_rate": 6.2327072936961016e-06, "loss": 1.264, "step": 34870 }, { "epoch": 1.0283060592658115, "grad_norm": 2.986376318265405, "learning_rate": 6.232225150104692e-06, "loss": 1.2818, "step": 34875 }, { "epoch": 1.028453486657821, "grad_norm": 2.973331613256821, "learning_rate": 6.231742933470973e-06, "loss": 1.3108, "step": 34880 }, { "epoch": 1.0286009140498305, "grad_norm": 2.863769464816779, "learning_rate": 6.231260643809134e-06, "loss": 1.2977, "step": 34885 }, { "epoch": 1.02874834144184, "grad_norm": 2.991945807871435, "learning_rate": 6.230778281133367e-06, "loss": 1.3045, "step": 34890 }, { "epoch": 1.0288957688338494, "grad_norm": 3.0407007989088606, "learning_rate": 6.230295845457867e-06, "loss": 1.3132, "step": 34895 }, { "epoch": 1.0290431962258588, "grad_norm": 3.0934355376676956, "learning_rate": 6.229813336796831e-06, "loss": 1.3359, "step": 34900 }, { "epoch": 1.0291906236178683, "grad_norm": 2.885440334347681, "learning_rate": 6.229330755164454e-06, "loss": 1.2761, "step": 34905 }, { "epoch": 1.0293380510098777, "grad_norm": 3.0095815354130084, "learning_rate": 6.228848100574939e-06, "loss": 1.2951, "step": 34910 }, { "epoch": 1.029485478401887, "grad_norm": 2.940649530684757, "learning_rate": 6.228365373042487e-06, "loss": 1.3465, "step": 34915 }, { "epoch": 1.0296329057938964, "grad_norm": 2.8715572885930443, "learning_rate": 6.227882572581305e-06, "loss": 1.3385, "step": 34920 }, { "epoch": 1.029780333185906, "grad_norm": 2.8066177117064, "learning_rate": 6.227399699205598e-06, "loss": 1.3205, "step": 34925 }, { "epoch": 1.0299277605779154, "grad_norm": 2.9971786519414287, "learning_rate": 6.226916752929577e-06, "loss": 1.3095, "step": 34930 }, { "epoch": 1.0300751879699248, "grad_norm": 2.8873543790588774, "learning_rate": 6.226433733767453e-06, "loss": 1.2989, "step": 34935 }, { "epoch": 1.0302226153619343, "grad_norm": 2.780010524076013, "learning_rate": 6.225950641733437e-06, "loss": 1.2692, "step": 34940 }, { "epoch": 1.0303700427539437, "grad_norm": 2.854590812353769, "learning_rate": 6.225467476841746e-06, "loss": 1.2782, "step": 34945 }, { "epoch": 1.0305174701459532, "grad_norm": 2.9220891388815455, "learning_rate": 6.224984239106599e-06, "loss": 1.3309, "step": 34950 }, { "epoch": 1.0306648975379626, "grad_norm": 3.06771261163836, "learning_rate": 6.224500928542214e-06, "loss": 1.3474, "step": 34955 }, { "epoch": 1.0308123249299719, "grad_norm": 2.9476425056779894, "learning_rate": 6.224017545162814e-06, "loss": 1.2629, "step": 34960 }, { "epoch": 1.0309597523219813, "grad_norm": 2.860627450908369, "learning_rate": 6.2235340889826215e-06, "loss": 1.3078, "step": 34965 }, { "epoch": 1.0311071797139908, "grad_norm": 2.9520974352202876, "learning_rate": 6.223050560015865e-06, "loss": 1.3208, "step": 34970 }, { "epoch": 1.0312546071060003, "grad_norm": 2.748976955293462, "learning_rate": 6.2225669582767715e-06, "loss": 1.3113, "step": 34975 }, { "epoch": 1.0314020344980097, "grad_norm": 2.9465912208036573, "learning_rate": 6.222083283779572e-06, "loss": 1.3433, "step": 34980 }, { "epoch": 1.0315494618900192, "grad_norm": 2.83575781960567, "learning_rate": 6.221599536538499e-06, "loss": 1.2828, "step": 34985 }, { "epoch": 1.0316968892820286, "grad_norm": 2.9581306850030336, "learning_rate": 6.221115716567788e-06, "loss": 1.3812, "step": 34990 }, { "epoch": 1.031844316674038, "grad_norm": 2.859423467645175, "learning_rate": 6.220631823881673e-06, "loss": 1.3362, "step": 34995 }, { "epoch": 1.0319917440660475, "grad_norm": 3.0224354837744416, "learning_rate": 6.220147858494398e-06, "loss": 1.2991, "step": 35000 }, { "epoch": 1.0319917440660475, "eval_loss": 1.1313661336898804, "eval_runtime": 4.2491, "eval_samples_per_second": 93.197, "eval_steps_per_second": 3.059, "step": 35000 }, { "epoch": 1.032139171458057, "grad_norm": 2.885140312560734, "learning_rate": 6.219663820420201e-06, "loss": 1.2859, "step": 35005 }, { "epoch": 1.0322865988500662, "grad_norm": 2.8694874221395748, "learning_rate": 6.219179709673326e-06, "loss": 1.2924, "step": 35010 }, { "epoch": 1.0324340262420757, "grad_norm": 3.0689429794959087, "learning_rate": 6.218695526268018e-06, "loss": 1.3387, "step": 35015 }, { "epoch": 1.0325814536340852, "grad_norm": 2.986592164174245, "learning_rate": 6.218211270218525e-06, "loss": 1.3071, "step": 35020 }, { "epoch": 1.0327288810260946, "grad_norm": 2.995119821954863, "learning_rate": 6.2177269415390975e-06, "loss": 1.3609, "step": 35025 }, { "epoch": 1.032876308418104, "grad_norm": 2.8223534712854663, "learning_rate": 6.217242540243988e-06, "loss": 1.3539, "step": 35030 }, { "epoch": 1.0330237358101135, "grad_norm": 3.203935909241668, "learning_rate": 6.216758066347448e-06, "loss": 1.3523, "step": 35035 }, { "epoch": 1.033171163202123, "grad_norm": 3.095869222718247, "learning_rate": 6.216273519863735e-06, "loss": 1.3333, "step": 35040 }, { "epoch": 1.0333185905941324, "grad_norm": 2.759567330263538, "learning_rate": 6.215788900807108e-06, "loss": 1.3015, "step": 35045 }, { "epoch": 1.033466017986142, "grad_norm": 3.060153745120531, "learning_rate": 6.2153042091918275e-06, "loss": 1.3648, "step": 35050 }, { "epoch": 1.0336134453781514, "grad_norm": 2.858426048684078, "learning_rate": 6.2148194450321545e-06, "loss": 1.2584, "step": 35055 }, { "epoch": 1.0337608727701606, "grad_norm": 2.8236173714739983, "learning_rate": 6.214334608342356e-06, "loss": 1.304, "step": 35060 }, { "epoch": 1.03390830016217, "grad_norm": 2.7839066962303742, "learning_rate": 6.213849699136696e-06, "loss": 1.3351, "step": 35065 }, { "epoch": 1.0340557275541795, "grad_norm": 2.936978455349427, "learning_rate": 6.213364717429446e-06, "loss": 1.285, "step": 35070 }, { "epoch": 1.034203154946189, "grad_norm": 2.834090491559631, "learning_rate": 6.212879663234877e-06, "loss": 1.3396, "step": 35075 }, { "epoch": 1.0343505823381984, "grad_norm": 2.7126772306696325, "learning_rate": 6.212394536567261e-06, "loss": 1.2628, "step": 35080 }, { "epoch": 1.034498009730208, "grad_norm": 2.91357865425733, "learning_rate": 6.211909337440874e-06, "loss": 1.2907, "step": 35085 }, { "epoch": 1.0346454371222173, "grad_norm": 2.876718353161866, "learning_rate": 6.211424065869993e-06, "loss": 1.278, "step": 35090 }, { "epoch": 1.0347928645142268, "grad_norm": 2.95984529184277, "learning_rate": 6.210938721868899e-06, "loss": 1.3315, "step": 35095 }, { "epoch": 1.0349402919062363, "grad_norm": 2.929071204902917, "learning_rate": 6.210453305451874e-06, "loss": 1.3011, "step": 35100 }, { "epoch": 1.0350877192982457, "grad_norm": 3.7152833514546186, "learning_rate": 6.2099678166331995e-06, "loss": 1.3111, "step": 35105 }, { "epoch": 1.035235146690255, "grad_norm": 3.0033203392200156, "learning_rate": 6.209482255427163e-06, "loss": 1.2848, "step": 35110 }, { "epoch": 1.0353825740822644, "grad_norm": 2.894161708041788, "learning_rate": 6.2089966218480525e-06, "loss": 1.3135, "step": 35115 }, { "epoch": 1.0355300014742739, "grad_norm": 2.8862428604831436, "learning_rate": 6.20851091591016e-06, "loss": 1.2897, "step": 35120 }, { "epoch": 1.0356774288662833, "grad_norm": 2.9182797776907528, "learning_rate": 6.208025137627776e-06, "loss": 1.3579, "step": 35125 }, { "epoch": 1.0358248562582928, "grad_norm": 2.8942573136540735, "learning_rate": 6.207539287015193e-06, "loss": 1.2812, "step": 35130 }, { "epoch": 1.0359722836503023, "grad_norm": 3.009465217504305, "learning_rate": 6.207053364086714e-06, "loss": 1.3081, "step": 35135 }, { "epoch": 1.0361197110423117, "grad_norm": 2.934547290520455, "learning_rate": 6.206567368856632e-06, "loss": 1.3029, "step": 35140 }, { "epoch": 1.0362671384343212, "grad_norm": 2.8586445379800556, "learning_rate": 6.206081301339251e-06, "loss": 1.2476, "step": 35145 }, { "epoch": 1.0364145658263306, "grad_norm": 2.969329735277775, "learning_rate": 6.205595161548872e-06, "loss": 1.3149, "step": 35150 }, { "epoch": 1.0365619932183399, "grad_norm": 2.893576989774919, "learning_rate": 6.205108949499801e-06, "loss": 1.3366, "step": 35155 }, { "epoch": 1.0367094206103493, "grad_norm": 3.0499834763610836, "learning_rate": 6.204622665206348e-06, "loss": 1.3445, "step": 35160 }, { "epoch": 1.0368568480023588, "grad_norm": 2.8829100898617734, "learning_rate": 6.204136308682819e-06, "loss": 1.3188, "step": 35165 }, { "epoch": 1.0370042753943682, "grad_norm": 2.8823658191351536, "learning_rate": 6.2036498799435265e-06, "loss": 1.3021, "step": 35170 }, { "epoch": 1.0371517027863777, "grad_norm": 2.964069500965221, "learning_rate": 6.203163379002785e-06, "loss": 1.3404, "step": 35175 }, { "epoch": 1.0372991301783872, "grad_norm": 3.053520420990766, "learning_rate": 6.202676805874909e-06, "loss": 1.3403, "step": 35180 }, { "epoch": 1.0374465575703966, "grad_norm": 3.0387960289690366, "learning_rate": 6.2021901605742184e-06, "loss": 1.3244, "step": 35185 }, { "epoch": 1.037593984962406, "grad_norm": 2.9580672079072508, "learning_rate": 6.201703443115031e-06, "loss": 1.273, "step": 35190 }, { "epoch": 1.0377414123544155, "grad_norm": 2.9079086736557938, "learning_rate": 6.201216653511671e-06, "loss": 1.3084, "step": 35195 }, { "epoch": 1.037888839746425, "grad_norm": 2.886451900383991, "learning_rate": 6.200729791778462e-06, "loss": 1.3154, "step": 35200 }, { "epoch": 1.0380362671384342, "grad_norm": 2.9845970483471866, "learning_rate": 6.200242857929731e-06, "loss": 1.3105, "step": 35205 }, { "epoch": 1.0381836945304437, "grad_norm": 2.9031431627868245, "learning_rate": 6.199755851979805e-06, "loss": 1.3004, "step": 35210 }, { "epoch": 1.0383311219224531, "grad_norm": 3.01876862317855, "learning_rate": 6.1992687739430155e-06, "loss": 1.3151, "step": 35215 }, { "epoch": 1.0384785493144626, "grad_norm": 2.808532169200291, "learning_rate": 6.198781623833697e-06, "loss": 1.3005, "step": 35220 }, { "epoch": 1.038625976706472, "grad_norm": 2.8660188686289714, "learning_rate": 6.198294401666182e-06, "loss": 1.3541, "step": 35225 }, { "epoch": 1.0387734040984815, "grad_norm": 2.9979815665870797, "learning_rate": 6.197807107454809e-06, "loss": 1.3749, "step": 35230 }, { "epoch": 1.038920831490491, "grad_norm": 2.9347655062021016, "learning_rate": 6.197319741213916e-06, "loss": 1.3181, "step": 35235 }, { "epoch": 1.0390682588825004, "grad_norm": 2.9402778777838967, "learning_rate": 6.196832302957846e-06, "loss": 1.2798, "step": 35240 }, { "epoch": 1.0392156862745099, "grad_norm": 3.002751456187202, "learning_rate": 6.196344792700941e-06, "loss": 1.2889, "step": 35245 }, { "epoch": 1.0393631136665193, "grad_norm": 2.895892420452694, "learning_rate": 6.1958572104575475e-06, "loss": 1.2992, "step": 35250 }, { "epoch": 1.0395105410585286, "grad_norm": 2.8896333250360002, "learning_rate": 6.195369556242012e-06, "loss": 1.3189, "step": 35255 }, { "epoch": 1.039657968450538, "grad_norm": 2.831754993433122, "learning_rate": 6.194881830068685e-06, "loss": 1.3179, "step": 35260 }, { "epoch": 1.0398053958425475, "grad_norm": 2.987964384079048, "learning_rate": 6.194394031951919e-06, "loss": 1.3132, "step": 35265 }, { "epoch": 1.039952823234557, "grad_norm": 3.196006587537861, "learning_rate": 6.193906161906067e-06, "loss": 1.3145, "step": 35270 }, { "epoch": 1.0401002506265664, "grad_norm": 2.9399071322527934, "learning_rate": 6.193418219945486e-06, "loss": 1.3013, "step": 35275 }, { "epoch": 1.0402476780185759, "grad_norm": 3.164296526727309, "learning_rate": 6.192930206084532e-06, "loss": 1.3177, "step": 35280 }, { "epoch": 1.0403951054105853, "grad_norm": 3.1021823226943748, "learning_rate": 6.192442120337569e-06, "loss": 1.3532, "step": 35285 }, { "epoch": 1.0405425328025948, "grad_norm": 2.891695572249307, "learning_rate": 6.191953962718959e-06, "loss": 1.3438, "step": 35290 }, { "epoch": 1.0406899601946042, "grad_norm": 2.7392023581159046, "learning_rate": 6.191465733243062e-06, "loss": 1.3307, "step": 35295 }, { "epoch": 1.0408373875866137, "grad_norm": 2.904694003447746, "learning_rate": 6.19097743192425e-06, "loss": 1.3224, "step": 35300 }, { "epoch": 1.040984814978623, "grad_norm": 2.816632280307974, "learning_rate": 6.190489058776889e-06, "loss": 1.2761, "step": 35305 }, { "epoch": 1.0411322423706324, "grad_norm": 2.93755915084117, "learning_rate": 6.190000613815351e-06, "loss": 1.3348, "step": 35310 }, { "epoch": 1.0412796697626419, "grad_norm": 3.0360492512052586, "learning_rate": 6.1895120970540084e-06, "loss": 1.3021, "step": 35315 }, { "epoch": 1.0414270971546513, "grad_norm": 2.902973758246607, "learning_rate": 6.189023508507236e-06, "loss": 1.2694, "step": 35320 }, { "epoch": 1.0415745245466608, "grad_norm": 2.895780134881604, "learning_rate": 6.188534848189414e-06, "loss": 1.3153, "step": 35325 }, { "epoch": 1.0417219519386702, "grad_norm": 2.9416012707029813, "learning_rate": 6.188046116114918e-06, "loss": 1.329, "step": 35330 }, { "epoch": 1.0418693793306797, "grad_norm": 2.9120902813481178, "learning_rate": 6.18755731229813e-06, "loss": 1.245, "step": 35335 }, { "epoch": 1.0420168067226891, "grad_norm": 2.9890222670167876, "learning_rate": 6.1870684367534365e-06, "loss": 1.2893, "step": 35340 }, { "epoch": 1.0421642341146986, "grad_norm": 2.9213865777726986, "learning_rate": 6.186579489495221e-06, "loss": 1.2694, "step": 35345 }, { "epoch": 1.0423116615067078, "grad_norm": 2.9295573423995007, "learning_rate": 6.186090470537872e-06, "loss": 1.3318, "step": 35350 }, { "epoch": 1.0424590888987173, "grad_norm": 2.8081743761117273, "learning_rate": 6.185601379895779e-06, "loss": 1.2618, "step": 35355 }, { "epoch": 1.0426065162907268, "grad_norm": 2.920248030496282, "learning_rate": 6.185112217583334e-06, "loss": 1.2952, "step": 35360 }, { "epoch": 1.0427539436827362, "grad_norm": 2.9115579658559243, "learning_rate": 6.1846229836149325e-06, "loss": 1.2677, "step": 35365 }, { "epoch": 1.0429013710747457, "grad_norm": 2.918296108925763, "learning_rate": 6.184133678004967e-06, "loss": 1.2449, "step": 35370 }, { "epoch": 1.0430487984667551, "grad_norm": 2.942027034053953, "learning_rate": 6.183644300767841e-06, "loss": 1.3009, "step": 35375 }, { "epoch": 1.0431962258587646, "grad_norm": 2.96597928226712, "learning_rate": 6.183154851917952e-06, "loss": 1.3148, "step": 35380 }, { "epoch": 1.043343653250774, "grad_norm": 2.8333465108303337, "learning_rate": 6.182665331469702e-06, "loss": 1.2555, "step": 35385 }, { "epoch": 1.0434910806427835, "grad_norm": 2.9574578788702865, "learning_rate": 6.182175739437498e-06, "loss": 1.2804, "step": 35390 }, { "epoch": 1.043638508034793, "grad_norm": 2.9203027335454883, "learning_rate": 6.181686075835745e-06, "loss": 1.2888, "step": 35395 }, { "epoch": 1.0437859354268022, "grad_norm": 2.979288831301217, "learning_rate": 6.181196340678852e-06, "loss": 1.3249, "step": 35400 }, { "epoch": 1.0439333628188117, "grad_norm": 2.979794015455239, "learning_rate": 6.1807065339812325e-06, "loss": 1.2431, "step": 35405 }, { "epoch": 1.0440807902108211, "grad_norm": 2.9931111312553154, "learning_rate": 6.180216655757295e-06, "loss": 1.4072, "step": 35410 }, { "epoch": 1.0442282176028306, "grad_norm": 2.8018314455378426, "learning_rate": 6.17972670602146e-06, "loss": 1.3198, "step": 35415 }, { "epoch": 1.04437564499484, "grad_norm": 2.9075391871762837, "learning_rate": 6.179236684788141e-06, "loss": 1.3621, "step": 35420 }, { "epoch": 1.0445230723868495, "grad_norm": 2.767626459565426, "learning_rate": 6.178746592071759e-06, "loss": 1.316, "step": 35425 }, { "epoch": 1.044670499778859, "grad_norm": 2.8582167788430235, "learning_rate": 6.178256427886735e-06, "loss": 1.317, "step": 35430 }, { "epoch": 1.0448179271708684, "grad_norm": 3.026634689970282, "learning_rate": 6.177766192247492e-06, "loss": 1.3314, "step": 35435 }, { "epoch": 1.0449653545628779, "grad_norm": 3.2304900025351624, "learning_rate": 6.177275885168458e-06, "loss": 1.3549, "step": 35440 }, { "epoch": 1.045112781954887, "grad_norm": 3.019358933427684, "learning_rate": 6.176785506664058e-06, "loss": 1.2995, "step": 35445 }, { "epoch": 1.0452602093468966, "grad_norm": 2.920660628223618, "learning_rate": 6.176295056748725e-06, "loss": 1.3032, "step": 35450 }, { "epoch": 1.045407636738906, "grad_norm": 2.821177793335681, "learning_rate": 6.175804535436888e-06, "loss": 1.2848, "step": 35455 }, { "epoch": 1.0455550641309155, "grad_norm": 3.1354707364426315, "learning_rate": 6.175313942742983e-06, "loss": 1.3646, "step": 35460 }, { "epoch": 1.045702491522925, "grad_norm": 2.7190630039309394, "learning_rate": 6.174823278681445e-06, "loss": 1.2392, "step": 35465 }, { "epoch": 1.0458499189149344, "grad_norm": 3.0751708875969643, "learning_rate": 6.174332543266715e-06, "loss": 1.3373, "step": 35470 }, { "epoch": 1.0459973463069439, "grad_norm": 2.957221022801318, "learning_rate": 6.173841736513231e-06, "loss": 1.2813, "step": 35475 }, { "epoch": 1.0461447736989533, "grad_norm": 3.13188692032135, "learning_rate": 6.173350858435435e-06, "loss": 1.331, "step": 35480 }, { "epoch": 1.0462922010909628, "grad_norm": 2.845848074909053, "learning_rate": 6.172859909047774e-06, "loss": 1.2866, "step": 35485 }, { "epoch": 1.0464396284829722, "grad_norm": 2.975121322037718, "learning_rate": 6.172368888364694e-06, "loss": 1.3287, "step": 35490 }, { "epoch": 1.0465870558749815, "grad_norm": 2.8073167052503725, "learning_rate": 6.171877796400643e-06, "loss": 1.2905, "step": 35495 }, { "epoch": 1.046734483266991, "grad_norm": 2.9846513064394533, "learning_rate": 6.1713866331700715e-06, "loss": 1.3258, "step": 35500 }, { "epoch": 1.046734483266991, "eval_loss": 1.1301237344741821, "eval_runtime": 4.1674, "eval_samples_per_second": 95.024, "eval_steps_per_second": 3.119, "step": 35500 }, { "epoch": 1.0468819106590004, "grad_norm": 3.003101387063824, "learning_rate": 6.170895398687433e-06, "loss": 1.3314, "step": 35505 }, { "epoch": 1.0470293380510098, "grad_norm": 2.968242990769646, "learning_rate": 6.170404092967185e-06, "loss": 1.2949, "step": 35510 }, { "epoch": 1.0471767654430193, "grad_norm": 2.913969443610597, "learning_rate": 6.169912716023781e-06, "loss": 1.257, "step": 35515 }, { "epoch": 1.0473241928350288, "grad_norm": 2.8710550908422525, "learning_rate": 6.169421267871683e-06, "loss": 1.3402, "step": 35520 }, { "epoch": 1.0474716202270382, "grad_norm": 2.987840049664104, "learning_rate": 6.168929748525351e-06, "loss": 1.3277, "step": 35525 }, { "epoch": 1.0476190476190477, "grad_norm": 4.0492047928889745, "learning_rate": 6.16843815799925e-06, "loss": 1.38, "step": 35530 }, { "epoch": 1.0477664750110571, "grad_norm": 2.87560422438559, "learning_rate": 6.167946496307844e-06, "loss": 1.2144, "step": 35535 }, { "epoch": 1.0479139024030666, "grad_norm": 2.9631742418489586, "learning_rate": 6.167454763465601e-06, "loss": 1.3193, "step": 35540 }, { "epoch": 1.0480613297950758, "grad_norm": 2.9258210191689558, "learning_rate": 6.16696295948699e-06, "loss": 1.3163, "step": 35545 }, { "epoch": 1.0482087571870853, "grad_norm": 2.8879211459897896, "learning_rate": 6.166471084386487e-06, "loss": 1.3189, "step": 35550 }, { "epoch": 1.0483561845790947, "grad_norm": 2.938110880642452, "learning_rate": 6.16597913817856e-06, "loss": 1.3377, "step": 35555 }, { "epoch": 1.0485036119711042, "grad_norm": 2.9579886173617536, "learning_rate": 6.16548712087769e-06, "loss": 1.3063, "step": 35560 }, { "epoch": 1.0486510393631137, "grad_norm": 2.945226683712458, "learning_rate": 6.164995032498353e-06, "loss": 1.2894, "step": 35565 }, { "epoch": 1.0487984667551231, "grad_norm": 3.0594718715392055, "learning_rate": 6.16450287305503e-06, "loss": 1.3283, "step": 35570 }, { "epoch": 1.0489458941471326, "grad_norm": 2.8639745117033826, "learning_rate": 6.164010642562201e-06, "loss": 1.2619, "step": 35575 }, { "epoch": 1.049093321539142, "grad_norm": 3.004169145028506, "learning_rate": 6.163518341034354e-06, "loss": 1.3037, "step": 35580 }, { "epoch": 1.0492407489311515, "grad_norm": 2.836449636014258, "learning_rate": 6.163025968485974e-06, "loss": 1.2132, "step": 35585 }, { "epoch": 1.049388176323161, "grad_norm": 2.7658443026100046, "learning_rate": 6.162533524931548e-06, "loss": 1.3172, "step": 35590 }, { "epoch": 1.0495356037151702, "grad_norm": 2.891170092375932, "learning_rate": 6.162041010385569e-06, "loss": 1.3052, "step": 35595 }, { "epoch": 1.0496830311071796, "grad_norm": 2.9503794104200876, "learning_rate": 6.161548424862529e-06, "loss": 1.3051, "step": 35600 }, { "epoch": 1.049830458499189, "grad_norm": 2.9696520432995164, "learning_rate": 6.1610557683769236e-06, "loss": 1.3131, "step": 35605 }, { "epoch": 1.0499778858911986, "grad_norm": 2.9618217485572824, "learning_rate": 6.160563040943249e-06, "loss": 1.2932, "step": 35610 }, { "epoch": 1.050125313283208, "grad_norm": 2.987642430793263, "learning_rate": 6.1600702425760035e-06, "loss": 1.2883, "step": 35615 }, { "epoch": 1.0502727406752175, "grad_norm": 2.8213165171561685, "learning_rate": 6.15957737328969e-06, "loss": 1.2797, "step": 35620 }, { "epoch": 1.050420168067227, "grad_norm": 2.967238903926186, "learning_rate": 6.1590844330988096e-06, "loss": 1.3358, "step": 35625 }, { "epoch": 1.0505675954592364, "grad_norm": 2.8436028689379103, "learning_rate": 6.158591422017869e-06, "loss": 1.2859, "step": 35630 }, { "epoch": 1.0507150228512459, "grad_norm": 2.7683004844419297, "learning_rate": 6.158098340061377e-06, "loss": 1.2409, "step": 35635 }, { "epoch": 1.050862450243255, "grad_norm": 2.9147638134012275, "learning_rate": 6.15760518724384e-06, "loss": 1.2765, "step": 35640 }, { "epoch": 1.0510098776352645, "grad_norm": 2.976899378607385, "learning_rate": 6.157111963579771e-06, "loss": 1.3027, "step": 35645 }, { "epoch": 1.051157305027274, "grad_norm": 2.993066977661392, "learning_rate": 6.1566186690836855e-06, "loss": 1.3746, "step": 35650 }, { "epoch": 1.0513047324192835, "grad_norm": 2.8181356259136314, "learning_rate": 6.156125303770096e-06, "loss": 1.3562, "step": 35655 }, { "epoch": 1.051452159811293, "grad_norm": 3.0021938162295627, "learning_rate": 6.155631867653524e-06, "loss": 1.3293, "step": 35660 }, { "epoch": 1.0515995872033024, "grad_norm": 3.049342144447716, "learning_rate": 6.155138360748485e-06, "loss": 1.295, "step": 35665 }, { "epoch": 1.0517470145953118, "grad_norm": 2.8723614190858306, "learning_rate": 6.154644783069504e-06, "loss": 1.3245, "step": 35670 }, { "epoch": 1.0518944419873213, "grad_norm": 2.9935478455823117, "learning_rate": 6.154151134631105e-06, "loss": 1.3058, "step": 35675 }, { "epoch": 1.0520418693793308, "grad_norm": 2.840236613976995, "learning_rate": 6.153657415447814e-06, "loss": 1.2946, "step": 35680 }, { "epoch": 1.0521892967713402, "grad_norm": 2.8183515863086903, "learning_rate": 6.153163625534158e-06, "loss": 1.243, "step": 35685 }, { "epoch": 1.0523367241633494, "grad_norm": 2.773791670141752, "learning_rate": 6.152669764904669e-06, "loss": 1.2857, "step": 35690 }, { "epoch": 1.052484151555359, "grad_norm": 2.972724748690587, "learning_rate": 6.152175833573878e-06, "loss": 1.27, "step": 35695 }, { "epoch": 1.0526315789473684, "grad_norm": 3.007096393492546, "learning_rate": 6.15168183155632e-06, "loss": 1.3341, "step": 35700 }, { "epoch": 1.0527790063393778, "grad_norm": 2.8977695258609115, "learning_rate": 6.151187758866532e-06, "loss": 1.3269, "step": 35705 }, { "epoch": 1.0529264337313873, "grad_norm": 2.8867433603830115, "learning_rate": 6.150693615519051e-06, "loss": 1.3321, "step": 35710 }, { "epoch": 1.0530738611233967, "grad_norm": 2.982079951924606, "learning_rate": 6.150199401528421e-06, "loss": 1.2952, "step": 35715 }, { "epoch": 1.0532212885154062, "grad_norm": 3.0107792749853695, "learning_rate": 6.1497051169091815e-06, "loss": 1.3343, "step": 35720 }, { "epoch": 1.0533687159074157, "grad_norm": 2.811844007289902, "learning_rate": 6.14921076167588e-06, "loss": 1.3227, "step": 35725 }, { "epoch": 1.0535161432994251, "grad_norm": 3.0510739045118234, "learning_rate": 6.148716335843061e-06, "loss": 1.3439, "step": 35730 }, { "epoch": 1.0536635706914346, "grad_norm": 2.947710737922037, "learning_rate": 6.148221839425276e-06, "loss": 1.3341, "step": 35735 }, { "epoch": 1.0538109980834438, "grad_norm": 2.997179452146753, "learning_rate": 6.147727272437074e-06, "loss": 1.3196, "step": 35740 }, { "epoch": 1.0539584254754533, "grad_norm": 2.9663800513890277, "learning_rate": 6.147232634893009e-06, "loss": 1.3438, "step": 35745 }, { "epoch": 1.0541058528674627, "grad_norm": 2.8597430953726146, "learning_rate": 6.146737926807637e-06, "loss": 1.328, "step": 35750 }, { "epoch": 1.0542532802594722, "grad_norm": 3.2593721354409038, "learning_rate": 6.146243148195515e-06, "loss": 1.3056, "step": 35755 }, { "epoch": 1.0544007076514816, "grad_norm": 3.0227778265708087, "learning_rate": 6.145748299071202e-06, "loss": 1.3108, "step": 35760 }, { "epoch": 1.054548135043491, "grad_norm": 3.072489342582823, "learning_rate": 6.1452533794492585e-06, "loss": 1.32, "step": 35765 }, { "epoch": 1.0546955624355006, "grad_norm": 2.733594868508629, "learning_rate": 6.14475838934425e-06, "loss": 1.2553, "step": 35770 }, { "epoch": 1.05484298982751, "grad_norm": 2.8901969966061545, "learning_rate": 6.144263328770741e-06, "loss": 1.2858, "step": 35775 }, { "epoch": 1.0549904172195195, "grad_norm": 2.920811077227362, "learning_rate": 6.1437681977433e-06, "loss": 1.3179, "step": 35780 }, { "epoch": 1.055137844611529, "grad_norm": 2.8415554119740705, "learning_rate": 6.143272996276496e-06, "loss": 1.2973, "step": 35785 }, { "epoch": 1.0552852720035382, "grad_norm": 2.780653836951401, "learning_rate": 6.142777724384902e-06, "loss": 1.3204, "step": 35790 }, { "epoch": 1.0554326993955476, "grad_norm": 2.928094455570933, "learning_rate": 6.14228238208309e-06, "loss": 1.3132, "step": 35795 }, { "epoch": 1.055580126787557, "grad_norm": 2.8711340137594186, "learning_rate": 6.141786969385638e-06, "loss": 1.29, "step": 35800 }, { "epoch": 1.0557275541795665, "grad_norm": 2.9075995789624587, "learning_rate": 6.141291486307123e-06, "loss": 1.3094, "step": 35805 }, { "epoch": 1.055874981571576, "grad_norm": 3.0150645722951626, "learning_rate": 6.140795932862125e-06, "loss": 1.2934, "step": 35810 }, { "epoch": 1.0560224089635855, "grad_norm": 2.9208628003835213, "learning_rate": 6.140300309065229e-06, "loss": 1.3071, "step": 35815 }, { "epoch": 1.056169836355595, "grad_norm": 2.845449818129883, "learning_rate": 6.139804614931015e-06, "loss": 1.2806, "step": 35820 }, { "epoch": 1.0563172637476044, "grad_norm": 2.991559096011987, "learning_rate": 6.139308850474072e-06, "loss": 1.269, "step": 35825 }, { "epoch": 1.0564646911396138, "grad_norm": 2.9480750990030153, "learning_rate": 6.138813015708986e-06, "loss": 1.3701, "step": 35830 }, { "epoch": 1.056612118531623, "grad_norm": 2.795568241877947, "learning_rate": 6.138317110650352e-06, "loss": 1.2528, "step": 35835 }, { "epoch": 1.0567595459236325, "grad_norm": 3.0573302135412948, "learning_rate": 6.137821135312758e-06, "loss": 1.2848, "step": 35840 }, { "epoch": 1.056906973315642, "grad_norm": 2.7605895683855874, "learning_rate": 6.137325089710802e-06, "loss": 1.2821, "step": 35845 }, { "epoch": 1.0570544007076514, "grad_norm": 2.776619172383642, "learning_rate": 6.136828973859077e-06, "loss": 1.3119, "step": 35850 }, { "epoch": 1.057201828099661, "grad_norm": 2.8904655900732457, "learning_rate": 6.1363327877721845e-06, "loss": 1.2867, "step": 35855 }, { "epoch": 1.0573492554916704, "grad_norm": 2.755575973274105, "learning_rate": 6.135836531464726e-06, "loss": 1.3228, "step": 35860 }, { "epoch": 1.0574966828836798, "grad_norm": 2.983259169641014, "learning_rate": 6.135340204951303e-06, "loss": 1.3049, "step": 35865 }, { "epoch": 1.0576441102756893, "grad_norm": 2.931890734669197, "learning_rate": 6.134843808246519e-06, "loss": 1.3517, "step": 35870 }, { "epoch": 1.0577915376676987, "grad_norm": 2.9336499885282126, "learning_rate": 6.134347341364985e-06, "loss": 1.28, "step": 35875 }, { "epoch": 1.0579389650597082, "grad_norm": 2.9415745712304524, "learning_rate": 6.133850804321307e-06, "loss": 1.3095, "step": 35880 }, { "epoch": 1.0580863924517174, "grad_norm": 3.0609239395722354, "learning_rate": 6.1333541971300985e-06, "loss": 1.3615, "step": 35885 }, { "epoch": 1.0582338198437269, "grad_norm": 2.9343725381984656, "learning_rate": 6.1328575198059696e-06, "loss": 1.2977, "step": 35890 }, { "epoch": 1.0583812472357363, "grad_norm": 3.0023728243216072, "learning_rate": 6.132360772363536e-06, "loss": 1.3205, "step": 35895 }, { "epoch": 1.0585286746277458, "grad_norm": 2.8784242026182234, "learning_rate": 6.131863954817418e-06, "loss": 1.2823, "step": 35900 }, { "epoch": 1.0586761020197553, "grad_norm": 2.9628929010334732, "learning_rate": 6.1313670671822336e-06, "loss": 1.2971, "step": 35905 }, { "epoch": 1.0588235294117647, "grad_norm": 2.936653418038731, "learning_rate": 6.130870109472603e-06, "loss": 1.3052, "step": 35910 }, { "epoch": 1.0589709568037742, "grad_norm": 2.8904153798326537, "learning_rate": 6.130373081703151e-06, "loss": 1.3207, "step": 35915 }, { "epoch": 1.0591183841957836, "grad_norm": 2.951067040643867, "learning_rate": 6.129875983888504e-06, "loss": 1.2649, "step": 35920 }, { "epoch": 1.059265811587793, "grad_norm": 2.8713392995410367, "learning_rate": 6.129378816043287e-06, "loss": 1.3506, "step": 35925 }, { "epoch": 1.0594132389798026, "grad_norm": 3.0614918312129427, "learning_rate": 6.128881578182132e-06, "loss": 1.2772, "step": 35930 }, { "epoch": 1.0595606663718118, "grad_norm": 3.1215998065820396, "learning_rate": 6.128384270319671e-06, "loss": 1.3216, "step": 35935 }, { "epoch": 1.0597080937638212, "grad_norm": 2.9343481295848517, "learning_rate": 6.127886892470536e-06, "loss": 1.3061, "step": 35940 }, { "epoch": 1.0598555211558307, "grad_norm": 2.8793025276646653, "learning_rate": 6.127389444649364e-06, "loss": 1.349, "step": 35945 }, { "epoch": 1.0600029485478402, "grad_norm": 2.943208638143992, "learning_rate": 6.126891926870794e-06, "loss": 1.2995, "step": 35950 }, { "epoch": 1.0601503759398496, "grad_norm": 2.9359093983260944, "learning_rate": 6.126394339149464e-06, "loss": 1.3276, "step": 35955 }, { "epoch": 1.060297803331859, "grad_norm": 3.027240227398791, "learning_rate": 6.1258966815000165e-06, "loss": 1.3313, "step": 35960 }, { "epoch": 1.0604452307238685, "grad_norm": 2.9166472405193513, "learning_rate": 6.125398953937097e-06, "loss": 1.3537, "step": 35965 }, { "epoch": 1.060592658115878, "grad_norm": 2.8920762991340565, "learning_rate": 6.124901156475351e-06, "loss": 1.2446, "step": 35970 }, { "epoch": 1.0607400855078875, "grad_norm": 2.878706443248932, "learning_rate": 6.1244032891294255e-06, "loss": 1.3648, "step": 35975 }, { "epoch": 1.060887512899897, "grad_norm": 2.9897946273413663, "learning_rate": 6.1239053519139735e-06, "loss": 1.3255, "step": 35980 }, { "epoch": 1.0610349402919061, "grad_norm": 2.916842213181693, "learning_rate": 6.123407344843645e-06, "loss": 1.3284, "step": 35985 }, { "epoch": 1.0611823676839156, "grad_norm": 3.0350805274267305, "learning_rate": 6.122909267933095e-06, "loss": 1.3677, "step": 35990 }, { "epoch": 1.061329795075925, "grad_norm": 2.8243378590335615, "learning_rate": 6.122411121196979e-06, "loss": 1.3297, "step": 35995 }, { "epoch": 1.0614772224679345, "grad_norm": 3.2762023799866835, "learning_rate": 6.121912904649959e-06, "loss": 1.3229, "step": 36000 }, { "epoch": 1.0614772224679345, "eval_loss": 1.129478096961975, "eval_runtime": 4.5381, "eval_samples_per_second": 87.262, "eval_steps_per_second": 2.865, "step": 36000 }, { "epoch": 1.061624649859944, "grad_norm": 2.8733415373269766, "learning_rate": 6.121414618306693e-06, "loss": 1.303, "step": 36005 }, { "epoch": 1.0617720772519534, "grad_norm": 2.9342305985384787, "learning_rate": 6.120916262181844e-06, "loss": 1.3111, "step": 36010 }, { "epoch": 1.061919504643963, "grad_norm": 2.8834074841879795, "learning_rate": 6.1204178362900755e-06, "loss": 1.3056, "step": 36015 }, { "epoch": 1.0620669320359724, "grad_norm": 3.0197513986181908, "learning_rate": 6.1199193406460565e-06, "loss": 1.3161, "step": 36020 }, { "epoch": 1.0622143594279818, "grad_norm": 2.8637124398286242, "learning_rate": 6.119420775264454e-06, "loss": 1.274, "step": 36025 }, { "epoch": 1.062361786819991, "grad_norm": 2.879615980961504, "learning_rate": 6.11892214015994e-06, "loss": 1.3322, "step": 36030 }, { "epoch": 1.0625092142120005, "grad_norm": 3.012646767969855, "learning_rate": 6.118423435347186e-06, "loss": 1.3068, "step": 36035 }, { "epoch": 1.06265664160401, "grad_norm": 2.973894921497426, "learning_rate": 6.1179246608408695e-06, "loss": 1.302, "step": 36040 }, { "epoch": 1.0628040689960194, "grad_norm": 2.8886410332168664, "learning_rate": 6.117425816655665e-06, "loss": 1.3146, "step": 36045 }, { "epoch": 1.0629514963880289, "grad_norm": 2.9686697883232793, "learning_rate": 6.116926902806253e-06, "loss": 1.2802, "step": 36050 }, { "epoch": 1.0630989237800383, "grad_norm": 3.0254706183090874, "learning_rate": 6.116427919307314e-06, "loss": 1.3838, "step": 36055 }, { "epoch": 1.0632463511720478, "grad_norm": 2.9374692692813684, "learning_rate": 6.115928866173532e-06, "loss": 1.2821, "step": 36060 }, { "epoch": 1.0633937785640573, "grad_norm": 3.025956021572536, "learning_rate": 6.115429743419591e-06, "loss": 1.3287, "step": 36065 }, { "epoch": 1.0635412059560667, "grad_norm": 2.811203654843443, "learning_rate": 6.114930551060178e-06, "loss": 1.3026, "step": 36070 }, { "epoch": 1.0636886333480762, "grad_norm": 2.970102253507171, "learning_rate": 6.1144312891099844e-06, "loss": 1.3605, "step": 36075 }, { "epoch": 1.0638360607400854, "grad_norm": 3.0354882769288256, "learning_rate": 6.1139319575836995e-06, "loss": 1.3101, "step": 36080 }, { "epoch": 1.0639834881320949, "grad_norm": 2.862521618394923, "learning_rate": 6.113432556496017e-06, "loss": 1.3318, "step": 36085 }, { "epoch": 1.0641309155241043, "grad_norm": 2.941178764576579, "learning_rate": 6.112933085861632e-06, "loss": 1.3301, "step": 36090 }, { "epoch": 1.0642783429161138, "grad_norm": 3.0194721669451825, "learning_rate": 6.112433545695246e-06, "loss": 1.3329, "step": 36095 }, { "epoch": 1.0644257703081232, "grad_norm": 2.9272284250822302, "learning_rate": 6.111933936011553e-06, "loss": 1.3692, "step": 36100 }, { "epoch": 1.0645731977001327, "grad_norm": 2.770585112034809, "learning_rate": 6.111434256825257e-06, "loss": 1.2553, "step": 36105 }, { "epoch": 1.0647206250921422, "grad_norm": 3.0380291826987214, "learning_rate": 6.110934508151061e-06, "loss": 1.3142, "step": 36110 }, { "epoch": 1.0648680524841516, "grad_norm": 2.860391036013925, "learning_rate": 6.110434690003672e-06, "loss": 1.3386, "step": 36115 }, { "epoch": 1.065015479876161, "grad_norm": 2.941739642172182, "learning_rate": 6.109934802397797e-06, "loss": 1.2941, "step": 36120 }, { "epoch": 1.0651629072681703, "grad_norm": 3.070277413193423, "learning_rate": 6.1094348453481445e-06, "loss": 1.3087, "step": 36125 }, { "epoch": 1.0653103346601798, "grad_norm": 3.1439073357660274, "learning_rate": 6.1089348188694285e-06, "loss": 1.3084, "step": 36130 }, { "epoch": 1.0654577620521892, "grad_norm": 2.965216367563121, "learning_rate": 6.108434722976362e-06, "loss": 1.2903, "step": 36135 }, { "epoch": 1.0656051894441987, "grad_norm": 3.002888735222194, "learning_rate": 6.107934557683661e-06, "loss": 1.3136, "step": 36140 }, { "epoch": 1.0657526168362081, "grad_norm": 3.0637111108428163, "learning_rate": 6.107434323006043e-06, "loss": 1.3452, "step": 36145 }, { "epoch": 1.0659000442282176, "grad_norm": 2.8823167928494446, "learning_rate": 6.1069340189582275e-06, "loss": 1.2983, "step": 36150 }, { "epoch": 1.066047471620227, "grad_norm": 3.0833941682950243, "learning_rate": 6.106433645554937e-06, "loss": 1.334, "step": 36155 }, { "epoch": 1.0661948990122365, "grad_norm": 3.1062221500026324, "learning_rate": 6.105933202810897e-06, "loss": 1.4144, "step": 36160 }, { "epoch": 1.066342326404246, "grad_norm": 2.9089892053187807, "learning_rate": 6.105432690740831e-06, "loss": 1.2555, "step": 36165 }, { "epoch": 1.0664897537962554, "grad_norm": 3.088099473745136, "learning_rate": 6.104932109359469e-06, "loss": 1.3269, "step": 36170 }, { "epoch": 1.066637181188265, "grad_norm": 2.974472892028615, "learning_rate": 6.1044314586815396e-06, "loss": 1.3713, "step": 36175 }, { "epoch": 1.0667846085802741, "grad_norm": 2.946714728261921, "learning_rate": 6.103930738721778e-06, "loss": 1.3312, "step": 36180 }, { "epoch": 1.0669320359722836, "grad_norm": 2.8644255051813743, "learning_rate": 6.103429949494915e-06, "loss": 1.2925, "step": 36185 }, { "epoch": 1.067079463364293, "grad_norm": 2.8263766500689536, "learning_rate": 6.102929091015689e-06, "loss": 1.2903, "step": 36190 }, { "epoch": 1.0672268907563025, "grad_norm": 2.81497539187718, "learning_rate": 6.102428163298838e-06, "loss": 1.2901, "step": 36195 }, { "epoch": 1.067374318148312, "grad_norm": 2.8234789323297838, "learning_rate": 6.101927166359101e-06, "loss": 1.2574, "step": 36200 }, { "epoch": 1.0675217455403214, "grad_norm": 2.978623199077123, "learning_rate": 6.101426100211223e-06, "loss": 1.2897, "step": 36205 }, { "epoch": 1.0676691729323309, "grad_norm": 2.9424532796190617, "learning_rate": 6.100924964869947e-06, "loss": 1.3222, "step": 36210 }, { "epoch": 1.0678166003243403, "grad_norm": 2.9121738990823247, "learning_rate": 6.100423760350018e-06, "loss": 1.3107, "step": 36215 }, { "epoch": 1.0679640277163498, "grad_norm": 3.047254575862186, "learning_rate": 6.099922486666187e-06, "loss": 1.3557, "step": 36220 }, { "epoch": 1.068111455108359, "grad_norm": 2.7770205358729894, "learning_rate": 6.099421143833202e-06, "loss": 1.3167, "step": 36225 }, { "epoch": 1.0682588825003685, "grad_norm": 2.914445824286397, "learning_rate": 6.098919731865818e-06, "loss": 1.2766, "step": 36230 }, { "epoch": 1.068406309892378, "grad_norm": 2.847711871515048, "learning_rate": 6.098418250778789e-06, "loss": 1.3711, "step": 36235 }, { "epoch": 1.0685537372843874, "grad_norm": 2.744928045360582, "learning_rate": 6.0979167005868715e-06, "loss": 1.2751, "step": 36240 }, { "epoch": 1.0687011646763969, "grad_norm": 2.963926431447226, "learning_rate": 6.097415081304824e-06, "loss": 1.3001, "step": 36245 }, { "epoch": 1.0688485920684063, "grad_norm": 2.855453898129689, "learning_rate": 6.096913392947407e-06, "loss": 1.3255, "step": 36250 }, { "epoch": 1.0689960194604158, "grad_norm": 2.8323122257875615, "learning_rate": 6.0964116355293835e-06, "loss": 1.2742, "step": 36255 }, { "epoch": 1.0691434468524252, "grad_norm": 2.987697744652499, "learning_rate": 6.095909809065518e-06, "loss": 1.3336, "step": 36260 }, { "epoch": 1.0692908742444347, "grad_norm": 2.8908819228550326, "learning_rate": 6.095407913570578e-06, "loss": 1.2771, "step": 36265 }, { "epoch": 1.0694383016364442, "grad_norm": 2.987688267834192, "learning_rate": 6.094905949059333e-06, "loss": 1.2722, "step": 36270 }, { "epoch": 1.0695857290284534, "grad_norm": 3.006523206513343, "learning_rate": 6.094403915546552e-06, "loss": 1.3223, "step": 36275 }, { "epoch": 1.0697331564204628, "grad_norm": 3.0284797637388396, "learning_rate": 6.093901813047008e-06, "loss": 1.3214, "step": 36280 }, { "epoch": 1.0698805838124723, "grad_norm": 2.954671689256018, "learning_rate": 6.0933996415754766e-06, "loss": 1.2684, "step": 36285 }, { "epoch": 1.0700280112044818, "grad_norm": 3.0342910446595104, "learning_rate": 6.0928974011467365e-06, "loss": 1.2846, "step": 36290 }, { "epoch": 1.0701754385964912, "grad_norm": 2.932875326090357, "learning_rate": 6.092395091775563e-06, "loss": 1.2782, "step": 36295 }, { "epoch": 1.0703228659885007, "grad_norm": 2.9470354644327443, "learning_rate": 6.091892713476741e-06, "loss": 1.3474, "step": 36300 }, { "epoch": 1.0704702933805101, "grad_norm": 3.0818131861345313, "learning_rate": 6.091390266265051e-06, "loss": 1.2969, "step": 36305 }, { "epoch": 1.0706177207725196, "grad_norm": 2.9025322836949408, "learning_rate": 6.0908877501552775e-06, "loss": 1.2654, "step": 36310 }, { "epoch": 1.070765148164529, "grad_norm": 3.2182866433562234, "learning_rate": 6.0903851651622115e-06, "loss": 1.2754, "step": 36315 }, { "epoch": 1.0709125755565383, "grad_norm": 3.0513486021695373, "learning_rate": 6.089882511300638e-06, "loss": 1.3119, "step": 36320 }, { "epoch": 1.0710600029485478, "grad_norm": 3.1935550820404206, "learning_rate": 6.089379788585349e-06, "loss": 1.3114, "step": 36325 }, { "epoch": 1.0712074303405572, "grad_norm": 2.88253999352911, "learning_rate": 6.08887699703114e-06, "loss": 1.2889, "step": 36330 }, { "epoch": 1.0713548577325667, "grad_norm": 2.889707922076829, "learning_rate": 6.088374136652805e-06, "loss": 1.2981, "step": 36335 }, { "epoch": 1.0715022851245761, "grad_norm": 2.8696379730381674, "learning_rate": 6.0878712074651406e-06, "loss": 1.3221, "step": 36340 }, { "epoch": 1.0716497125165856, "grad_norm": 3.059009978027112, "learning_rate": 6.087368209482946e-06, "loss": 1.3049, "step": 36345 }, { "epoch": 1.071797139908595, "grad_norm": 2.956004509193361, "learning_rate": 6.086865142721022e-06, "loss": 1.3218, "step": 36350 }, { "epoch": 1.0719445673006045, "grad_norm": 3.0457358763833775, "learning_rate": 6.086362007194174e-06, "loss": 1.311, "step": 36355 }, { "epoch": 1.072091994692614, "grad_norm": 2.990570749606425, "learning_rate": 6.085858802917205e-06, "loss": 1.3165, "step": 36360 }, { "epoch": 1.0722394220846234, "grad_norm": 2.8120841181475136, "learning_rate": 6.085355529904924e-06, "loss": 1.3055, "step": 36365 }, { "epoch": 1.0723868494766329, "grad_norm": 2.741415610701446, "learning_rate": 6.0848521881721406e-06, "loss": 1.2937, "step": 36370 }, { "epoch": 1.0725342768686421, "grad_norm": 2.861659040112488, "learning_rate": 6.084348777733664e-06, "loss": 1.3036, "step": 36375 }, { "epoch": 1.0726817042606516, "grad_norm": 3.0822660438439993, "learning_rate": 6.083845298604309e-06, "loss": 1.3003, "step": 36380 }, { "epoch": 1.072829131652661, "grad_norm": 2.852753219209137, "learning_rate": 6.0833417507988926e-06, "loss": 1.3001, "step": 36385 }, { "epoch": 1.0729765590446705, "grad_norm": 2.8237096969950337, "learning_rate": 6.082838134332229e-06, "loss": 1.2606, "step": 36390 }, { "epoch": 1.07312398643668, "grad_norm": 2.9824002833280168, "learning_rate": 6.082334449219141e-06, "loss": 1.2882, "step": 36395 }, { "epoch": 1.0732714138286894, "grad_norm": 2.9691064422052804, "learning_rate": 6.0818306954744494e-06, "loss": 1.3383, "step": 36400 }, { "epoch": 1.0734188412206989, "grad_norm": 2.906927241420846, "learning_rate": 6.081326873112975e-06, "loss": 1.3099, "step": 36405 }, { "epoch": 1.0735662686127083, "grad_norm": 3.433281550830158, "learning_rate": 6.080822982149545e-06, "loss": 1.3105, "step": 36410 }, { "epoch": 1.0737136960047178, "grad_norm": 2.9615660625825257, "learning_rate": 6.080319022598988e-06, "loss": 1.3192, "step": 36415 }, { "epoch": 1.073861123396727, "grad_norm": 3.00059684410664, "learning_rate": 6.079814994476134e-06, "loss": 1.349, "step": 36420 }, { "epoch": 1.0740085507887365, "grad_norm": 3.0762000278804607, "learning_rate": 6.079310897795811e-06, "loss": 1.2871, "step": 36425 }, { "epoch": 1.074155978180746, "grad_norm": 2.87886951136664, "learning_rate": 6.078806732572857e-06, "loss": 1.3255, "step": 36430 }, { "epoch": 1.0743034055727554, "grad_norm": 2.8272148387451614, "learning_rate": 6.078302498822105e-06, "loss": 1.3042, "step": 36435 }, { "epoch": 1.0744508329647648, "grad_norm": 2.893168528356708, "learning_rate": 6.077798196558393e-06, "loss": 1.337, "step": 36440 }, { "epoch": 1.0745982603567743, "grad_norm": 2.905496264956328, "learning_rate": 6.077293825796562e-06, "loss": 1.3159, "step": 36445 }, { "epoch": 1.0747456877487838, "grad_norm": 2.8974514798662896, "learning_rate": 6.076789386551452e-06, "loss": 1.2957, "step": 36450 }, { "epoch": 1.0748931151407932, "grad_norm": 2.9765098449288043, "learning_rate": 6.076284878837906e-06, "loss": 1.288, "step": 36455 }, { "epoch": 1.0750405425328027, "grad_norm": 2.8312851186162877, "learning_rate": 6.075780302670773e-06, "loss": 1.3669, "step": 36460 }, { "epoch": 1.0751879699248121, "grad_norm": 2.956061950154361, "learning_rate": 6.075275658064899e-06, "loss": 1.3268, "step": 36465 }, { "epoch": 1.0753353973168214, "grad_norm": 3.066840088432496, "learning_rate": 6.074770945035134e-06, "loss": 1.3163, "step": 36470 }, { "epoch": 1.0754828247088308, "grad_norm": 2.9818473695038934, "learning_rate": 6.074266163596327e-06, "loss": 1.3142, "step": 36475 }, { "epoch": 1.0756302521008403, "grad_norm": 3.067294504313038, "learning_rate": 6.073761313763335e-06, "loss": 1.3272, "step": 36480 }, { "epoch": 1.0757776794928497, "grad_norm": 2.8573732941356127, "learning_rate": 6.073256395551015e-06, "loss": 1.2733, "step": 36485 }, { "epoch": 1.0759251068848592, "grad_norm": 2.8441285594531966, "learning_rate": 6.07275140897422e-06, "loss": 1.3103, "step": 36490 }, { "epoch": 1.0760725342768687, "grad_norm": 2.869064874454892, "learning_rate": 6.072246354047814e-06, "loss": 1.2886, "step": 36495 }, { "epoch": 1.0762199616688781, "grad_norm": 2.9443044167370758, "learning_rate": 6.071741230786655e-06, "loss": 1.2872, "step": 36500 }, { "epoch": 1.0762199616688781, "eval_loss": 1.128989815711975, "eval_runtime": 4.1933, "eval_samples_per_second": 94.435, "eval_steps_per_second": 3.1, "step": 36500 }, { "epoch": 1.0763673890608876, "grad_norm": 2.8342108287412464, "learning_rate": 6.071236039205612e-06, "loss": 1.2876, "step": 36505 }, { "epoch": 1.076514816452897, "grad_norm": 3.0534001864153266, "learning_rate": 6.0707307793195475e-06, "loss": 1.3208, "step": 36510 }, { "epoch": 1.0766622438449063, "grad_norm": 2.9594806152708517, "learning_rate": 6.070225451143329e-06, "loss": 1.2767, "step": 36515 }, { "epoch": 1.0768096712369157, "grad_norm": 2.988357873824673, "learning_rate": 6.069720054691827e-06, "loss": 1.3192, "step": 36520 }, { "epoch": 1.0769570986289252, "grad_norm": 3.025879630901423, "learning_rate": 6.0692145899799135e-06, "loss": 1.2864, "step": 36525 }, { "epoch": 1.0771045260209346, "grad_norm": 3.009302358566407, "learning_rate": 6.068709057022463e-06, "loss": 1.3141, "step": 36530 }, { "epoch": 1.077251953412944, "grad_norm": 2.905748199005529, "learning_rate": 6.068203455834351e-06, "loss": 1.3017, "step": 36535 }, { "epoch": 1.0773993808049536, "grad_norm": 2.8820725088566435, "learning_rate": 6.067697786430454e-06, "loss": 1.2716, "step": 36540 }, { "epoch": 1.077546808196963, "grad_norm": 2.837659377873651, "learning_rate": 6.067192048825654e-06, "loss": 1.2693, "step": 36545 }, { "epoch": 1.0776942355889725, "grad_norm": 2.9019473200075865, "learning_rate": 6.066686243034833e-06, "loss": 1.2812, "step": 36550 }, { "epoch": 1.077841662980982, "grad_norm": 3.076876265674727, "learning_rate": 6.066180369072873e-06, "loss": 1.333, "step": 36555 }, { "epoch": 1.0779890903729914, "grad_norm": 2.759564945220373, "learning_rate": 6.06567442695466e-06, "loss": 1.3313, "step": 36560 }, { "epoch": 1.0781365177650006, "grad_norm": 2.9780626627129765, "learning_rate": 6.0651684166950835e-06, "loss": 1.2985, "step": 36565 }, { "epoch": 1.07828394515701, "grad_norm": 2.98035412585308, "learning_rate": 6.0646623383090325e-06, "loss": 1.2996, "step": 36570 }, { "epoch": 1.0784313725490196, "grad_norm": 3.0479898153903933, "learning_rate": 6.064156191811399e-06, "loss": 1.2876, "step": 36575 }, { "epoch": 1.078578799941029, "grad_norm": 2.9041758451652124, "learning_rate": 6.063649977217077e-06, "loss": 1.2744, "step": 36580 }, { "epoch": 1.0787262273330385, "grad_norm": 3.0184226446846902, "learning_rate": 6.063143694540963e-06, "loss": 1.352, "step": 36585 }, { "epoch": 1.078873654725048, "grad_norm": 2.82908749899752, "learning_rate": 6.062637343797954e-06, "loss": 1.3542, "step": 36590 }, { "epoch": 1.0790210821170574, "grad_norm": 2.860833385702344, "learning_rate": 6.062130925002951e-06, "loss": 1.3122, "step": 36595 }, { "epoch": 1.0791685095090668, "grad_norm": 3.044717672530315, "learning_rate": 6.0616244381708555e-06, "loss": 1.3047, "step": 36600 }, { "epoch": 1.0793159369010763, "grad_norm": 2.8868306522731717, "learning_rate": 6.06111788331657e-06, "loss": 1.3593, "step": 36605 }, { "epoch": 1.0794633642930858, "grad_norm": 3.0084036512128334, "learning_rate": 6.060611260455003e-06, "loss": 1.3076, "step": 36610 }, { "epoch": 1.079610791685095, "grad_norm": 2.911162821421004, "learning_rate": 6.060104569601061e-06, "loss": 1.3217, "step": 36615 }, { "epoch": 1.0797582190771045, "grad_norm": 2.8748504720475583, "learning_rate": 6.059597810769655e-06, "loss": 1.3445, "step": 36620 }, { "epoch": 1.079905646469114, "grad_norm": 3.1309559312179913, "learning_rate": 6.059090983975694e-06, "loss": 1.3464, "step": 36625 }, { "epoch": 1.0800530738611234, "grad_norm": 2.8472313548778208, "learning_rate": 6.058584089234095e-06, "loss": 1.2982, "step": 36630 }, { "epoch": 1.0802005012531328, "grad_norm": 2.909226564008916, "learning_rate": 6.0580771265597735e-06, "loss": 1.3366, "step": 36635 }, { "epoch": 1.0803479286451423, "grad_norm": 2.83819925015423, "learning_rate": 6.057570095967647e-06, "loss": 1.3436, "step": 36640 }, { "epoch": 1.0804953560371517, "grad_norm": 2.782940619732421, "learning_rate": 6.057062997472635e-06, "loss": 1.3427, "step": 36645 }, { "epoch": 1.0806427834291612, "grad_norm": 2.9815972699998805, "learning_rate": 6.056555831089661e-06, "loss": 1.3517, "step": 36650 }, { "epoch": 1.0807902108211707, "grad_norm": 3.0368547925452387, "learning_rate": 6.056048596833646e-06, "loss": 1.3131, "step": 36655 }, { "epoch": 1.0809376382131801, "grad_norm": 2.696307625415169, "learning_rate": 6.05554129471952e-06, "loss": 1.2551, "step": 36660 }, { "epoch": 1.0810850656051894, "grad_norm": 2.8875555660508776, "learning_rate": 6.055033924762207e-06, "loss": 1.3226, "step": 36665 }, { "epoch": 1.0812324929971988, "grad_norm": 2.9988660666991294, "learning_rate": 6.05452648697664e-06, "loss": 1.2542, "step": 36670 }, { "epoch": 1.0813799203892083, "grad_norm": 3.025941713349638, "learning_rate": 6.054018981377748e-06, "loss": 1.3416, "step": 36675 }, { "epoch": 1.0815273477812177, "grad_norm": 2.9013554264634265, "learning_rate": 6.053511407980468e-06, "loss": 1.3293, "step": 36680 }, { "epoch": 1.0816747751732272, "grad_norm": 3.048694702778849, "learning_rate": 6.0530037667997336e-06, "loss": 1.3075, "step": 36685 }, { "epoch": 1.0818222025652366, "grad_norm": 2.91754947093681, "learning_rate": 6.052496057850484e-06, "loss": 1.3531, "step": 36690 }, { "epoch": 1.081969629957246, "grad_norm": 2.9607346813393827, "learning_rate": 6.051988281147659e-06, "loss": 1.3429, "step": 36695 }, { "epoch": 1.0821170573492556, "grad_norm": 2.8553160671902744, "learning_rate": 6.0514804367061995e-06, "loss": 1.3378, "step": 36700 }, { "epoch": 1.082264484741265, "grad_norm": 2.8969086672217967, "learning_rate": 6.05097252454105e-06, "loss": 1.3143, "step": 36705 }, { "epoch": 1.0824119121332743, "grad_norm": 2.989083379814883, "learning_rate": 6.050464544667157e-06, "loss": 1.3315, "step": 36710 }, { "epoch": 1.0825593395252837, "grad_norm": 2.963432546043167, "learning_rate": 6.049956497099468e-06, "loss": 1.2943, "step": 36715 }, { "epoch": 1.0827067669172932, "grad_norm": 3.001906551243653, "learning_rate": 6.0494483818529335e-06, "loss": 1.3235, "step": 36720 }, { "epoch": 1.0828541943093026, "grad_norm": 2.865052136883099, "learning_rate": 6.048940198942504e-06, "loss": 1.3047, "step": 36725 }, { "epoch": 1.083001621701312, "grad_norm": 2.8456423079852957, "learning_rate": 6.048431948383134e-06, "loss": 1.2772, "step": 36730 }, { "epoch": 1.0831490490933215, "grad_norm": 2.962988744650054, "learning_rate": 6.047923630189779e-06, "loss": 1.3198, "step": 36735 }, { "epoch": 1.083296476485331, "grad_norm": 3.011078175476215, "learning_rate": 6.047415244377399e-06, "loss": 1.32, "step": 36740 }, { "epoch": 1.0834439038773405, "grad_norm": 3.0259098470185486, "learning_rate": 6.0469067909609495e-06, "loss": 1.315, "step": 36745 }, { "epoch": 1.08359133126935, "grad_norm": 2.9426264190000637, "learning_rate": 6.046398269955396e-06, "loss": 1.3404, "step": 36750 }, { "epoch": 1.0837387586613594, "grad_norm": 2.8851913550101806, "learning_rate": 6.045889681375702e-06, "loss": 1.2841, "step": 36755 }, { "epoch": 1.0838861860533686, "grad_norm": 2.8601153303291738, "learning_rate": 6.045381025236831e-06, "loss": 1.2941, "step": 36760 }, { "epoch": 1.084033613445378, "grad_norm": 2.9125142442593024, "learning_rate": 6.0448723015537534e-06, "loss": 1.3023, "step": 36765 }, { "epoch": 1.0841810408373875, "grad_norm": 2.954739767849567, "learning_rate": 6.0443635103414365e-06, "loss": 1.3531, "step": 36770 }, { "epoch": 1.084328468229397, "grad_norm": 2.970125095093763, "learning_rate": 6.043854651614854e-06, "loss": 1.3588, "step": 36775 }, { "epoch": 1.0844758956214064, "grad_norm": 2.797445038683611, "learning_rate": 6.043345725388981e-06, "loss": 1.3463, "step": 36780 }, { "epoch": 1.084623323013416, "grad_norm": 2.9132291079250425, "learning_rate": 6.04283673167879e-06, "loss": 1.3423, "step": 36785 }, { "epoch": 1.0847707504054254, "grad_norm": 2.9707077493737897, "learning_rate": 6.042327670499259e-06, "loss": 1.2967, "step": 36790 }, { "epoch": 1.0849181777974348, "grad_norm": 3.016806659620994, "learning_rate": 6.04181854186537e-06, "loss": 1.3492, "step": 36795 }, { "epoch": 1.0850656051894443, "grad_norm": 3.0050762802684527, "learning_rate": 6.041309345792103e-06, "loss": 1.2596, "step": 36800 }, { "epoch": 1.0852130325814535, "grad_norm": 3.110135840015545, "learning_rate": 6.040800082294443e-06, "loss": 1.3528, "step": 36805 }, { "epoch": 1.085360459973463, "grad_norm": 2.9386090929428876, "learning_rate": 6.040290751387373e-06, "loss": 1.2573, "step": 36810 }, { "epoch": 1.0855078873654724, "grad_norm": 2.990563286065844, "learning_rate": 6.039781353085883e-06, "loss": 1.3327, "step": 36815 }, { "epoch": 1.085655314757482, "grad_norm": 2.874307253735379, "learning_rate": 6.039271887404963e-06, "loss": 1.3424, "step": 36820 }, { "epoch": 1.0858027421494914, "grad_norm": 3.0353863065048228, "learning_rate": 6.038762354359601e-06, "loss": 1.361, "step": 36825 }, { "epoch": 1.0859501695415008, "grad_norm": 3.076891475476391, "learning_rate": 6.038252753964795e-06, "loss": 1.3305, "step": 36830 }, { "epoch": 1.0860975969335103, "grad_norm": 2.8704184616407433, "learning_rate": 6.03774308623554e-06, "loss": 1.3642, "step": 36835 }, { "epoch": 1.0862450243255197, "grad_norm": 2.745436015256803, "learning_rate": 6.037233351186832e-06, "loss": 1.2855, "step": 36840 }, { "epoch": 1.0863924517175292, "grad_norm": 2.8354761739928898, "learning_rate": 6.036723548833669e-06, "loss": 1.3059, "step": 36845 }, { "epoch": 1.0865398791095386, "grad_norm": 2.9372626308824845, "learning_rate": 6.036213679191056e-06, "loss": 1.2412, "step": 36850 }, { "epoch": 1.086687306501548, "grad_norm": 3.119410515037176, "learning_rate": 6.0357037422739956e-06, "loss": 1.295, "step": 36855 }, { "epoch": 1.0868347338935573, "grad_norm": 2.9614150659322886, "learning_rate": 6.035193738097491e-06, "loss": 1.2784, "step": 36860 }, { "epoch": 1.0869821612855668, "grad_norm": 2.9524498307260822, "learning_rate": 6.034683666676552e-06, "loss": 1.298, "step": 36865 }, { "epoch": 1.0871295886775763, "grad_norm": 2.9107583568755793, "learning_rate": 6.034173528026186e-06, "loss": 1.3087, "step": 36870 }, { "epoch": 1.0872770160695857, "grad_norm": 2.9589794044189284, "learning_rate": 6.0336633221614064e-06, "loss": 1.3167, "step": 36875 }, { "epoch": 1.0874244434615952, "grad_norm": 2.8511067632147213, "learning_rate": 6.033153049097227e-06, "loss": 1.3187, "step": 36880 }, { "epoch": 1.0875718708536046, "grad_norm": 2.782820647515701, "learning_rate": 6.032642708848661e-06, "loss": 1.2844, "step": 36885 }, { "epoch": 1.087719298245614, "grad_norm": 2.9114957656755993, "learning_rate": 6.032132301430727e-06, "loss": 1.3452, "step": 36890 }, { "epoch": 1.0878667256376235, "grad_norm": 2.8730060349909405, "learning_rate": 6.0316218268584435e-06, "loss": 1.2907, "step": 36895 }, { "epoch": 1.088014153029633, "grad_norm": 2.9478891252936577, "learning_rate": 6.031111285146833e-06, "loss": 1.3082, "step": 36900 }, { "epoch": 1.0881615804216422, "grad_norm": 2.9180794137050565, "learning_rate": 6.0306006763109185e-06, "loss": 1.2809, "step": 36905 }, { "epoch": 1.0883090078136517, "grad_norm": 3.014086339976704, "learning_rate": 6.0300900003657245e-06, "loss": 1.3183, "step": 36910 }, { "epoch": 1.0884564352056612, "grad_norm": 2.9434956700952983, "learning_rate": 6.029579257326279e-06, "loss": 1.3143, "step": 36915 }, { "epoch": 1.0886038625976706, "grad_norm": 2.9717784556401186, "learning_rate": 6.02906844720761e-06, "loss": 1.3611, "step": 36920 }, { "epoch": 1.08875128998968, "grad_norm": 2.786742621227801, "learning_rate": 6.028557570024751e-06, "loss": 1.3244, "step": 36925 }, { "epoch": 1.0888987173816895, "grad_norm": 2.896665957175971, "learning_rate": 6.028046625792733e-06, "loss": 1.283, "step": 36930 }, { "epoch": 1.089046144773699, "grad_norm": 2.8494273463904665, "learning_rate": 6.027535614526591e-06, "loss": 1.3283, "step": 36935 }, { "epoch": 1.0891935721657084, "grad_norm": 2.8982898940396695, "learning_rate": 6.027024536241364e-06, "loss": 1.3188, "step": 36940 }, { "epoch": 1.089340999557718, "grad_norm": 2.7913711702905544, "learning_rate": 6.026513390952089e-06, "loss": 1.3202, "step": 36945 }, { "epoch": 1.0894884269497274, "grad_norm": 2.8802804190864246, "learning_rate": 6.026002178673809e-06, "loss": 1.3647, "step": 36950 }, { "epoch": 1.0896358543417366, "grad_norm": 3.1218628105297883, "learning_rate": 6.025490899421565e-06, "loss": 1.2996, "step": 36955 }, { "epoch": 1.089783281733746, "grad_norm": 3.033754679283563, "learning_rate": 6.024979553210403e-06, "loss": 1.3057, "step": 36960 }, { "epoch": 1.0899307091257555, "grad_norm": 2.680218166821338, "learning_rate": 6.02446814005537e-06, "loss": 1.3403, "step": 36965 }, { "epoch": 1.090078136517765, "grad_norm": 2.9918640802157666, "learning_rate": 6.023956659971515e-06, "loss": 1.312, "step": 36970 }, { "epoch": 1.0902255639097744, "grad_norm": 2.897000539511732, "learning_rate": 6.023445112973889e-06, "loss": 1.3061, "step": 36975 }, { "epoch": 1.090372991301784, "grad_norm": 3.066143483892612, "learning_rate": 6.022933499077543e-06, "loss": 1.3058, "step": 36980 }, { "epoch": 1.0905204186937933, "grad_norm": 2.8402007890924548, "learning_rate": 6.022421818297535e-06, "loss": 1.3212, "step": 36985 }, { "epoch": 1.0906678460858028, "grad_norm": 2.911074548835903, "learning_rate": 6.021910070648921e-06, "loss": 1.3486, "step": 36990 }, { "epoch": 1.0908152734778123, "grad_norm": 2.8880800134169813, "learning_rate": 6.021398256146757e-06, "loss": 1.3172, "step": 36995 }, { "epoch": 1.0909627008698215, "grad_norm": 2.965770127000523, "learning_rate": 6.020886374806106e-06, "loss": 1.346, "step": 37000 }, { "epoch": 1.0909627008698215, "eval_loss": 1.1260353326797485, "eval_runtime": 4.3061, "eval_samples_per_second": 91.963, "eval_steps_per_second": 3.019, "step": 37000 }, { "epoch": 1.091110128261831, "grad_norm": 3.0601184196780946, "learning_rate": 6.020374426642032e-06, "loss": 1.3762, "step": 37005 }, { "epoch": 1.0912575556538404, "grad_norm": 2.9315655813641635, "learning_rate": 6.0198624116695955e-06, "loss": 1.3118, "step": 37010 }, { "epoch": 1.0914049830458499, "grad_norm": 2.886523116147877, "learning_rate": 6.019350329903866e-06, "loss": 1.3296, "step": 37015 }, { "epoch": 1.0915524104378593, "grad_norm": 2.7184480560791924, "learning_rate": 6.018838181359912e-06, "loss": 1.2517, "step": 37020 }, { "epoch": 1.0916998378298688, "grad_norm": 2.935816562538307, "learning_rate": 6.018325966052805e-06, "loss": 1.3069, "step": 37025 }, { "epoch": 1.0918472652218782, "grad_norm": 2.9943986555841713, "learning_rate": 6.017813683997616e-06, "loss": 1.3397, "step": 37030 }, { "epoch": 1.0919946926138877, "grad_norm": 2.9375315303060208, "learning_rate": 6.017301335209419e-06, "loss": 1.2799, "step": 37035 }, { "epoch": 1.0921421200058972, "grad_norm": 2.9754113776421125, "learning_rate": 6.016788919703292e-06, "loss": 1.3292, "step": 37040 }, { "epoch": 1.0922895473979066, "grad_norm": 3.1792671383872166, "learning_rate": 6.016276437494312e-06, "loss": 1.3541, "step": 37045 }, { "epoch": 1.092436974789916, "grad_norm": 2.8631056105699595, "learning_rate": 6.01576388859756e-06, "loss": 1.324, "step": 37050 }, { "epoch": 1.0925844021819253, "grad_norm": 3.0753397443470507, "learning_rate": 6.0152512730281185e-06, "loss": 1.3021, "step": 37055 }, { "epoch": 1.0927318295739348, "grad_norm": 3.098992337984324, "learning_rate": 6.014738590801072e-06, "loss": 1.3008, "step": 37060 }, { "epoch": 1.0928792569659442, "grad_norm": 3.0143402193493105, "learning_rate": 6.014225841931506e-06, "loss": 1.2795, "step": 37065 }, { "epoch": 1.0930266843579537, "grad_norm": 2.8665588437333027, "learning_rate": 6.013713026434509e-06, "loss": 1.3476, "step": 37070 }, { "epoch": 1.0931741117499632, "grad_norm": 3.062059349541976, "learning_rate": 6.0132001443251725e-06, "loss": 1.3259, "step": 37075 }, { "epoch": 1.0933215391419726, "grad_norm": 2.9473925919390034, "learning_rate": 6.012687195618585e-06, "loss": 1.2632, "step": 37080 }, { "epoch": 1.093468966533982, "grad_norm": 2.9899532600120957, "learning_rate": 6.012174180329845e-06, "loss": 1.3354, "step": 37085 }, { "epoch": 1.0936163939259915, "grad_norm": 3.0223555612871036, "learning_rate": 6.011661098474047e-06, "loss": 1.2856, "step": 37090 }, { "epoch": 1.093763821318001, "grad_norm": 3.1225089551831555, "learning_rate": 6.0111479500662875e-06, "loss": 1.307, "step": 37095 }, { "epoch": 1.0939112487100102, "grad_norm": 2.8949632859843013, "learning_rate": 6.010634735121667e-06, "loss": 1.3481, "step": 37100 }, { "epoch": 1.0940586761020197, "grad_norm": 2.9464768321459367, "learning_rate": 6.010121453655289e-06, "loss": 1.3476, "step": 37105 }, { "epoch": 1.0942061034940291, "grad_norm": 2.8431656712639213, "learning_rate": 6.009608105682256e-06, "loss": 1.3144, "step": 37110 }, { "epoch": 1.0943535308860386, "grad_norm": 3.0400762459115187, "learning_rate": 6.009094691217675e-06, "loss": 1.3489, "step": 37115 }, { "epoch": 1.094500958278048, "grad_norm": 3.041218208078162, "learning_rate": 6.008581210276653e-06, "loss": 1.3565, "step": 37120 }, { "epoch": 1.0946483856700575, "grad_norm": 2.881864714147479, "learning_rate": 6.0080676628743e-06, "loss": 1.2813, "step": 37125 }, { "epoch": 1.094795813062067, "grad_norm": 2.990482894053211, "learning_rate": 6.0075540490257265e-06, "loss": 1.3041, "step": 37130 }, { "epoch": 1.0949432404540764, "grad_norm": 2.964204194351562, "learning_rate": 6.007040368746049e-06, "loss": 1.3583, "step": 37135 }, { "epoch": 1.0950906678460859, "grad_norm": 2.9248155391361, "learning_rate": 6.00652662205038e-06, "loss": 1.3122, "step": 37140 }, { "epoch": 1.0952380952380953, "grad_norm": 3.016031383266784, "learning_rate": 6.006012808953839e-06, "loss": 1.3362, "step": 37145 }, { "epoch": 1.0953855226301046, "grad_norm": 3.0280863203600337, "learning_rate": 6.005498929471544e-06, "loss": 1.3258, "step": 37150 }, { "epoch": 1.095532950022114, "grad_norm": 3.0041783370647615, "learning_rate": 6.004984983618618e-06, "loss": 1.3019, "step": 37155 }, { "epoch": 1.0956803774141235, "grad_norm": 2.9681406973226183, "learning_rate": 6.004470971410185e-06, "loss": 1.3152, "step": 37160 }, { "epoch": 1.095827804806133, "grad_norm": 2.88443277650865, "learning_rate": 6.003956892861368e-06, "loss": 1.3176, "step": 37165 }, { "epoch": 1.0959752321981424, "grad_norm": 3.054285491517367, "learning_rate": 6.003442747987296e-06, "loss": 1.3598, "step": 37170 }, { "epoch": 1.0961226595901519, "grad_norm": 2.8235063107005525, "learning_rate": 6.002928536803098e-06, "loss": 1.2624, "step": 37175 }, { "epoch": 1.0962700869821613, "grad_norm": 3.0589143935460577, "learning_rate": 6.002414259323906e-06, "loss": 1.3141, "step": 37180 }, { "epoch": 1.0964175143741708, "grad_norm": 2.9293793729716233, "learning_rate": 6.001899915564852e-06, "loss": 1.3376, "step": 37185 }, { "epoch": 1.0965649417661802, "grad_norm": 2.9830657032138355, "learning_rate": 6.001385505541071e-06, "loss": 1.3159, "step": 37190 }, { "epoch": 1.0967123691581895, "grad_norm": 2.9124160644679837, "learning_rate": 6.000871029267702e-06, "loss": 1.3462, "step": 37195 }, { "epoch": 1.096859796550199, "grad_norm": 3.00350786407978, "learning_rate": 6.000356486759882e-06, "loss": 1.3303, "step": 37200 }, { "epoch": 1.0970072239422084, "grad_norm": 3.0095972437771907, "learning_rate": 5.9998418780327524e-06, "loss": 1.3253, "step": 37205 }, { "epoch": 1.0971546513342179, "grad_norm": 2.898466494766598, "learning_rate": 5.999327203101457e-06, "loss": 1.3224, "step": 37210 }, { "epoch": 1.0973020787262273, "grad_norm": 2.8951505467114482, "learning_rate": 5.99881246198114e-06, "loss": 1.331, "step": 37215 }, { "epoch": 1.0974495061182368, "grad_norm": 3.014512908429324, "learning_rate": 5.998297654686947e-06, "loss": 1.3369, "step": 37220 }, { "epoch": 1.0975969335102462, "grad_norm": 3.022142834169051, "learning_rate": 5.99778278123403e-06, "loss": 1.324, "step": 37225 }, { "epoch": 1.0977443609022557, "grad_norm": 3.012272008234105, "learning_rate": 5.997267841637537e-06, "loss": 1.2839, "step": 37230 }, { "epoch": 1.0978917882942651, "grad_norm": 2.9358705151368527, "learning_rate": 5.996752835912622e-06, "loss": 1.3183, "step": 37235 }, { "epoch": 1.0980392156862746, "grad_norm": 2.8488501441582734, "learning_rate": 5.9962377640744394e-06, "loss": 1.3107, "step": 37240 }, { "epoch": 1.0981866430782838, "grad_norm": 2.9970846018757316, "learning_rate": 5.995722626138147e-06, "loss": 1.2942, "step": 37245 }, { "epoch": 1.0983340704702933, "grad_norm": 2.832789912492581, "learning_rate": 5.9952074221189e-06, "loss": 1.3165, "step": 37250 }, { "epoch": 1.0984814978623028, "grad_norm": 2.917114410431144, "learning_rate": 5.994692152031862e-06, "loss": 1.3395, "step": 37255 }, { "epoch": 1.0986289252543122, "grad_norm": 2.9756050786336075, "learning_rate": 5.994176815892193e-06, "loss": 1.3629, "step": 37260 }, { "epoch": 1.0987763526463217, "grad_norm": 2.9257331167827445, "learning_rate": 5.9936614137150605e-06, "loss": 1.337, "step": 37265 }, { "epoch": 1.0989237800383311, "grad_norm": 2.952970209269295, "learning_rate": 5.9931459455156275e-06, "loss": 1.2868, "step": 37270 }, { "epoch": 1.0990712074303406, "grad_norm": 2.894537257426794, "learning_rate": 5.992630411309063e-06, "loss": 1.3606, "step": 37275 }, { "epoch": 1.09921863482235, "grad_norm": 2.9678121732258154, "learning_rate": 5.992114811110539e-06, "loss": 1.3127, "step": 37280 }, { "epoch": 1.0993660622143595, "grad_norm": 2.8848254374218727, "learning_rate": 5.991599144935227e-06, "loss": 1.2921, "step": 37285 }, { "epoch": 1.099513489606369, "grad_norm": 2.783070875789639, "learning_rate": 5.991083412798299e-06, "loss": 1.339, "step": 37290 }, { "epoch": 1.0996609169983782, "grad_norm": 2.99069452105584, "learning_rate": 5.990567614714933e-06, "loss": 1.341, "step": 37295 }, { "epoch": 1.0998083443903877, "grad_norm": 2.9564054982730164, "learning_rate": 5.990051750700307e-06, "loss": 1.297, "step": 37300 }, { "epoch": 1.0999557717823971, "grad_norm": 2.8516219710319746, "learning_rate": 5.9895358207696005e-06, "loss": 1.2963, "step": 37305 }, { "epoch": 1.1001031991744066, "grad_norm": 2.865765294424085, "learning_rate": 5.989019824937995e-06, "loss": 1.2759, "step": 37310 }, { "epoch": 1.100250626566416, "grad_norm": 2.874957195272486, "learning_rate": 5.988503763220675e-06, "loss": 1.272, "step": 37315 }, { "epoch": 1.1003980539584255, "grad_norm": 2.836529623439134, "learning_rate": 5.987987635632824e-06, "loss": 1.2723, "step": 37320 }, { "epoch": 1.100545481350435, "grad_norm": 2.8239619795656674, "learning_rate": 5.987471442189632e-06, "loss": 1.3346, "step": 37325 }, { "epoch": 1.1006929087424444, "grad_norm": 2.947603908278169, "learning_rate": 5.986955182906288e-06, "loss": 1.3909, "step": 37330 }, { "epoch": 1.1008403361344539, "grad_norm": 2.964774235225198, "learning_rate": 5.986438857797984e-06, "loss": 1.284, "step": 37335 }, { "epoch": 1.1009877635264633, "grad_norm": 2.9607400716315175, "learning_rate": 5.985922466879912e-06, "loss": 1.3393, "step": 37340 }, { "epoch": 1.1011351909184726, "grad_norm": 3.0490033179448495, "learning_rate": 5.9854060101672686e-06, "loss": 1.3437, "step": 37345 }, { "epoch": 1.101282618310482, "grad_norm": 2.8730977073630535, "learning_rate": 5.984889487675251e-06, "loss": 1.2925, "step": 37350 }, { "epoch": 1.1014300457024915, "grad_norm": 3.0730673369439616, "learning_rate": 5.984372899419058e-06, "loss": 1.3452, "step": 37355 }, { "epoch": 1.101577473094501, "grad_norm": 2.9349194300971004, "learning_rate": 5.983856245413891e-06, "loss": 1.3151, "step": 37360 }, { "epoch": 1.1017249004865104, "grad_norm": 2.968625217336683, "learning_rate": 5.983339525674953e-06, "loss": 1.3327, "step": 37365 }, { "epoch": 1.1018723278785199, "grad_norm": 2.7993624025631063, "learning_rate": 5.98282274021745e-06, "loss": 1.2783, "step": 37370 }, { "epoch": 1.1020197552705293, "grad_norm": 2.874629356106442, "learning_rate": 5.982305889056588e-06, "loss": 1.3061, "step": 37375 }, { "epoch": 1.1021671826625388, "grad_norm": 2.7563649448165335, "learning_rate": 5.981788972207576e-06, "loss": 1.3062, "step": 37380 }, { "epoch": 1.1023146100545482, "grad_norm": 2.9063584579230484, "learning_rate": 5.981271989685625e-06, "loss": 1.2682, "step": 37385 }, { "epoch": 1.1024620374465575, "grad_norm": 2.860164282602478, "learning_rate": 5.9807549415059475e-06, "loss": 1.3441, "step": 37390 }, { "epoch": 1.102609464838567, "grad_norm": 2.8758629924252603, "learning_rate": 5.98023782768376e-06, "loss": 1.3091, "step": 37395 }, { "epoch": 1.1027568922305764, "grad_norm": 3.0204207098454323, "learning_rate": 5.979720648234277e-06, "loss": 1.374, "step": 37400 }, { "epoch": 1.1029043196225858, "grad_norm": 2.9653235776291837, "learning_rate": 5.979203403172717e-06, "loss": 1.3277, "step": 37405 }, { "epoch": 1.1030517470145953, "grad_norm": 2.837428985043336, "learning_rate": 5.978686092514302e-06, "loss": 1.276, "step": 37410 }, { "epoch": 1.1031991744066048, "grad_norm": 2.8896408196829935, "learning_rate": 5.978168716274253e-06, "loss": 1.2866, "step": 37415 }, { "epoch": 1.1033466017986142, "grad_norm": 2.889029674589825, "learning_rate": 5.977651274467797e-06, "loss": 1.2858, "step": 37420 }, { "epoch": 1.1034940291906237, "grad_norm": 2.9547929851336976, "learning_rate": 5.977133767110157e-06, "loss": 1.2994, "step": 37425 }, { "epoch": 1.1036414565826331, "grad_norm": 2.9969596747856238, "learning_rate": 5.976616194216563e-06, "loss": 1.342, "step": 37430 }, { "epoch": 1.1037888839746426, "grad_norm": 3.5962725020752577, "learning_rate": 5.9760985558022454e-06, "loss": 1.3249, "step": 37435 }, { "epoch": 1.1039363113666518, "grad_norm": 2.9214907140555857, "learning_rate": 5.9755808518824365e-06, "loss": 1.3495, "step": 37440 }, { "epoch": 1.1040837387586613, "grad_norm": 2.9618948531848144, "learning_rate": 5.975063082472369e-06, "loss": 1.3352, "step": 37445 }, { "epoch": 1.1042311661506707, "grad_norm": 2.931948464652945, "learning_rate": 5.974545247587281e-06, "loss": 1.3067, "step": 37450 }, { "epoch": 1.1043785935426802, "grad_norm": 2.9166531782662024, "learning_rate": 5.974027347242406e-06, "loss": 1.2862, "step": 37455 }, { "epoch": 1.1045260209346897, "grad_norm": 2.9564768546412585, "learning_rate": 5.973509381452988e-06, "loss": 1.314, "step": 37460 }, { "epoch": 1.1046734483266991, "grad_norm": 2.8382736226663505, "learning_rate": 5.972991350234268e-06, "loss": 1.2964, "step": 37465 }, { "epoch": 1.1048208757187086, "grad_norm": 2.8394026265894836, "learning_rate": 5.972473253601489e-06, "loss": 1.271, "step": 37470 }, { "epoch": 1.104968303110718, "grad_norm": 2.9104430458091524, "learning_rate": 5.971955091569895e-06, "loss": 1.3566, "step": 37475 }, { "epoch": 1.1051157305027275, "grad_norm": 2.916285658952654, "learning_rate": 5.971436864154738e-06, "loss": 1.2734, "step": 37480 }, { "epoch": 1.1052631578947367, "grad_norm": 3.008963605448822, "learning_rate": 5.970918571371263e-06, "loss": 1.2913, "step": 37485 }, { "epoch": 1.1054105852867462, "grad_norm": 2.9259126116773206, "learning_rate": 5.970400213234723e-06, "loss": 1.3383, "step": 37490 }, { "epoch": 1.1055580126787556, "grad_norm": 3.003777325223679, "learning_rate": 5.969881789760372e-06, "loss": 1.2942, "step": 37495 }, { "epoch": 1.105705440070765, "grad_norm": 2.9063250467142114, "learning_rate": 5.969363300963465e-06, "loss": 1.3494, "step": 37500 }, { "epoch": 1.105705440070765, "eval_loss": 1.1254873275756836, "eval_runtime": 4.1851, "eval_samples_per_second": 94.622, "eval_steps_per_second": 3.106, "step": 37500 }, { "epoch": 1.1058528674627746, "grad_norm": 3.005006960620822, "learning_rate": 5.968844746859257e-06, "loss": 1.3033, "step": 37505 }, { "epoch": 1.106000294854784, "grad_norm": 3.000202977919666, "learning_rate": 5.968326127463011e-06, "loss": 1.3648, "step": 37510 }, { "epoch": 1.1061477222467935, "grad_norm": 2.9488866314901, "learning_rate": 5.967807442789984e-06, "loss": 1.2758, "step": 37515 }, { "epoch": 1.106295149638803, "grad_norm": 2.998763965305685, "learning_rate": 5.967288692855439e-06, "loss": 1.3319, "step": 37520 }, { "epoch": 1.1064425770308124, "grad_norm": 3.0070350092045763, "learning_rate": 5.966769877674645e-06, "loss": 1.3131, "step": 37525 }, { "epoch": 1.1065900044228218, "grad_norm": 2.902963028301355, "learning_rate": 5.966250997262865e-06, "loss": 1.2824, "step": 37530 }, { "epoch": 1.1067374318148313, "grad_norm": 3.2250526835436952, "learning_rate": 5.965732051635368e-06, "loss": 1.3571, "step": 37535 }, { "epoch": 1.1068848592068405, "grad_norm": 2.9641947117295357, "learning_rate": 5.9652130408074265e-06, "loss": 1.2961, "step": 37540 }, { "epoch": 1.10703228659885, "grad_norm": 3.0395313460331055, "learning_rate": 5.964693964794311e-06, "loss": 1.3175, "step": 37545 }, { "epoch": 1.1071797139908595, "grad_norm": 3.108310704004606, "learning_rate": 5.964174823611297e-06, "loss": 1.3028, "step": 37550 }, { "epoch": 1.107327141382869, "grad_norm": 2.8909635012577586, "learning_rate": 5.96365561727366e-06, "loss": 1.2578, "step": 37555 }, { "epoch": 1.1074745687748784, "grad_norm": 2.9296149231044057, "learning_rate": 5.963136345796679e-06, "loss": 1.3259, "step": 37560 }, { "epoch": 1.1076219961668878, "grad_norm": 2.8079105818969343, "learning_rate": 5.962617009195635e-06, "loss": 1.3075, "step": 37565 }, { "epoch": 1.1077694235588973, "grad_norm": 2.979451691024881, "learning_rate": 5.962097607485808e-06, "loss": 1.3285, "step": 37570 }, { "epoch": 1.1079168509509068, "grad_norm": 3.044799277154058, "learning_rate": 5.961578140682484e-06, "loss": 1.3205, "step": 37575 }, { "epoch": 1.1080642783429162, "grad_norm": 2.9059851143157744, "learning_rate": 5.961058608800945e-06, "loss": 1.3696, "step": 37580 }, { "epoch": 1.1082117057349254, "grad_norm": 2.946018902452434, "learning_rate": 5.960539011856484e-06, "loss": 1.3095, "step": 37585 }, { "epoch": 1.108359133126935, "grad_norm": 2.980219814227423, "learning_rate": 5.960019349864389e-06, "loss": 1.3331, "step": 37590 }, { "epoch": 1.1085065605189444, "grad_norm": 2.964649770524663, "learning_rate": 5.959499622839949e-06, "loss": 1.3408, "step": 37595 }, { "epoch": 1.1086539879109538, "grad_norm": 2.9806846862200196, "learning_rate": 5.9589798307984615e-06, "loss": 1.3109, "step": 37600 }, { "epoch": 1.1088014153029633, "grad_norm": 2.975125254222679, "learning_rate": 5.958459973755218e-06, "loss": 1.2792, "step": 37605 }, { "epoch": 1.1089488426949727, "grad_norm": 2.8794260693443086, "learning_rate": 5.957940051725519e-06, "loss": 1.3276, "step": 37610 }, { "epoch": 1.1090962700869822, "grad_norm": 2.8312079563902843, "learning_rate": 5.957420064724663e-06, "loss": 1.3428, "step": 37615 }, { "epoch": 1.1092436974789917, "grad_norm": 3.0421208276706495, "learning_rate": 5.9569000127679515e-06, "loss": 1.326, "step": 37620 }, { "epoch": 1.109391124871001, "grad_norm": 2.942989512626033, "learning_rate": 5.956379895870687e-06, "loss": 1.2875, "step": 37625 }, { "epoch": 1.1095385522630106, "grad_norm": 2.906396942542393, "learning_rate": 5.9558597140481735e-06, "loss": 1.2959, "step": 37630 }, { "epoch": 1.1096859796550198, "grad_norm": 3.0308860564609885, "learning_rate": 5.955339467315719e-06, "loss": 1.3231, "step": 37635 }, { "epoch": 1.1098334070470293, "grad_norm": 3.647973433484402, "learning_rate": 5.954819155688635e-06, "loss": 1.3237, "step": 37640 }, { "epoch": 1.1099808344390387, "grad_norm": 2.912348380788121, "learning_rate": 5.954298779182227e-06, "loss": 1.2866, "step": 37645 }, { "epoch": 1.1101282618310482, "grad_norm": 2.978743103002667, "learning_rate": 5.953778337811811e-06, "loss": 1.3919, "step": 37650 }, { "epoch": 1.1102756892230576, "grad_norm": 2.9413997187955685, "learning_rate": 5.953257831592701e-06, "loss": 1.302, "step": 37655 }, { "epoch": 1.110423116615067, "grad_norm": 2.9039802386497904, "learning_rate": 5.952737260540214e-06, "loss": 1.3474, "step": 37660 }, { "epoch": 1.1105705440070766, "grad_norm": 3.0634308981467684, "learning_rate": 5.952216624669668e-06, "loss": 1.3066, "step": 37665 }, { "epoch": 1.110717971399086, "grad_norm": 2.8714028359499677, "learning_rate": 5.9516959239963825e-06, "loss": 1.3004, "step": 37670 }, { "epoch": 1.1108653987910955, "grad_norm": 2.8032905816431035, "learning_rate": 5.95117515853568e-06, "loss": 1.2679, "step": 37675 }, { "epoch": 1.1110128261831047, "grad_norm": 2.960623883034472, "learning_rate": 5.950654328302886e-06, "loss": 1.2901, "step": 37680 }, { "epoch": 1.1111602535751142, "grad_norm": 2.886938751589972, "learning_rate": 5.950133433313325e-06, "loss": 1.2937, "step": 37685 }, { "epoch": 1.1113076809671236, "grad_norm": 2.9564120126546367, "learning_rate": 5.949612473582325e-06, "loss": 1.3526, "step": 37690 }, { "epoch": 1.111455108359133, "grad_norm": 2.9456929222324786, "learning_rate": 5.949091449125217e-06, "loss": 1.339, "step": 37695 }, { "epoch": 1.1116025357511425, "grad_norm": 3.0286272226375335, "learning_rate": 5.948570359957332e-06, "loss": 1.3132, "step": 37700 }, { "epoch": 1.111749963143152, "grad_norm": 2.954055747165596, "learning_rate": 5.948049206094004e-06, "loss": 1.3095, "step": 37705 }, { "epoch": 1.1118973905351615, "grad_norm": 2.8714440229019704, "learning_rate": 5.9475279875505665e-06, "loss": 1.3015, "step": 37710 }, { "epoch": 1.112044817927171, "grad_norm": 3.0344028123383273, "learning_rate": 5.947006704342361e-06, "loss": 1.323, "step": 37715 }, { "epoch": 1.1121922453191804, "grad_norm": 3.0426507407055796, "learning_rate": 5.946485356484723e-06, "loss": 1.3388, "step": 37720 }, { "epoch": 1.1123396727111898, "grad_norm": 2.952628225385987, "learning_rate": 5.945963943992995e-06, "loss": 1.3156, "step": 37725 }, { "epoch": 1.1124871001031993, "grad_norm": 3.164469302094381, "learning_rate": 5.9454424668825215e-06, "loss": 1.3407, "step": 37730 }, { "epoch": 1.1126345274952085, "grad_norm": 2.962396075229033, "learning_rate": 5.944920925168645e-06, "loss": 1.3224, "step": 37735 }, { "epoch": 1.112781954887218, "grad_norm": 2.9243702015079864, "learning_rate": 5.944399318866715e-06, "loss": 1.3578, "step": 37740 }, { "epoch": 1.1129293822792274, "grad_norm": 2.975254492018835, "learning_rate": 5.94387764799208e-06, "loss": 1.3478, "step": 37745 }, { "epoch": 1.113076809671237, "grad_norm": 2.8238632486335864, "learning_rate": 5.9433559125600885e-06, "loss": 1.3192, "step": 37750 }, { "epoch": 1.1132242370632464, "grad_norm": 2.871151368019904, "learning_rate": 5.942834112586095e-06, "loss": 1.3281, "step": 37755 }, { "epoch": 1.1133716644552558, "grad_norm": 3.010722412205487, "learning_rate": 5.942312248085455e-06, "loss": 1.3393, "step": 37760 }, { "epoch": 1.1135190918472653, "grad_norm": 2.9564132863622405, "learning_rate": 5.941790319073523e-06, "loss": 1.3373, "step": 37765 }, { "epoch": 1.1136665192392747, "grad_norm": 2.835433360427278, "learning_rate": 5.9412683255656575e-06, "loss": 1.3467, "step": 37770 }, { "epoch": 1.1138139466312842, "grad_norm": 2.876456982084905, "learning_rate": 5.9407462675772216e-06, "loss": 1.3318, "step": 37775 }, { "epoch": 1.1139613740232934, "grad_norm": 2.9585045643777117, "learning_rate": 5.940224145123572e-06, "loss": 1.2959, "step": 37780 }, { "epoch": 1.1141088014153029, "grad_norm": 2.947113996406222, "learning_rate": 5.9397019582200776e-06, "loss": 1.3016, "step": 37785 }, { "epoch": 1.1142562288073123, "grad_norm": 2.957723933943217, "learning_rate": 5.939179706882102e-06, "loss": 1.3174, "step": 37790 }, { "epoch": 1.1144036561993218, "grad_norm": 2.9087766206464987, "learning_rate": 5.938657391125015e-06, "loss": 1.2997, "step": 37795 }, { "epoch": 1.1145510835913313, "grad_norm": 2.9243252247907345, "learning_rate": 5.938135010964184e-06, "loss": 1.3403, "step": 37800 }, { "epoch": 1.1146985109833407, "grad_norm": 2.924996161824745, "learning_rate": 5.937612566414982e-06, "loss": 1.3017, "step": 37805 }, { "epoch": 1.1148459383753502, "grad_norm": 2.8839309733340874, "learning_rate": 5.937090057492783e-06, "loss": 1.2729, "step": 37810 }, { "epoch": 1.1149933657673596, "grad_norm": 3.166118398577699, "learning_rate": 5.936567484212961e-06, "loss": 1.3172, "step": 37815 }, { "epoch": 1.115140793159369, "grad_norm": 2.924213212253916, "learning_rate": 5.936044846590894e-06, "loss": 1.3483, "step": 37820 }, { "epoch": 1.1152882205513786, "grad_norm": 3.06176565730439, "learning_rate": 5.935522144641961e-06, "loss": 1.3125, "step": 37825 }, { "epoch": 1.1154356479433878, "grad_norm": 2.9490448052611575, "learning_rate": 5.934999378381544e-06, "loss": 1.332, "step": 37830 }, { "epoch": 1.1155830753353972, "grad_norm": 3.0936433194062305, "learning_rate": 5.934476547825024e-06, "loss": 1.297, "step": 37835 }, { "epoch": 1.1157305027274067, "grad_norm": 2.92150156679666, "learning_rate": 5.933953652987789e-06, "loss": 1.318, "step": 37840 }, { "epoch": 1.1158779301194162, "grad_norm": 2.905329856363481, "learning_rate": 5.933430693885222e-06, "loss": 1.2382, "step": 37845 }, { "epoch": 1.1160253575114256, "grad_norm": 2.8870085310370937, "learning_rate": 5.932907670532716e-06, "loss": 1.3081, "step": 37850 }, { "epoch": 1.116172784903435, "grad_norm": 2.924930403135114, "learning_rate": 5.9323845829456576e-06, "loss": 1.3108, "step": 37855 }, { "epoch": 1.1163202122954445, "grad_norm": 2.903391886669318, "learning_rate": 5.9318614311394405e-06, "loss": 1.3194, "step": 37860 }, { "epoch": 1.116467639687454, "grad_norm": 2.9074720256568427, "learning_rate": 5.931338215129459e-06, "loss": 1.3282, "step": 37865 }, { "epoch": 1.1166150670794635, "grad_norm": 3.066070379925274, "learning_rate": 5.9308149349311105e-06, "loss": 1.3366, "step": 37870 }, { "epoch": 1.1167624944714727, "grad_norm": 2.9643104246916807, "learning_rate": 5.930291590559793e-06, "loss": 1.3367, "step": 37875 }, { "epoch": 1.1169099218634821, "grad_norm": 2.905428515733342, "learning_rate": 5.929768182030904e-06, "loss": 1.3697, "step": 37880 }, { "epoch": 1.1170573492554916, "grad_norm": 2.914517872020462, "learning_rate": 5.929244709359849e-06, "loss": 1.2896, "step": 37885 }, { "epoch": 1.117204776647501, "grad_norm": 3.042625569443934, "learning_rate": 5.928721172562029e-06, "loss": 1.2848, "step": 37890 }, { "epoch": 1.1173522040395105, "grad_norm": 3.0127950644981616, "learning_rate": 5.9281975716528515e-06, "loss": 1.2741, "step": 37895 }, { "epoch": 1.11749963143152, "grad_norm": 2.823062178888536, "learning_rate": 5.927673906647723e-06, "loss": 1.3171, "step": 37900 }, { "epoch": 1.1176470588235294, "grad_norm": 2.9456141709632475, "learning_rate": 5.927150177562053e-06, "loss": 1.2903, "step": 37905 }, { "epoch": 1.117794486215539, "grad_norm": 2.959689218048723, "learning_rate": 5.9266263844112516e-06, "loss": 1.3161, "step": 37910 }, { "epoch": 1.1179419136075484, "grad_norm": 2.9512052263276445, "learning_rate": 5.926102527210735e-06, "loss": 1.3722, "step": 37915 }, { "epoch": 1.1180893409995578, "grad_norm": 2.893231840361479, "learning_rate": 5.925578605975917e-06, "loss": 1.3008, "step": 37920 }, { "epoch": 1.1182367683915673, "grad_norm": 2.9762783776615898, "learning_rate": 5.925054620722213e-06, "loss": 1.3424, "step": 37925 }, { "epoch": 1.1183841957835765, "grad_norm": 2.8887924488173553, "learning_rate": 5.924530571465043e-06, "loss": 1.293, "step": 37930 }, { "epoch": 1.118531623175586, "grad_norm": 3.0255472475253344, "learning_rate": 5.9240064582198295e-06, "loss": 1.3082, "step": 37935 }, { "epoch": 1.1186790505675954, "grad_norm": 3.0193712670342854, "learning_rate": 5.923482281001992e-06, "loss": 1.3186, "step": 37940 }, { "epoch": 1.1188264779596049, "grad_norm": 2.969350311123046, "learning_rate": 5.922958039826958e-06, "loss": 1.2869, "step": 37945 }, { "epoch": 1.1189739053516143, "grad_norm": 2.8464646568951912, "learning_rate": 5.922433734710151e-06, "loss": 1.3028, "step": 37950 }, { "epoch": 1.1191213327436238, "grad_norm": 3.196538270851739, "learning_rate": 5.921909365667e-06, "loss": 1.3779, "step": 37955 }, { "epoch": 1.1192687601356333, "grad_norm": 2.797497804235189, "learning_rate": 5.921384932712939e-06, "loss": 1.3048, "step": 37960 }, { "epoch": 1.1194161875276427, "grad_norm": 2.973445484027302, "learning_rate": 5.920860435863394e-06, "loss": 1.3414, "step": 37965 }, { "epoch": 1.1195636149196522, "grad_norm": 2.7899797055249667, "learning_rate": 5.920335875133803e-06, "loss": 1.2818, "step": 37970 }, { "epoch": 1.1197110423116614, "grad_norm": 2.84818733165336, "learning_rate": 5.9198112505396e-06, "loss": 1.2967, "step": 37975 }, { "epoch": 1.1198584697036709, "grad_norm": 2.957170404877011, "learning_rate": 5.919286562096225e-06, "loss": 1.3126, "step": 37980 }, { "epoch": 1.1200058970956803, "grad_norm": 2.906999538061775, "learning_rate": 5.918761809819114e-06, "loss": 1.2997, "step": 37985 }, { "epoch": 1.1201533244876898, "grad_norm": 2.818078868799168, "learning_rate": 5.91823699372371e-06, "loss": 1.3107, "step": 37990 }, { "epoch": 1.1203007518796992, "grad_norm": 2.8954764091682916, "learning_rate": 5.917712113825458e-06, "loss": 1.2537, "step": 37995 }, { "epoch": 1.1204481792717087, "grad_norm": 2.9292285083375225, "learning_rate": 5.917187170139801e-06, "loss": 1.3234, "step": 38000 }, { "epoch": 1.1204481792717087, "eval_loss": 1.1247270107269287, "eval_runtime": 4.2714, "eval_samples_per_second": 92.71, "eval_steps_per_second": 3.044, "step": 38000 }, { "epoch": 1.1205956066637182, "grad_norm": 2.980963765823767, "learning_rate": 5.916662162682187e-06, "loss": 1.2912, "step": 38005 }, { "epoch": 1.1207430340557276, "grad_norm": 2.877394170242133, "learning_rate": 5.9161370914680644e-06, "loss": 1.2695, "step": 38010 }, { "epoch": 1.120890461447737, "grad_norm": 2.9755084308921713, "learning_rate": 5.9156119565128856e-06, "loss": 1.3021, "step": 38015 }, { "epoch": 1.1210378888397465, "grad_norm": 2.780579879444715, "learning_rate": 5.915086757832101e-06, "loss": 1.3176, "step": 38020 }, { "epoch": 1.1211853162317558, "grad_norm": 2.920812502203571, "learning_rate": 5.914561495441167e-06, "loss": 1.3173, "step": 38025 }, { "epoch": 1.1213327436237652, "grad_norm": 2.878579064029118, "learning_rate": 5.9140361693555385e-06, "loss": 1.3103, "step": 38030 }, { "epoch": 1.1214801710157747, "grad_norm": 3.1295071597300295, "learning_rate": 5.9135107795906755e-06, "loss": 1.3535, "step": 38035 }, { "epoch": 1.1216275984077841, "grad_norm": 2.8633789073362106, "learning_rate": 5.912985326162037e-06, "loss": 1.3303, "step": 38040 }, { "epoch": 1.1217750257997936, "grad_norm": 2.915391471169707, "learning_rate": 5.912459809085085e-06, "loss": 1.3591, "step": 38045 }, { "epoch": 1.121922453191803, "grad_norm": 2.930260339202568, "learning_rate": 5.911934228375284e-06, "loss": 1.3743, "step": 38050 }, { "epoch": 1.1220698805838125, "grad_norm": 2.8165479435391947, "learning_rate": 5.9114085840481e-06, "loss": 1.3617, "step": 38055 }, { "epoch": 1.122217307975822, "grad_norm": 2.8608466102420698, "learning_rate": 5.910882876119e-06, "loss": 1.3017, "step": 38060 }, { "epoch": 1.1223647353678314, "grad_norm": 2.926849160071575, "learning_rate": 5.910357104603455e-06, "loss": 1.3409, "step": 38065 }, { "epoch": 1.1225121627598407, "grad_norm": 2.9664435154696616, "learning_rate": 5.9098312695169344e-06, "loss": 1.2847, "step": 38070 }, { "epoch": 1.1226595901518501, "grad_norm": 2.994118628290366, "learning_rate": 5.909305370874912e-06, "loss": 1.2996, "step": 38075 }, { "epoch": 1.1228070175438596, "grad_norm": 2.885938460305759, "learning_rate": 5.908779408692864e-06, "loss": 1.3304, "step": 38080 }, { "epoch": 1.122954444935869, "grad_norm": 2.9890193821341806, "learning_rate": 5.908253382986267e-06, "loss": 1.3232, "step": 38085 }, { "epoch": 1.1231018723278785, "grad_norm": 2.9464012802850235, "learning_rate": 5.907727293770599e-06, "loss": 1.3173, "step": 38090 }, { "epoch": 1.123249299719888, "grad_norm": 2.9231625425082175, "learning_rate": 5.907201141061342e-06, "loss": 1.3061, "step": 38095 }, { "epoch": 1.1233967271118974, "grad_norm": 3.124080026905959, "learning_rate": 5.9066749248739786e-06, "loss": 1.3223, "step": 38100 }, { "epoch": 1.1235441545039069, "grad_norm": 2.942073831963818, "learning_rate": 5.906148645223992e-06, "loss": 1.3069, "step": 38105 }, { "epoch": 1.1236915818959163, "grad_norm": 2.95334740457086, "learning_rate": 5.90562230212687e-06, "loss": 1.3411, "step": 38110 }, { "epoch": 1.1238390092879258, "grad_norm": 2.8670219792760614, "learning_rate": 5.9050958955981e-06, "loss": 1.3228, "step": 38115 }, { "epoch": 1.123986436679935, "grad_norm": 2.957003618965956, "learning_rate": 5.904569425653172e-06, "loss": 1.2686, "step": 38120 }, { "epoch": 1.1241338640719445, "grad_norm": 2.9629158435182843, "learning_rate": 5.9040428923075796e-06, "loss": 1.3436, "step": 38125 }, { "epoch": 1.124281291463954, "grad_norm": 3.1835084525684336, "learning_rate": 5.903516295576814e-06, "loss": 1.343, "step": 38130 }, { "epoch": 1.1244287188559634, "grad_norm": 2.9148202607240434, "learning_rate": 5.902989635476374e-06, "loss": 1.3088, "step": 38135 }, { "epoch": 1.1245761462479729, "grad_norm": 2.868005320512829, "learning_rate": 5.902462912021754e-06, "loss": 1.2836, "step": 38140 }, { "epoch": 1.1247235736399823, "grad_norm": 2.874725415642177, "learning_rate": 5.901936125228456e-06, "loss": 1.3056, "step": 38145 }, { "epoch": 1.1248710010319918, "grad_norm": 2.918603057078603, "learning_rate": 5.901409275111979e-06, "loss": 1.3518, "step": 38150 }, { "epoch": 1.1250184284240012, "grad_norm": 2.9674141569453907, "learning_rate": 5.9008823616878284e-06, "loss": 1.3098, "step": 38155 }, { "epoch": 1.1251658558160107, "grad_norm": 3.062673211820755, "learning_rate": 5.900355384971508e-06, "loss": 1.3347, "step": 38160 }, { "epoch": 1.12531328320802, "grad_norm": 3.1191120032339064, "learning_rate": 5.8998283449785265e-06, "loss": 1.3252, "step": 38165 }, { "epoch": 1.1254607106000294, "grad_norm": 2.8398793656447547, "learning_rate": 5.899301241724389e-06, "loss": 1.312, "step": 38170 }, { "epoch": 1.1256081379920388, "grad_norm": 2.9192223124952528, "learning_rate": 5.898774075224609e-06, "loss": 1.3105, "step": 38175 }, { "epoch": 1.1257555653840483, "grad_norm": 2.986414208910968, "learning_rate": 5.898246845494698e-06, "loss": 1.2779, "step": 38180 }, { "epoch": 1.1259029927760578, "grad_norm": 2.82539999474153, "learning_rate": 5.89771955255017e-06, "loss": 1.2994, "step": 38185 }, { "epoch": 1.1260504201680672, "grad_norm": 2.9103387982354048, "learning_rate": 5.897192196406544e-06, "loss": 1.2806, "step": 38190 }, { "epoch": 1.1261978475600767, "grad_norm": 2.8682815928197876, "learning_rate": 5.896664777079333e-06, "loss": 1.3617, "step": 38195 }, { "epoch": 1.1263452749520861, "grad_norm": 2.9115829297772007, "learning_rate": 5.89613729458406e-06, "loss": 1.2951, "step": 38200 }, { "epoch": 1.1264927023440956, "grad_norm": 2.786366402014139, "learning_rate": 5.895609748936248e-06, "loss": 1.2953, "step": 38205 }, { "epoch": 1.126640129736105, "grad_norm": 3.0289715763098406, "learning_rate": 5.895082140151419e-06, "loss": 1.3291, "step": 38210 }, { "epoch": 1.1267875571281145, "grad_norm": 2.8105406232352466, "learning_rate": 5.894554468245097e-06, "loss": 1.295, "step": 38215 }, { "epoch": 1.1269349845201238, "grad_norm": 2.8990950555570048, "learning_rate": 5.894026733232813e-06, "loss": 1.2903, "step": 38220 }, { "epoch": 1.1270824119121332, "grad_norm": 2.972509661785895, "learning_rate": 5.893498935130093e-06, "loss": 1.286, "step": 38225 }, { "epoch": 1.1272298393041427, "grad_norm": 2.95731412998536, "learning_rate": 5.892971073952469e-06, "loss": 1.2971, "step": 38230 }, { "epoch": 1.1273772666961521, "grad_norm": 2.920851620450364, "learning_rate": 5.892443149715474e-06, "loss": 1.3286, "step": 38235 }, { "epoch": 1.1275246940881616, "grad_norm": 2.7955125635638822, "learning_rate": 5.891915162434645e-06, "loss": 1.2896, "step": 38240 }, { "epoch": 1.127672121480171, "grad_norm": 3.000121916250684, "learning_rate": 5.891387112125514e-06, "loss": 1.312, "step": 38245 }, { "epoch": 1.1278195488721805, "grad_norm": 3.0392680710401616, "learning_rate": 5.890858998803623e-06, "loss": 1.3427, "step": 38250 }, { "epoch": 1.12796697626419, "grad_norm": 2.945936664987977, "learning_rate": 5.890330822484511e-06, "loss": 1.3208, "step": 38255 }, { "epoch": 1.1281144036561994, "grad_norm": 2.9687384657918154, "learning_rate": 5.889802583183721e-06, "loss": 1.3238, "step": 38260 }, { "epoch": 1.1282618310482087, "grad_norm": 2.9654804271830484, "learning_rate": 5.889274280916797e-06, "loss": 1.3107, "step": 38265 }, { "epoch": 1.128409258440218, "grad_norm": 2.9810943513846158, "learning_rate": 5.888745915699283e-06, "loss": 1.2958, "step": 38270 }, { "epoch": 1.1285566858322276, "grad_norm": 2.926197344669324, "learning_rate": 5.8882174875467295e-06, "loss": 1.2964, "step": 38275 }, { "epoch": 1.128704113224237, "grad_norm": 2.902167686402576, "learning_rate": 5.887688996474684e-06, "loss": 1.3501, "step": 38280 }, { "epoch": 1.1288515406162465, "grad_norm": 2.9425097057411778, "learning_rate": 5.8871604424986996e-06, "loss": 1.2938, "step": 38285 }, { "epoch": 1.128998968008256, "grad_norm": 2.95871997637943, "learning_rate": 5.886631825634328e-06, "loss": 1.346, "step": 38290 }, { "epoch": 1.1291463954002654, "grad_norm": 2.9736903455759025, "learning_rate": 5.886103145897127e-06, "loss": 1.334, "step": 38295 }, { "epoch": 1.1292938227922749, "grad_norm": 2.8053289472004836, "learning_rate": 5.8855744033026496e-06, "loss": 1.2635, "step": 38300 }, { "epoch": 1.1294412501842843, "grad_norm": 2.8949470381153652, "learning_rate": 5.885045597866458e-06, "loss": 1.3224, "step": 38305 }, { "epoch": 1.1295886775762938, "grad_norm": 2.9750285417069136, "learning_rate": 5.884516729604111e-06, "loss": 1.329, "step": 38310 }, { "epoch": 1.1297361049683032, "grad_norm": 3.0306904625054303, "learning_rate": 5.883987798531172e-06, "loss": 1.3144, "step": 38315 }, { "epoch": 1.1298835323603125, "grad_norm": 2.9371222736522915, "learning_rate": 5.883458804663205e-06, "loss": 1.3279, "step": 38320 }, { "epoch": 1.130030959752322, "grad_norm": 2.9200581598116586, "learning_rate": 5.882929748015777e-06, "loss": 1.316, "step": 38325 }, { "epoch": 1.1301783871443314, "grad_norm": 2.8390263188249025, "learning_rate": 5.882400628604456e-06, "loss": 1.3641, "step": 38330 }, { "epoch": 1.1303258145363408, "grad_norm": 2.8582204079232265, "learning_rate": 5.881871446444811e-06, "loss": 1.309, "step": 38335 }, { "epoch": 1.1304732419283503, "grad_norm": 2.902272312034371, "learning_rate": 5.8813422015524146e-06, "loss": 1.3036, "step": 38340 }, { "epoch": 1.1306206693203598, "grad_norm": 3.0477265851194564, "learning_rate": 5.88081289394284e-06, "loss": 1.2841, "step": 38345 }, { "epoch": 1.1307680967123692, "grad_norm": 2.868848266008631, "learning_rate": 5.880283523631663e-06, "loss": 1.2978, "step": 38350 }, { "epoch": 1.1309155241043787, "grad_norm": 3.1000619908851865, "learning_rate": 5.879754090634461e-06, "loss": 1.3227, "step": 38355 }, { "epoch": 1.131062951496388, "grad_norm": 2.917271621445222, "learning_rate": 5.879224594966813e-06, "loss": 1.2763, "step": 38360 }, { "epoch": 1.1312103788883974, "grad_norm": 2.8722072393980502, "learning_rate": 5.8786950366442985e-06, "loss": 1.3063, "step": 38365 }, { "epoch": 1.1313578062804068, "grad_norm": 3.0584396722324585, "learning_rate": 5.878165415682504e-06, "loss": 1.3715, "step": 38370 }, { "epoch": 1.1315052336724163, "grad_norm": 3.045176847203099, "learning_rate": 5.8776357320970106e-06, "loss": 1.324, "step": 38375 }, { "epoch": 1.1316526610644257, "grad_norm": 2.90256354568212, "learning_rate": 5.877105985903407e-06, "loss": 1.3348, "step": 38380 }, { "epoch": 1.1318000884564352, "grad_norm": 2.827798495105491, "learning_rate": 5.87657617711728e-06, "loss": 1.318, "step": 38385 }, { "epoch": 1.1319475158484447, "grad_norm": 2.944964735458685, "learning_rate": 5.876046305754222e-06, "loss": 1.3483, "step": 38390 }, { "epoch": 1.1320949432404541, "grad_norm": 3.031502384780466, "learning_rate": 5.875516371829823e-06, "loss": 1.2738, "step": 38395 }, { "epoch": 1.1322423706324636, "grad_norm": 2.905645952073015, "learning_rate": 5.874986375359676e-06, "loss": 1.2955, "step": 38400 }, { "epoch": 1.132389798024473, "grad_norm": 2.879236365854518, "learning_rate": 5.8744563163593814e-06, "loss": 1.3082, "step": 38405 }, { "epoch": 1.1325372254164825, "grad_norm": 2.7905898741583424, "learning_rate": 5.873926194844532e-06, "loss": 1.2771, "step": 38410 }, { "epoch": 1.1326846528084917, "grad_norm": 2.854350967054463, "learning_rate": 5.87339601083073e-06, "loss": 1.3317, "step": 38415 }, { "epoch": 1.1328320802005012, "grad_norm": 3.065200574963826, "learning_rate": 5.872865764333575e-06, "loss": 1.3092, "step": 38420 }, { "epoch": 1.1329795075925106, "grad_norm": 3.085179621544787, "learning_rate": 5.872335455368673e-06, "loss": 1.31, "step": 38425 }, { "epoch": 1.13312693498452, "grad_norm": 2.9393957032381635, "learning_rate": 5.871805083951624e-06, "loss": 1.382, "step": 38430 }, { "epoch": 1.1332743623765296, "grad_norm": 2.8521804177462498, "learning_rate": 5.871274650098038e-06, "loss": 1.2971, "step": 38435 }, { "epoch": 1.133421789768539, "grad_norm": 2.88133463806418, "learning_rate": 5.8707441538235235e-06, "loss": 1.2926, "step": 38440 }, { "epoch": 1.1335692171605485, "grad_norm": 2.895447465471562, "learning_rate": 5.870213595143691e-06, "loss": 1.3452, "step": 38445 }, { "epoch": 1.133716644552558, "grad_norm": 2.928345553777226, "learning_rate": 5.869682974074152e-06, "loss": 1.2487, "step": 38450 }, { "epoch": 1.1338640719445674, "grad_norm": 2.7764543837282813, "learning_rate": 5.869152290630521e-06, "loss": 1.3218, "step": 38455 }, { "epoch": 1.1340114993365766, "grad_norm": 2.7228697908536716, "learning_rate": 5.868621544828415e-06, "loss": 1.2989, "step": 38460 }, { "epoch": 1.134158926728586, "grad_norm": 2.9139870853147007, "learning_rate": 5.868090736683451e-06, "loss": 1.3551, "step": 38465 }, { "epoch": 1.1343063541205956, "grad_norm": 2.9277961100660157, "learning_rate": 5.867559866211247e-06, "loss": 1.3328, "step": 38470 }, { "epoch": 1.134453781512605, "grad_norm": 3.0411848238839263, "learning_rate": 5.867028933427426e-06, "loss": 1.3062, "step": 38475 }, { "epoch": 1.1346012089046145, "grad_norm": 2.9220659526673014, "learning_rate": 5.866497938347612e-06, "loss": 1.2931, "step": 38480 }, { "epoch": 1.134748636296624, "grad_norm": 2.8727424152200003, "learning_rate": 5.865966880987429e-06, "loss": 1.2974, "step": 38485 }, { "epoch": 1.1348960636886334, "grad_norm": 2.914321977583868, "learning_rate": 5.865435761362504e-06, "loss": 1.2912, "step": 38490 }, { "epoch": 1.1350434910806428, "grad_norm": 2.8560625772584745, "learning_rate": 5.864904579488467e-06, "loss": 1.327, "step": 38495 }, { "epoch": 1.1351909184726523, "grad_norm": 2.850068751890656, "learning_rate": 5.864373335380948e-06, "loss": 1.2964, "step": 38500 }, { "epoch": 1.1351909184726523, "eval_loss": 1.1404690742492676, "eval_runtime": 4.1931, "eval_samples_per_second": 94.44, "eval_steps_per_second": 3.1, "step": 38500 }, { "epoch": 1.1353383458646618, "grad_norm": 2.9192880338733014, "learning_rate": 5.8638420290555785e-06, "loss": 1.3107, "step": 38505 }, { "epoch": 1.135485773256671, "grad_norm": 2.9245934172827552, "learning_rate": 5.863310660527994e-06, "loss": 1.3293, "step": 38510 }, { "epoch": 1.1356332006486805, "grad_norm": 2.8371328175607977, "learning_rate": 5.86277922981383e-06, "loss": 1.2861, "step": 38515 }, { "epoch": 1.13578062804069, "grad_norm": 2.966526911801747, "learning_rate": 5.862247736928723e-06, "loss": 1.3232, "step": 38520 }, { "epoch": 1.1359280554326994, "grad_norm": 2.857476685742183, "learning_rate": 5.861716181888317e-06, "loss": 1.3312, "step": 38525 }, { "epoch": 1.1360754828247088, "grad_norm": 2.8329853299085856, "learning_rate": 5.861184564708249e-06, "loss": 1.2848, "step": 38530 }, { "epoch": 1.1362229102167183, "grad_norm": 3.0019794877880814, "learning_rate": 5.860652885404166e-06, "loss": 1.332, "step": 38535 }, { "epoch": 1.1363703376087277, "grad_norm": 2.8819927038373336, "learning_rate": 5.8601211439917106e-06, "loss": 1.3331, "step": 38540 }, { "epoch": 1.1365177650007372, "grad_norm": 3.0513614050752738, "learning_rate": 5.859589340486532e-06, "loss": 1.3211, "step": 38545 }, { "epoch": 1.1366651923927467, "grad_norm": 2.86853518318339, "learning_rate": 5.859057474904278e-06, "loss": 1.261, "step": 38550 }, { "epoch": 1.136812619784756, "grad_norm": 2.8981793159869675, "learning_rate": 5.858525547260599e-06, "loss": 1.2649, "step": 38555 }, { "epoch": 1.1369600471767654, "grad_norm": 2.9655927437355456, "learning_rate": 5.857993557571149e-06, "loss": 1.3308, "step": 38560 }, { "epoch": 1.1371074745687748, "grad_norm": 2.8657739443922536, "learning_rate": 5.8574615058515805e-06, "loss": 1.3306, "step": 38565 }, { "epoch": 1.1372549019607843, "grad_norm": 3.075673768439525, "learning_rate": 5.856929392117551e-06, "loss": 1.2863, "step": 38570 }, { "epoch": 1.1374023293527937, "grad_norm": 2.812371861470307, "learning_rate": 5.856397216384719e-06, "loss": 1.2959, "step": 38575 }, { "epoch": 1.1375497567448032, "grad_norm": 2.9175080001225226, "learning_rate": 5.855864978668743e-06, "loss": 1.2974, "step": 38580 }, { "epoch": 1.1376971841368126, "grad_norm": 2.969863801479219, "learning_rate": 5.855332678985285e-06, "loss": 1.2944, "step": 38585 }, { "epoch": 1.137844611528822, "grad_norm": 2.990342048386667, "learning_rate": 5.854800317350009e-06, "loss": 1.3274, "step": 38590 }, { "epoch": 1.1379920389208316, "grad_norm": 2.8902464801918355, "learning_rate": 5.85426789377858e-06, "loss": 1.3105, "step": 38595 }, { "epoch": 1.138139466312841, "grad_norm": 2.986862398993542, "learning_rate": 5.853735408286666e-06, "loss": 1.297, "step": 38600 }, { "epoch": 1.1382868937048505, "grad_norm": 2.7995559493911104, "learning_rate": 5.853202860889934e-06, "loss": 1.2736, "step": 38605 }, { "epoch": 1.1384343210968597, "grad_norm": 2.9710709139358253, "learning_rate": 5.852670251604059e-06, "loss": 1.3409, "step": 38610 }, { "epoch": 1.1385817484888692, "grad_norm": 3.017651334148449, "learning_rate": 5.852137580444708e-06, "loss": 1.3707, "step": 38615 }, { "epoch": 1.1387291758808786, "grad_norm": 2.9308621594775928, "learning_rate": 5.851604847427561e-06, "loss": 1.3026, "step": 38620 }, { "epoch": 1.138876603272888, "grad_norm": 2.96789038774108, "learning_rate": 5.85107205256829e-06, "loss": 1.3057, "step": 38625 }, { "epoch": 1.1390240306648975, "grad_norm": 3.010337177538618, "learning_rate": 5.850539195882575e-06, "loss": 1.2803, "step": 38630 }, { "epoch": 1.139171458056907, "grad_norm": 2.8047436017207015, "learning_rate": 5.850006277386095e-06, "loss": 1.3228, "step": 38635 }, { "epoch": 1.1393188854489165, "grad_norm": 2.9352036380584225, "learning_rate": 5.849473297094534e-06, "loss": 1.3075, "step": 38640 }, { "epoch": 1.139466312840926, "grad_norm": 2.851111315109685, "learning_rate": 5.848940255023572e-06, "loss": 1.371, "step": 38645 }, { "epoch": 1.1396137402329352, "grad_norm": 2.9506047897656167, "learning_rate": 5.848407151188897e-06, "loss": 1.3194, "step": 38650 }, { "epoch": 1.1397611676249446, "grad_norm": 2.797668423512623, "learning_rate": 5.847873985606195e-06, "loss": 1.3249, "step": 38655 }, { "epoch": 1.139908595016954, "grad_norm": 3.0319369133520664, "learning_rate": 5.8473407582911555e-06, "loss": 1.3421, "step": 38660 }, { "epoch": 1.1400560224089635, "grad_norm": 3.0412317673756757, "learning_rate": 5.84680746925947e-06, "loss": 1.3608, "step": 38665 }, { "epoch": 1.140203449800973, "grad_norm": 3.0743682073048397, "learning_rate": 5.84627411852683e-06, "loss": 1.3043, "step": 38670 }, { "epoch": 1.1403508771929824, "grad_norm": 3.009422563983006, "learning_rate": 5.845740706108931e-06, "loss": 1.3242, "step": 38675 }, { "epoch": 1.140498304584992, "grad_norm": 2.905943712308863, "learning_rate": 5.8452072320214676e-06, "loss": 1.3504, "step": 38680 }, { "epoch": 1.1406457319770014, "grad_norm": 2.971160198584673, "learning_rate": 5.84467369628014e-06, "loss": 1.3704, "step": 38685 }, { "epoch": 1.1407931593690108, "grad_norm": 2.931353835520077, "learning_rate": 5.844140098900646e-06, "loss": 1.289, "step": 38690 }, { "epoch": 1.1409405867610203, "grad_norm": 2.937906008334986, "learning_rate": 5.843606439898689e-06, "loss": 1.3139, "step": 38695 }, { "epoch": 1.1410880141530297, "grad_norm": 2.8230351523143966, "learning_rate": 5.8430727192899706e-06, "loss": 1.3224, "step": 38700 }, { "epoch": 1.141235441545039, "grad_norm": 2.966276081826041, "learning_rate": 5.842538937090199e-06, "loss": 1.2931, "step": 38705 }, { "epoch": 1.1413828689370484, "grad_norm": 2.9171513263434354, "learning_rate": 5.842005093315079e-06, "loss": 1.3247, "step": 38710 }, { "epoch": 1.141530296329058, "grad_norm": 2.885341089147283, "learning_rate": 5.84147118798032e-06, "loss": 1.3164, "step": 38715 }, { "epoch": 1.1416777237210674, "grad_norm": 2.9264188543270326, "learning_rate": 5.840937221101634e-06, "loss": 1.3088, "step": 38720 }, { "epoch": 1.1418251511130768, "grad_norm": 2.8593742789214387, "learning_rate": 5.840403192694732e-06, "loss": 1.2899, "step": 38725 }, { "epoch": 1.1419725785050863, "grad_norm": 3.029319574448388, "learning_rate": 5.839869102775329e-06, "loss": 1.3073, "step": 38730 }, { "epoch": 1.1421200058970957, "grad_norm": 2.7242213395894317, "learning_rate": 5.839334951359142e-06, "loss": 1.3105, "step": 38735 }, { "epoch": 1.1422674332891052, "grad_norm": 2.902517687112969, "learning_rate": 5.838800738461888e-06, "loss": 1.2512, "step": 38740 }, { "epoch": 1.1424148606811146, "grad_norm": 3.085080111153216, "learning_rate": 5.838266464099287e-06, "loss": 1.2995, "step": 38745 }, { "epoch": 1.1425622880731239, "grad_norm": 2.8369495205949904, "learning_rate": 5.837732128287061e-06, "loss": 1.325, "step": 38750 }, { "epoch": 1.1427097154651333, "grad_norm": 2.969936740296712, "learning_rate": 5.837197731040932e-06, "loss": 1.3803, "step": 38755 }, { "epoch": 1.1428571428571428, "grad_norm": 3.075054806015554, "learning_rate": 5.836663272376627e-06, "loss": 1.2925, "step": 38760 }, { "epoch": 1.1430045702491523, "grad_norm": 16.826455568692364, "learning_rate": 5.836128752309873e-06, "loss": 1.2805, "step": 38765 }, { "epoch": 1.1431519976411617, "grad_norm": 3.101216366208565, "learning_rate": 5.835594170856398e-06, "loss": 1.365, "step": 38770 }, { "epoch": 1.1432994250331712, "grad_norm": 2.9486524377021057, "learning_rate": 5.835059528031933e-06, "loss": 1.3821, "step": 38775 }, { "epoch": 1.1434468524251806, "grad_norm": 3.005280040269426, "learning_rate": 5.834524823852211e-06, "loss": 1.347, "step": 38780 }, { "epoch": 1.14359427981719, "grad_norm": 3.1599897184104755, "learning_rate": 5.833990058332966e-06, "loss": 1.2833, "step": 38785 }, { "epoch": 1.1437417072091995, "grad_norm": 3.061641259604542, "learning_rate": 5.833455231489933e-06, "loss": 1.2976, "step": 38790 }, { "epoch": 1.143889134601209, "grad_norm": 2.914291381321314, "learning_rate": 5.832920343338851e-06, "loss": 1.3334, "step": 38795 }, { "epoch": 1.1440365619932185, "grad_norm": 2.8806270813362835, "learning_rate": 5.832385393895459e-06, "loss": 1.302, "step": 38800 }, { "epoch": 1.1441839893852277, "grad_norm": 2.770127783868724, "learning_rate": 5.8318503831754994e-06, "loss": 1.3675, "step": 38805 }, { "epoch": 1.1443314167772372, "grad_norm": 3.012729877216844, "learning_rate": 5.831315311194715e-06, "loss": 1.3265, "step": 38810 }, { "epoch": 1.1444788441692466, "grad_norm": 2.9513167132852067, "learning_rate": 5.830780177968851e-06, "loss": 1.24, "step": 38815 }, { "epoch": 1.144626271561256, "grad_norm": 2.943042296695274, "learning_rate": 5.830244983513654e-06, "loss": 1.3418, "step": 38820 }, { "epoch": 1.1447736989532655, "grad_norm": 2.9300187203389894, "learning_rate": 5.829709727844874e-06, "loss": 1.3062, "step": 38825 }, { "epoch": 1.144921126345275, "grad_norm": 2.8843731070916485, "learning_rate": 5.82917441097826e-06, "loss": 1.3464, "step": 38830 }, { "epoch": 1.1450685537372844, "grad_norm": 3.0022467746406694, "learning_rate": 5.828639032929565e-06, "loss": 1.3551, "step": 38835 }, { "epoch": 1.145215981129294, "grad_norm": 2.9689961258170854, "learning_rate": 5.828103593714542e-06, "loss": 1.3137, "step": 38840 }, { "epoch": 1.1453634085213031, "grad_norm": 3.0265928123036034, "learning_rate": 5.827568093348948e-06, "loss": 1.3823, "step": 38845 }, { "epoch": 1.1455108359133126, "grad_norm": 2.850325608937369, "learning_rate": 5.827032531848541e-06, "loss": 1.319, "step": 38850 }, { "epoch": 1.145658263305322, "grad_norm": 2.868112901720061, "learning_rate": 5.82649690922908e-06, "loss": 1.3224, "step": 38855 }, { "epoch": 1.1458056906973315, "grad_norm": 2.9595451642563146, "learning_rate": 5.825961225506326e-06, "loss": 1.3281, "step": 38860 }, { "epoch": 1.145953118089341, "grad_norm": 3.061791101609737, "learning_rate": 5.825425480696042e-06, "loss": 1.3083, "step": 38865 }, { "epoch": 1.1461005454813504, "grad_norm": 2.9027554481120292, "learning_rate": 5.824889674813995e-06, "loss": 1.2969, "step": 38870 }, { "epoch": 1.1462479728733599, "grad_norm": 2.9298080705651492, "learning_rate": 5.8243538078759485e-06, "loss": 1.3006, "step": 38875 }, { "epoch": 1.1463954002653693, "grad_norm": 3.262818397442039, "learning_rate": 5.823817879897673e-06, "loss": 1.2991, "step": 38880 }, { "epoch": 1.1465428276573788, "grad_norm": 2.7527267582396036, "learning_rate": 5.823281890894938e-06, "loss": 1.3169, "step": 38885 }, { "epoch": 1.1466902550493883, "grad_norm": 3.0139541210384593, "learning_rate": 5.822745840883517e-06, "loss": 1.3205, "step": 38890 }, { "epoch": 1.1468376824413977, "grad_norm": 2.9122938936703404, "learning_rate": 5.822209729879182e-06, "loss": 1.3186, "step": 38895 }, { "epoch": 1.146985109833407, "grad_norm": 2.9590755616553817, "learning_rate": 5.8216735578977094e-06, "loss": 1.3407, "step": 38900 }, { "epoch": 1.1471325372254164, "grad_norm": 2.7861934149099468, "learning_rate": 5.821137324954877e-06, "loss": 1.3227, "step": 38905 }, { "epoch": 1.1472799646174259, "grad_norm": 2.8468952278427673, "learning_rate": 5.820601031066463e-06, "loss": 1.3055, "step": 38910 }, { "epoch": 1.1474273920094353, "grad_norm": 2.9054062704269663, "learning_rate": 5.820064676248251e-06, "loss": 1.3327, "step": 38915 }, { "epoch": 1.1475748194014448, "grad_norm": 2.8346834394576406, "learning_rate": 5.8195282605160205e-06, "loss": 1.2855, "step": 38920 }, { "epoch": 1.1477222467934542, "grad_norm": 2.8527708061250237, "learning_rate": 5.818991783885559e-06, "loss": 1.2932, "step": 38925 }, { "epoch": 1.1478696741854637, "grad_norm": 3.1467317858369963, "learning_rate": 5.81845524637265e-06, "loss": 1.3237, "step": 38930 }, { "epoch": 1.1480171015774732, "grad_norm": 3.019368398062489, "learning_rate": 5.817918647993085e-06, "loss": 1.2995, "step": 38935 }, { "epoch": 1.1481645289694826, "grad_norm": 3.0756487433807322, "learning_rate": 5.817381988762651e-06, "loss": 1.3032, "step": 38940 }, { "epoch": 1.1483119563614919, "grad_norm": 3.009745474528644, "learning_rate": 5.816845268697143e-06, "loss": 1.3027, "step": 38945 }, { "epoch": 1.1484593837535013, "grad_norm": 2.8947795061175428, "learning_rate": 5.816308487812353e-06, "loss": 1.3324, "step": 38950 }, { "epoch": 1.1486068111455108, "grad_norm": 2.9448129866473227, "learning_rate": 5.815771646124075e-06, "loss": 1.3405, "step": 38955 }, { "epoch": 1.1487542385375202, "grad_norm": 2.992568738706028, "learning_rate": 5.815234743648109e-06, "loss": 1.3051, "step": 38960 }, { "epoch": 1.1489016659295297, "grad_norm": 2.8123187328047465, "learning_rate": 5.814697780400251e-06, "loss": 1.3046, "step": 38965 }, { "epoch": 1.1490490933215392, "grad_norm": 2.814215363356973, "learning_rate": 5.8141607563963035e-06, "loss": 1.3138, "step": 38970 }, { "epoch": 1.1491965207135486, "grad_norm": 2.8570283702168577, "learning_rate": 5.8136236716520685e-06, "loss": 1.3059, "step": 38975 }, { "epoch": 1.149343948105558, "grad_norm": 3.119116400187225, "learning_rate": 5.813086526183351e-06, "loss": 1.3024, "step": 38980 }, { "epoch": 1.1494913754975675, "grad_norm": 2.8576630130296885, "learning_rate": 5.812549320005956e-06, "loss": 1.3028, "step": 38985 }, { "epoch": 1.149638802889577, "grad_norm": 2.8634083716767007, "learning_rate": 5.812012053135692e-06, "loss": 1.3164, "step": 38990 }, { "epoch": 1.1497862302815864, "grad_norm": 2.7651135849940314, "learning_rate": 5.811474725588369e-06, "loss": 1.2727, "step": 38995 }, { "epoch": 1.1499336576735957, "grad_norm": 2.847009333811869, "learning_rate": 5.810937337379799e-06, "loss": 1.34, "step": 39000 }, { "epoch": 1.1499336576735957, "eval_loss": 1.122633457183838, "eval_runtime": 4.3211, "eval_samples_per_second": 91.642, "eval_steps_per_second": 3.008, "step": 39000 }, { "epoch": 1.1500810850656051, "grad_norm": 2.941066986413962, "learning_rate": 5.810399888525794e-06, "loss": 1.3275, "step": 39005 }, { "epoch": 1.1502285124576146, "grad_norm": 2.915629776579422, "learning_rate": 5.8098623790421704e-06, "loss": 1.3368, "step": 39010 }, { "epoch": 1.150375939849624, "grad_norm": 2.8501702952072714, "learning_rate": 5.809324808944744e-06, "loss": 1.3325, "step": 39015 }, { "epoch": 1.1505233672416335, "grad_norm": 2.8803980996246445, "learning_rate": 5.808787178249333e-06, "loss": 1.3038, "step": 39020 }, { "epoch": 1.150670794633643, "grad_norm": 2.896041448173104, "learning_rate": 5.808249486971758e-06, "loss": 1.2842, "step": 39025 }, { "epoch": 1.1508182220256524, "grad_norm": 2.9416629041994273, "learning_rate": 5.807711735127843e-06, "loss": 1.3206, "step": 39030 }, { "epoch": 1.1509656494176619, "grad_norm": 2.963679371360455, "learning_rate": 5.807173922733409e-06, "loss": 1.3489, "step": 39035 }, { "epoch": 1.1511130768096711, "grad_norm": 2.944980300723793, "learning_rate": 5.806636049804283e-06, "loss": 1.3158, "step": 39040 }, { "epoch": 1.1512605042016806, "grad_norm": 3.0339374535569688, "learning_rate": 5.806098116356293e-06, "loss": 1.3061, "step": 39045 }, { "epoch": 1.15140793159369, "grad_norm": 2.8220541374914982, "learning_rate": 5.80556012240527e-06, "loss": 1.2957, "step": 39050 }, { "epoch": 1.1515553589856995, "grad_norm": 2.87776856713682, "learning_rate": 5.805022067967041e-06, "loss": 1.3129, "step": 39055 }, { "epoch": 1.151702786377709, "grad_norm": 2.9184347204521717, "learning_rate": 5.804483953057442e-06, "loss": 1.2865, "step": 39060 }, { "epoch": 1.1518502137697184, "grad_norm": 3.020955809500711, "learning_rate": 5.803945777692306e-06, "loss": 1.327, "step": 39065 }, { "epoch": 1.1519976411617279, "grad_norm": 2.9141401098098036, "learning_rate": 5.803407541887471e-06, "loss": 1.3173, "step": 39070 }, { "epoch": 1.1521450685537373, "grad_norm": 3.0868901190336517, "learning_rate": 5.802869245658775e-06, "loss": 1.3307, "step": 39075 }, { "epoch": 1.1522924959457468, "grad_norm": 2.9269612115043273, "learning_rate": 5.802330889022056e-06, "loss": 1.3018, "step": 39080 }, { "epoch": 1.1524399233377562, "grad_norm": 2.8399362664107137, "learning_rate": 5.801792471993158e-06, "loss": 1.3053, "step": 39085 }, { "epoch": 1.1525873507297657, "grad_norm": 2.9065272003463973, "learning_rate": 5.801253994587923e-06, "loss": 1.2681, "step": 39090 }, { "epoch": 1.152734778121775, "grad_norm": 2.849792267897388, "learning_rate": 5.800715456822197e-06, "loss": 1.2722, "step": 39095 }, { "epoch": 1.1528822055137844, "grad_norm": 2.7930040046650073, "learning_rate": 5.800176858711827e-06, "loss": 1.3383, "step": 39100 }, { "epoch": 1.1530296329057939, "grad_norm": 2.827259378764465, "learning_rate": 5.799638200272662e-06, "loss": 1.2829, "step": 39105 }, { "epoch": 1.1531770602978033, "grad_norm": 2.938009411973322, "learning_rate": 5.799099481520552e-06, "loss": 1.319, "step": 39110 }, { "epoch": 1.1533244876898128, "grad_norm": 2.952065412193786, "learning_rate": 5.798560702471351e-06, "loss": 1.2977, "step": 39115 }, { "epoch": 1.1534719150818222, "grad_norm": 2.8695923726836154, "learning_rate": 5.79802186314091e-06, "loss": 1.3097, "step": 39120 }, { "epoch": 1.1536193424738317, "grad_norm": 2.9280827503786875, "learning_rate": 5.7974829635450895e-06, "loss": 1.3526, "step": 39125 }, { "epoch": 1.1537667698658411, "grad_norm": 2.989139708920264, "learning_rate": 5.796944003699743e-06, "loss": 1.3228, "step": 39130 }, { "epoch": 1.1539141972578506, "grad_norm": 2.965827462690499, "learning_rate": 5.796404983620733e-06, "loss": 1.3068, "step": 39135 }, { "epoch": 1.1540616246498598, "grad_norm": 2.9516663515802404, "learning_rate": 5.79586590332392e-06, "loss": 1.3727, "step": 39140 }, { "epoch": 1.1542090520418693, "grad_norm": 2.9723107059415383, "learning_rate": 5.795326762825166e-06, "loss": 1.3277, "step": 39145 }, { "epoch": 1.1543564794338788, "grad_norm": 3.007281121027471, "learning_rate": 5.794787562140336e-06, "loss": 1.3608, "step": 39150 }, { "epoch": 1.1545039068258882, "grad_norm": 2.9249416427329433, "learning_rate": 5.794248301285298e-06, "loss": 1.3175, "step": 39155 }, { "epoch": 1.1546513342178977, "grad_norm": 2.9363229297560443, "learning_rate": 5.793708980275919e-06, "loss": 1.3347, "step": 39160 }, { "epoch": 1.1547987616099071, "grad_norm": 2.9331840726902927, "learning_rate": 5.79316959912807e-06, "loss": 1.3127, "step": 39165 }, { "epoch": 1.1549461890019166, "grad_norm": 3.0071235189574703, "learning_rate": 5.792630157857621e-06, "loss": 1.2896, "step": 39170 }, { "epoch": 1.155093616393926, "grad_norm": 2.840299375291658, "learning_rate": 5.792090656480448e-06, "loss": 1.3322, "step": 39175 }, { "epoch": 1.1552410437859355, "grad_norm": 2.9298045081908843, "learning_rate": 5.791551095012426e-06, "loss": 1.2953, "step": 39180 }, { "epoch": 1.155388471177945, "grad_norm": 3.2989607396513954, "learning_rate": 5.791011473469431e-06, "loss": 1.3414, "step": 39185 }, { "epoch": 1.1555358985699542, "grad_norm": 2.885526980895087, "learning_rate": 5.790471791867344e-06, "loss": 1.3437, "step": 39190 }, { "epoch": 1.1556833259619637, "grad_norm": 2.8066350668766744, "learning_rate": 5.789932050222044e-06, "loss": 1.2792, "step": 39195 }, { "epoch": 1.1558307533539731, "grad_norm": 3.018938440946192, "learning_rate": 5.789392248549414e-06, "loss": 1.3162, "step": 39200 }, { "epoch": 1.1559781807459826, "grad_norm": 3.0425246028607167, "learning_rate": 5.788852386865338e-06, "loss": 1.3339, "step": 39205 }, { "epoch": 1.156125608137992, "grad_norm": 2.8913209795795796, "learning_rate": 5.7883124651857034e-06, "loss": 1.2574, "step": 39210 }, { "epoch": 1.1562730355300015, "grad_norm": 2.840172403238995, "learning_rate": 5.787772483526397e-06, "loss": 1.3399, "step": 39215 }, { "epoch": 1.156420462922011, "grad_norm": 2.816978699456332, "learning_rate": 5.787232441903307e-06, "loss": 1.3181, "step": 39220 }, { "epoch": 1.1565678903140204, "grad_norm": 2.9379933398745064, "learning_rate": 5.786692340332328e-06, "loss": 1.3368, "step": 39225 }, { "epoch": 1.1567153177060299, "grad_norm": 2.842400175915214, "learning_rate": 5.786152178829349e-06, "loss": 1.2927, "step": 39230 }, { "epoch": 1.156862745098039, "grad_norm": 2.911937987121027, "learning_rate": 5.785611957410268e-06, "loss": 1.2915, "step": 39235 }, { "epoch": 1.1570101724900486, "grad_norm": 2.9761423041875275, "learning_rate": 5.7850716760909816e-06, "loss": 1.3178, "step": 39240 }, { "epoch": 1.157157599882058, "grad_norm": 2.7651548447762595, "learning_rate": 5.784531334887386e-06, "loss": 1.3093, "step": 39245 }, { "epoch": 1.1573050272740675, "grad_norm": 3.000889313204749, "learning_rate": 5.7839909338153845e-06, "loss": 1.3558, "step": 39250 }, { "epoch": 1.157452454666077, "grad_norm": 2.935769248565025, "learning_rate": 5.783450472890876e-06, "loss": 1.2928, "step": 39255 }, { "epoch": 1.1575998820580864, "grad_norm": 2.798831684436029, "learning_rate": 5.782909952129765e-06, "loss": 1.2917, "step": 39260 }, { "epoch": 1.1577473094500959, "grad_norm": 2.9149553711973053, "learning_rate": 5.782369371547957e-06, "loss": 1.3124, "step": 39265 }, { "epoch": 1.1578947368421053, "grad_norm": 2.8974898491493684, "learning_rate": 5.781828731161361e-06, "loss": 1.3456, "step": 39270 }, { "epoch": 1.1580421642341148, "grad_norm": 2.9923378543405708, "learning_rate": 5.7812880309858845e-06, "loss": 1.2681, "step": 39275 }, { "epoch": 1.1581895916261242, "grad_norm": 3.054511604851428, "learning_rate": 5.780747271037437e-06, "loss": 1.3609, "step": 39280 }, { "epoch": 1.1583370190181337, "grad_norm": 2.954632396320146, "learning_rate": 5.780206451331933e-06, "loss": 1.2737, "step": 39285 }, { "epoch": 1.158484446410143, "grad_norm": 2.8356612570341255, "learning_rate": 5.7796655718852854e-06, "loss": 1.3363, "step": 39290 }, { "epoch": 1.1586318738021524, "grad_norm": 2.9227183105193597, "learning_rate": 5.779124632713411e-06, "loss": 1.2766, "step": 39295 }, { "epoch": 1.1587793011941618, "grad_norm": 3.0541956416739233, "learning_rate": 5.778583633832226e-06, "loss": 1.3097, "step": 39300 }, { "epoch": 1.1589267285861713, "grad_norm": 3.05815433023628, "learning_rate": 5.7780425752576526e-06, "loss": 1.3737, "step": 39305 }, { "epoch": 1.1590741559781808, "grad_norm": 2.9089696093850197, "learning_rate": 5.7775014570056114e-06, "loss": 1.281, "step": 39310 }, { "epoch": 1.1592215833701902, "grad_norm": 2.8765236098671885, "learning_rate": 5.776960279092023e-06, "loss": 1.2987, "step": 39315 }, { "epoch": 1.1593690107621997, "grad_norm": 2.8976345491335436, "learning_rate": 5.776419041532815e-06, "loss": 1.3314, "step": 39320 }, { "epoch": 1.1595164381542091, "grad_norm": 2.8716601850750143, "learning_rate": 5.775877744343911e-06, "loss": 1.2654, "step": 39325 }, { "epoch": 1.1596638655462184, "grad_norm": 2.9073580393772094, "learning_rate": 5.775336387541243e-06, "loss": 1.2959, "step": 39330 }, { "epoch": 1.1598112929382278, "grad_norm": 2.8988175773051528, "learning_rate": 5.774794971140739e-06, "loss": 1.306, "step": 39335 }, { "epoch": 1.1599587203302373, "grad_norm": 2.853421620140322, "learning_rate": 5.774253495158329e-06, "loss": 1.3302, "step": 39340 }, { "epoch": 1.1601061477222467, "grad_norm": 3.0075845911833525, "learning_rate": 5.773711959609951e-06, "loss": 1.2962, "step": 39345 }, { "epoch": 1.1602535751142562, "grad_norm": 2.9341355691272732, "learning_rate": 5.7731703645115364e-06, "loss": 1.3016, "step": 39350 }, { "epoch": 1.1604010025062657, "grad_norm": 2.870073987416135, "learning_rate": 5.772628709879024e-06, "loss": 1.3121, "step": 39355 }, { "epoch": 1.1605484298982751, "grad_norm": 2.847710613637522, "learning_rate": 5.772086995728351e-06, "loss": 1.318, "step": 39360 }, { "epoch": 1.1606958572902846, "grad_norm": 3.01838548584173, "learning_rate": 5.771545222075461e-06, "loss": 1.3325, "step": 39365 }, { "epoch": 1.160843284682294, "grad_norm": 3.031398029239322, "learning_rate": 5.771003388936293e-06, "loss": 1.2742, "step": 39370 }, { "epoch": 1.1609907120743035, "grad_norm": 2.847084124446678, "learning_rate": 5.770461496326792e-06, "loss": 1.3424, "step": 39375 }, { "epoch": 1.161138139466313, "grad_norm": 2.8867220528683326, "learning_rate": 5.7699195442629065e-06, "loss": 1.3375, "step": 39380 }, { "epoch": 1.1612855668583222, "grad_norm": 2.941720819697836, "learning_rate": 5.769377532760581e-06, "loss": 1.3533, "step": 39385 }, { "epoch": 1.1614329942503316, "grad_norm": 2.8380187039977245, "learning_rate": 5.768835461835766e-06, "loss": 1.2727, "step": 39390 }, { "epoch": 1.161580421642341, "grad_norm": 2.932707498301859, "learning_rate": 5.768293331504411e-06, "loss": 1.3136, "step": 39395 }, { "epoch": 1.1617278490343506, "grad_norm": 2.812429468195901, "learning_rate": 5.767751141782471e-06, "loss": 1.3051, "step": 39400 }, { "epoch": 1.16187527642636, "grad_norm": 2.9070246764026924, "learning_rate": 5.7672088926859e-06, "loss": 1.3648, "step": 39405 }, { "epoch": 1.1620227038183695, "grad_norm": 3.0319872104766636, "learning_rate": 5.766666584230652e-06, "loss": 1.3263, "step": 39410 }, { "epoch": 1.162170131210379, "grad_norm": 2.7895274688484135, "learning_rate": 5.76612421643269e-06, "loss": 1.2975, "step": 39415 }, { "epoch": 1.1623175586023884, "grad_norm": 3.0088377327713567, "learning_rate": 5.7655817893079696e-06, "loss": 1.3106, "step": 39420 }, { "epoch": 1.1624649859943978, "grad_norm": 2.8052636564284903, "learning_rate": 5.765039302872453e-06, "loss": 1.3062, "step": 39425 }, { "epoch": 1.162612413386407, "grad_norm": 2.8359925099337175, "learning_rate": 5.764496757142104e-06, "loss": 1.2833, "step": 39430 }, { "epoch": 1.1627598407784165, "grad_norm": 2.9314353944816647, "learning_rate": 5.763954152132887e-06, "loss": 1.3478, "step": 39435 }, { "epoch": 1.162907268170426, "grad_norm": 2.9025938476475255, "learning_rate": 5.7634114878607706e-06, "loss": 1.3107, "step": 39440 }, { "epoch": 1.1630546955624355, "grad_norm": 2.9800651738055617, "learning_rate": 5.762868764341721e-06, "loss": 1.2997, "step": 39445 }, { "epoch": 1.163202122954445, "grad_norm": 2.8144281748173534, "learning_rate": 5.76232598159171e-06, "loss": 1.3113, "step": 39450 }, { "epoch": 1.1633495503464544, "grad_norm": 2.8574836827384957, "learning_rate": 5.76178313962671e-06, "loss": 1.291, "step": 39455 }, { "epoch": 1.1634969777384638, "grad_norm": 3.0520629535564696, "learning_rate": 5.761240238462691e-06, "loss": 1.3395, "step": 39460 }, { "epoch": 1.1636444051304733, "grad_norm": 2.9386740179194235, "learning_rate": 5.7606972781156355e-06, "loss": 1.3017, "step": 39465 }, { "epoch": 1.1637918325224827, "grad_norm": 2.9340053999678557, "learning_rate": 5.760154258601515e-06, "loss": 1.3401, "step": 39470 }, { "epoch": 1.1639392599144922, "grad_norm": 2.88209660651287, "learning_rate": 5.759611179936309e-06, "loss": 1.3683, "step": 39475 }, { "epoch": 1.1640866873065017, "grad_norm": 2.923832965594943, "learning_rate": 5.759068042136e-06, "loss": 1.3729, "step": 39480 }, { "epoch": 1.164234114698511, "grad_norm": 2.9679083848218504, "learning_rate": 5.75852484521657e-06, "loss": 1.2714, "step": 39485 }, { "epoch": 1.1643815420905204, "grad_norm": 2.753776485553078, "learning_rate": 5.7579815891940025e-06, "loss": 1.3099, "step": 39490 }, { "epoch": 1.1645289694825298, "grad_norm": 2.894831652204707, "learning_rate": 5.757438274084285e-06, "loss": 1.2977, "step": 39495 }, { "epoch": 1.1646763968745393, "grad_norm": 2.882989424955921, "learning_rate": 5.756894899903402e-06, "loss": 1.316, "step": 39500 }, { "epoch": 1.1646763968745393, "eval_loss": 1.1214183568954468, "eval_runtime": 4.2034, "eval_samples_per_second": 94.21, "eval_steps_per_second": 3.093, "step": 39500 }, { "epoch": 1.1648238242665487, "grad_norm": 2.8802177507741216, "learning_rate": 5.7563514666673455e-06, "loss": 1.2812, "step": 39505 }, { "epoch": 1.1649712516585582, "grad_norm": 2.853380768426722, "learning_rate": 5.755807974392107e-06, "loss": 1.292, "step": 39510 }, { "epoch": 1.1651186790505677, "grad_norm": 2.8687071929465993, "learning_rate": 5.7552644230936784e-06, "loss": 1.293, "step": 39515 }, { "epoch": 1.165266106442577, "grad_norm": 2.895159504844427, "learning_rate": 5.754720812788054e-06, "loss": 1.301, "step": 39520 }, { "epoch": 1.1654135338345863, "grad_norm": 2.991125503053927, "learning_rate": 5.75417714349123e-06, "loss": 1.3386, "step": 39525 }, { "epoch": 1.1655609612265958, "grad_norm": 3.0921649812267784, "learning_rate": 5.753633415219205e-06, "loss": 1.3406, "step": 39530 }, { "epoch": 1.1657083886186053, "grad_norm": 2.906409817349192, "learning_rate": 5.753089627987981e-06, "loss": 1.2736, "step": 39535 }, { "epoch": 1.1658558160106147, "grad_norm": 2.8560386503475974, "learning_rate": 5.752545781813555e-06, "loss": 1.3074, "step": 39540 }, { "epoch": 1.1660032434026242, "grad_norm": 2.8706719727549737, "learning_rate": 5.752001876711934e-06, "loss": 1.3041, "step": 39545 }, { "epoch": 1.1661506707946336, "grad_norm": 2.926419337010338, "learning_rate": 5.751457912699122e-06, "loss": 1.2883, "step": 39550 }, { "epoch": 1.166298098186643, "grad_norm": 2.7572243787580875, "learning_rate": 5.750913889791125e-06, "loss": 1.332, "step": 39555 }, { "epoch": 1.1664455255786526, "grad_norm": 2.8481779830053484, "learning_rate": 5.75036980800395e-06, "loss": 1.2822, "step": 39560 }, { "epoch": 1.166592952970662, "grad_norm": 2.9350788421872256, "learning_rate": 5.749825667353612e-06, "loss": 1.3158, "step": 39565 }, { "epoch": 1.1667403803626715, "grad_norm": 2.8559247206358456, "learning_rate": 5.749281467856119e-06, "loss": 1.3125, "step": 39570 }, { "epoch": 1.166887807754681, "grad_norm": 2.9690158403570583, "learning_rate": 5.748737209527486e-06, "loss": 1.3171, "step": 39575 }, { "epoch": 1.1670352351466902, "grad_norm": 2.7285439828283966, "learning_rate": 5.748192892383729e-06, "loss": 1.2581, "step": 39580 }, { "epoch": 1.1671826625386996, "grad_norm": 3.0096035281136118, "learning_rate": 5.747648516440864e-06, "loss": 1.3664, "step": 39585 }, { "epoch": 1.167330089930709, "grad_norm": 3.0304559351859446, "learning_rate": 5.74710408171491e-06, "loss": 1.3141, "step": 39590 }, { "epoch": 1.1674775173227185, "grad_norm": 3.0599666258247114, "learning_rate": 5.746559588221888e-06, "loss": 1.3614, "step": 39595 }, { "epoch": 1.167624944714728, "grad_norm": 2.9324067633338435, "learning_rate": 5.74601503597782e-06, "loss": 1.3113, "step": 39600 }, { "epoch": 1.1677723721067375, "grad_norm": 2.8027062395799756, "learning_rate": 5.74547042499873e-06, "loss": 1.2905, "step": 39605 }, { "epoch": 1.167919799498747, "grad_norm": 2.8877410080742014, "learning_rate": 5.744925755300645e-06, "loss": 1.2837, "step": 39610 }, { "epoch": 1.1680672268907564, "grad_norm": 2.9549861378351214, "learning_rate": 5.744381026899592e-06, "loss": 1.3161, "step": 39615 }, { "epoch": 1.1682146542827658, "grad_norm": 3.0141581810867724, "learning_rate": 5.743836239811599e-06, "loss": 1.3375, "step": 39620 }, { "epoch": 1.168362081674775, "grad_norm": 3.029837983432206, "learning_rate": 5.7432913940526995e-06, "loss": 1.3242, "step": 39625 }, { "epoch": 1.1685095090667845, "grad_norm": 2.7823866937826076, "learning_rate": 5.742746489638923e-06, "loss": 1.3956, "step": 39630 }, { "epoch": 1.168656936458794, "grad_norm": 2.927232397791417, "learning_rate": 5.742201526586306e-06, "loss": 1.3245, "step": 39635 }, { "epoch": 1.1688043638508034, "grad_norm": 3.1003322463111425, "learning_rate": 5.741656504910884e-06, "loss": 1.3385, "step": 39640 }, { "epoch": 1.168951791242813, "grad_norm": 2.9562252374715623, "learning_rate": 5.741111424628696e-06, "loss": 1.2828, "step": 39645 }, { "epoch": 1.1690992186348224, "grad_norm": 2.9972471312630797, "learning_rate": 5.74056628575578e-06, "loss": 1.3203, "step": 39650 }, { "epoch": 1.1692466460268318, "grad_norm": 2.8884779161816936, "learning_rate": 5.740021088308178e-06, "loss": 1.3453, "step": 39655 }, { "epoch": 1.1693940734188413, "grad_norm": 2.940810347762924, "learning_rate": 5.739475832301933e-06, "loss": 1.3141, "step": 39660 }, { "epoch": 1.1695415008108507, "grad_norm": 2.8830126258417046, "learning_rate": 5.738930517753092e-06, "loss": 1.3389, "step": 39665 }, { "epoch": 1.1696889282028602, "grad_norm": 2.820700012158153, "learning_rate": 5.738385144677696e-06, "loss": 1.3278, "step": 39670 }, { "epoch": 1.1698363555948696, "grad_norm": 2.95189096073426, "learning_rate": 5.737839713091799e-06, "loss": 1.3185, "step": 39675 }, { "epoch": 1.1699837829868789, "grad_norm": 2.9593141997654584, "learning_rate": 5.737294223011447e-06, "loss": 1.3203, "step": 39680 }, { "epoch": 1.1701312103788883, "grad_norm": 2.844899031373801, "learning_rate": 5.736748674452694e-06, "loss": 1.2967, "step": 39685 }, { "epoch": 1.1702786377708978, "grad_norm": 2.979749011002548, "learning_rate": 5.7362030674315925e-06, "loss": 1.283, "step": 39690 }, { "epoch": 1.1704260651629073, "grad_norm": 2.8420624891419526, "learning_rate": 5.735657401964198e-06, "loss": 1.2985, "step": 39695 }, { "epoch": 1.1705734925549167, "grad_norm": 3.0163423621340804, "learning_rate": 5.735111678066567e-06, "loss": 1.3356, "step": 39700 }, { "epoch": 1.1707209199469262, "grad_norm": 2.9523032636676834, "learning_rate": 5.734565895754759e-06, "loss": 1.3256, "step": 39705 }, { "epoch": 1.1708683473389356, "grad_norm": 2.7225613598280094, "learning_rate": 5.734020055044831e-06, "loss": 1.2728, "step": 39710 }, { "epoch": 1.171015774730945, "grad_norm": 2.875112238084, "learning_rate": 5.733474155952849e-06, "loss": 1.3344, "step": 39715 }, { "epoch": 1.1711632021229543, "grad_norm": 3.0914333495354955, "learning_rate": 5.732928198494875e-06, "loss": 1.3405, "step": 39720 }, { "epoch": 1.1713106295149638, "grad_norm": 2.991447427158216, "learning_rate": 5.732382182686975e-06, "loss": 1.3339, "step": 39725 }, { "epoch": 1.1714580569069732, "grad_norm": 2.791562811945553, "learning_rate": 5.731836108545216e-06, "loss": 1.3296, "step": 39730 }, { "epoch": 1.1716054842989827, "grad_norm": 2.984378689006562, "learning_rate": 5.731289976085665e-06, "loss": 1.2821, "step": 39735 }, { "epoch": 1.1717529116909922, "grad_norm": 2.771519500685001, "learning_rate": 5.730743785324394e-06, "loss": 1.288, "step": 39740 }, { "epoch": 1.1719003390830016, "grad_norm": 2.9688892353614587, "learning_rate": 5.7301975362774755e-06, "loss": 1.3764, "step": 39745 }, { "epoch": 1.172047766475011, "grad_norm": 3.018835077286697, "learning_rate": 5.729651228960984e-06, "loss": 1.3636, "step": 39750 }, { "epoch": 1.1721951938670205, "grad_norm": 2.9639318994732173, "learning_rate": 5.729104863390994e-06, "loss": 1.3592, "step": 39755 }, { "epoch": 1.17234262125903, "grad_norm": 3.0346402229975755, "learning_rate": 5.728558439583585e-06, "loss": 1.2982, "step": 39760 }, { "epoch": 1.1724900486510395, "grad_norm": 2.7842681935126126, "learning_rate": 5.728011957554833e-06, "loss": 1.3056, "step": 39765 }, { "epoch": 1.172637476043049, "grad_norm": 2.7918381317494387, "learning_rate": 5.7274654173208205e-06, "loss": 1.3129, "step": 39770 }, { "epoch": 1.1727849034350581, "grad_norm": 2.8534880727199665, "learning_rate": 5.726918818897631e-06, "loss": 1.2887, "step": 39775 }, { "epoch": 1.1729323308270676, "grad_norm": 2.9827800545107963, "learning_rate": 5.726372162301347e-06, "loss": 1.3449, "step": 39780 }, { "epoch": 1.173079758219077, "grad_norm": 2.9472637791430762, "learning_rate": 5.725825447548056e-06, "loss": 1.3263, "step": 39785 }, { "epoch": 1.1732271856110865, "grad_norm": 2.805592347253387, "learning_rate": 5.725278674653844e-06, "loss": 1.2756, "step": 39790 }, { "epoch": 1.173374613003096, "grad_norm": 3.084569825192678, "learning_rate": 5.724731843634803e-06, "loss": 1.3703, "step": 39795 }, { "epoch": 1.1735220403951054, "grad_norm": 2.8947008461334605, "learning_rate": 5.724184954507022e-06, "loss": 1.3687, "step": 39800 }, { "epoch": 1.173669467787115, "grad_norm": 2.749663840240026, "learning_rate": 5.7236380072865945e-06, "loss": 1.3109, "step": 39805 }, { "epoch": 1.1738168951791244, "grad_norm": 4.057300614304265, "learning_rate": 5.723091001989614e-06, "loss": 1.3303, "step": 39810 }, { "epoch": 1.1739643225711338, "grad_norm": 2.875959681461481, "learning_rate": 5.722543938632179e-06, "loss": 1.2623, "step": 39815 }, { "epoch": 1.174111749963143, "grad_norm": 2.787692862446215, "learning_rate": 5.721996817230385e-06, "loss": 1.2928, "step": 39820 }, { "epoch": 1.1742591773551525, "grad_norm": 2.8685319153266895, "learning_rate": 5.721449637800333e-06, "loss": 1.3001, "step": 39825 }, { "epoch": 1.174406604747162, "grad_norm": 2.8313828489608253, "learning_rate": 5.720902400358126e-06, "loss": 1.3205, "step": 39830 }, { "epoch": 1.1745540321391714, "grad_norm": 2.9252453496945128, "learning_rate": 5.7203551049198635e-06, "loss": 1.286, "step": 39835 }, { "epoch": 1.1747014595311809, "grad_norm": 2.8502750127832788, "learning_rate": 5.719807751501652e-06, "loss": 1.3463, "step": 39840 }, { "epoch": 1.1748488869231903, "grad_norm": 2.8315407113339384, "learning_rate": 5.719260340119599e-06, "loss": 1.313, "step": 39845 }, { "epoch": 1.1749963143151998, "grad_norm": 2.8473221566225035, "learning_rate": 5.718712870789812e-06, "loss": 1.3157, "step": 39850 }, { "epoch": 1.1751437417072093, "grad_norm": 2.8210112579644626, "learning_rate": 5.7181653435284e-06, "loss": 1.2474, "step": 39855 }, { "epoch": 1.1752911690992187, "grad_norm": 2.8682851146001975, "learning_rate": 5.717617758351477e-06, "loss": 1.3033, "step": 39860 }, { "epoch": 1.1754385964912282, "grad_norm": 2.8546693690951317, "learning_rate": 5.717070115275153e-06, "loss": 1.2867, "step": 39865 }, { "epoch": 1.1755860238832376, "grad_norm": 3.025267057484621, "learning_rate": 5.716522414315546e-06, "loss": 1.336, "step": 39870 }, { "epoch": 1.1757334512752469, "grad_norm": 2.813554175969089, "learning_rate": 5.715974655488769e-06, "loss": 1.3654, "step": 39875 }, { "epoch": 1.1758808786672563, "grad_norm": 2.837933562150885, "learning_rate": 5.715426838810945e-06, "loss": 1.2632, "step": 39880 }, { "epoch": 1.1760283060592658, "grad_norm": 3.0555511094699157, "learning_rate": 5.714878964298192e-06, "loss": 1.3523, "step": 39885 }, { "epoch": 1.1761757334512752, "grad_norm": 2.8872259322109532, "learning_rate": 5.714331031966631e-06, "loss": 1.3031, "step": 39890 }, { "epoch": 1.1763231608432847, "grad_norm": 2.913853554047871, "learning_rate": 5.713783041832386e-06, "loss": 1.3156, "step": 39895 }, { "epoch": 1.1764705882352942, "grad_norm": 2.8118495147873395, "learning_rate": 5.713234993911584e-06, "loss": 1.3148, "step": 39900 }, { "epoch": 1.1766180156273036, "grad_norm": 2.800007639562909, "learning_rate": 5.71268688822035e-06, "loss": 1.3373, "step": 39905 }, { "epoch": 1.176765443019313, "grad_norm": 2.8006973268417164, "learning_rate": 5.7121387247748126e-06, "loss": 1.3266, "step": 39910 }, { "epoch": 1.1769128704113223, "grad_norm": 2.9808247176121374, "learning_rate": 5.711590503591102e-06, "loss": 1.3143, "step": 39915 }, { "epoch": 1.1770602978033318, "grad_norm": 3.028010695357788, "learning_rate": 5.711042224685352e-06, "loss": 1.3092, "step": 39920 }, { "epoch": 1.1772077251953412, "grad_norm": 3.076679162638311, "learning_rate": 5.710493888073695e-06, "loss": 1.3455, "step": 39925 }, { "epoch": 1.1773551525873507, "grad_norm": 2.8862683023121307, "learning_rate": 5.709945493772268e-06, "loss": 1.263, "step": 39930 }, { "epoch": 1.1775025799793601, "grad_norm": 2.997441811633059, "learning_rate": 5.709397041797206e-06, "loss": 1.3451, "step": 39935 }, { "epoch": 1.1776500073713696, "grad_norm": 2.9393777981139904, "learning_rate": 5.7088485321646485e-06, "loss": 1.3387, "step": 39940 }, { "epoch": 1.177797434763379, "grad_norm": 2.8083901883676785, "learning_rate": 5.708299964890735e-06, "loss": 1.2907, "step": 39945 }, { "epoch": 1.1779448621553885, "grad_norm": 2.9575402234858754, "learning_rate": 5.707751339991609e-06, "loss": 1.3122, "step": 39950 }, { "epoch": 1.178092289547398, "grad_norm": 2.8810413859930932, "learning_rate": 5.707202657483416e-06, "loss": 1.3384, "step": 39955 }, { "epoch": 1.1782397169394074, "grad_norm": 2.980503211689489, "learning_rate": 5.706653917382299e-06, "loss": 1.2973, "step": 39960 }, { "epoch": 1.178387144331417, "grad_norm": 3.0682506133730576, "learning_rate": 5.706105119704406e-06, "loss": 1.2979, "step": 39965 }, { "epoch": 1.1785345717234261, "grad_norm": 2.956577791348845, "learning_rate": 5.7055562644658866e-06, "loss": 1.2858, "step": 39970 }, { "epoch": 1.1786819991154356, "grad_norm": 2.8819441848413767, "learning_rate": 5.70500735168289e-06, "loss": 1.2744, "step": 39975 }, { "epoch": 1.178829426507445, "grad_norm": 2.8986231478651656, "learning_rate": 5.704458381371571e-06, "loss": 1.3208, "step": 39980 }, { "epoch": 1.1789768538994545, "grad_norm": 3.0060308252225982, "learning_rate": 5.703909353548084e-06, "loss": 1.2993, "step": 39985 }, { "epoch": 1.179124281291464, "grad_norm": 2.8921974938032227, "learning_rate": 5.70336026822858e-06, "loss": 1.3094, "step": 39990 }, { "epoch": 1.1792717086834734, "grad_norm": 3.0018701444510305, "learning_rate": 5.7028111254292215e-06, "loss": 1.3392, "step": 39995 }, { "epoch": 1.1794191360754829, "grad_norm": 2.8596485339512543, "learning_rate": 5.7022619251661644e-06, "loss": 1.3232, "step": 40000 }, { "epoch": 1.1794191360754829, "eval_loss": 1.1206333637237549, "eval_runtime": 4.2662, "eval_samples_per_second": 92.822, "eval_steps_per_second": 3.047, "step": 40000 }, { "epoch": 1.1795665634674923, "grad_norm": 2.9629661204688347, "learning_rate": 5.701712667455573e-06, "loss": 1.2551, "step": 40005 }, { "epoch": 1.1797139908595016, "grad_norm": 2.875101211552013, "learning_rate": 5.701163352313606e-06, "loss": 1.3276, "step": 40010 }, { "epoch": 1.179861418251511, "grad_norm": 3.1365129065980484, "learning_rate": 5.70061397975643e-06, "loss": 1.2885, "step": 40015 }, { "epoch": 1.1800088456435205, "grad_norm": 3.0000951053362206, "learning_rate": 5.70006454980021e-06, "loss": 1.3579, "step": 40020 }, { "epoch": 1.18015627303553, "grad_norm": 2.9434708386087887, "learning_rate": 5.699515062461113e-06, "loss": 1.307, "step": 40025 }, { "epoch": 1.1803037004275394, "grad_norm": 2.9285025240417677, "learning_rate": 5.69896551775531e-06, "loss": 1.3234, "step": 40030 }, { "epoch": 1.1804511278195489, "grad_norm": 2.957124037292086, "learning_rate": 5.698415915698972e-06, "loss": 1.3403, "step": 40035 }, { "epoch": 1.1805985552115583, "grad_norm": 2.8857760435034767, "learning_rate": 5.697866256308269e-06, "loss": 1.3688, "step": 40040 }, { "epoch": 1.1807459826035678, "grad_norm": 2.9976284735754146, "learning_rate": 5.697316539599379e-06, "loss": 1.3261, "step": 40045 }, { "epoch": 1.1808934099955772, "grad_norm": 2.978965617720175, "learning_rate": 5.696766765588475e-06, "loss": 1.2994, "step": 40050 }, { "epoch": 1.1810408373875867, "grad_norm": 3.1040549359844785, "learning_rate": 5.6962169342917355e-06, "loss": 1.3294, "step": 40055 }, { "epoch": 1.1811882647795962, "grad_norm": 2.876322257131608, "learning_rate": 5.695667045725342e-06, "loss": 1.3372, "step": 40060 }, { "epoch": 1.1813356921716054, "grad_norm": 2.965964604038545, "learning_rate": 5.695117099905474e-06, "loss": 1.2915, "step": 40065 }, { "epoch": 1.1814831195636148, "grad_norm": 2.9075778775771357, "learning_rate": 5.694567096848313e-06, "loss": 1.3354, "step": 40070 }, { "epoch": 1.1816305469556243, "grad_norm": 2.887897016549293, "learning_rate": 5.694017036570046e-06, "loss": 1.238, "step": 40075 }, { "epoch": 1.1817779743476338, "grad_norm": 2.8877904676142063, "learning_rate": 5.693466919086855e-06, "loss": 1.3066, "step": 40080 }, { "epoch": 1.1819254017396432, "grad_norm": 3.0456515573870067, "learning_rate": 5.692916744414933e-06, "loss": 1.3769, "step": 40085 }, { "epoch": 1.1820728291316527, "grad_norm": 3.1190800158685508, "learning_rate": 5.692366512570468e-06, "loss": 1.3374, "step": 40090 }, { "epoch": 1.1822202565236621, "grad_norm": 2.836405380772416, "learning_rate": 5.6918162235696495e-06, "loss": 1.2988, "step": 40095 }, { "epoch": 1.1823676839156716, "grad_norm": 2.939988989696659, "learning_rate": 5.6912658774286705e-06, "loss": 1.3018, "step": 40100 }, { "epoch": 1.182515111307681, "grad_norm": 3.0742167335361477, "learning_rate": 5.690715474163726e-06, "loss": 1.3001, "step": 40105 }, { "epoch": 1.1826625386996903, "grad_norm": 3.159912232457051, "learning_rate": 5.690165013791015e-06, "loss": 1.3496, "step": 40110 }, { "epoch": 1.1828099660916997, "grad_norm": 2.946937460084232, "learning_rate": 5.689614496326731e-06, "loss": 1.3283, "step": 40115 }, { "epoch": 1.1829573934837092, "grad_norm": 2.792883148193667, "learning_rate": 5.689063921787077e-06, "loss": 1.2748, "step": 40120 }, { "epoch": 1.1831048208757187, "grad_norm": 2.9398607052281975, "learning_rate": 5.688513290188253e-06, "loss": 1.332, "step": 40125 }, { "epoch": 1.1832522482677281, "grad_norm": 2.9641379287805156, "learning_rate": 5.68796260154646e-06, "loss": 1.3409, "step": 40130 }, { "epoch": 1.1833996756597376, "grad_norm": 2.889215362523399, "learning_rate": 5.687411855877905e-06, "loss": 1.3341, "step": 40135 }, { "epoch": 1.183547103051747, "grad_norm": 2.937820376240488, "learning_rate": 5.686861053198795e-06, "loss": 1.3289, "step": 40140 }, { "epoch": 1.1836945304437565, "grad_norm": 3.1409885322433424, "learning_rate": 5.686310193525337e-06, "loss": 1.3396, "step": 40145 }, { "epoch": 1.183841957835766, "grad_norm": 2.9148225650579573, "learning_rate": 5.685759276873739e-06, "loss": 1.3263, "step": 40150 }, { "epoch": 1.1839893852277754, "grad_norm": 3.0890466151405196, "learning_rate": 5.685208303260215e-06, "loss": 1.2855, "step": 40155 }, { "epoch": 1.1841368126197849, "grad_norm": 2.9852682250236158, "learning_rate": 5.684657272700976e-06, "loss": 1.2862, "step": 40160 }, { "epoch": 1.184284240011794, "grad_norm": 2.914248828427147, "learning_rate": 5.68410618521224e-06, "loss": 1.3477, "step": 40165 }, { "epoch": 1.1844316674038036, "grad_norm": 2.97207955773705, "learning_rate": 5.683555040810218e-06, "loss": 1.2855, "step": 40170 }, { "epoch": 1.184579094795813, "grad_norm": 2.951984096076611, "learning_rate": 5.683003839511132e-06, "loss": 1.3154, "step": 40175 }, { "epoch": 1.1847265221878225, "grad_norm": 3.106654212368227, "learning_rate": 5.682452581331201e-06, "loss": 1.3235, "step": 40180 }, { "epoch": 1.184873949579832, "grad_norm": 2.8213298642070535, "learning_rate": 5.681901266286645e-06, "loss": 1.2996, "step": 40185 }, { "epoch": 1.1850213769718414, "grad_norm": 2.894104330478777, "learning_rate": 5.681349894393691e-06, "loss": 1.323, "step": 40190 }, { "epoch": 1.1851688043638509, "grad_norm": 2.9689207944738247, "learning_rate": 5.68079846566856e-06, "loss": 1.33, "step": 40195 }, { "epoch": 1.1853162317558603, "grad_norm": 2.9637007076193305, "learning_rate": 5.6802469801274785e-06, "loss": 1.2997, "step": 40200 }, { "epoch": 1.1854636591478696, "grad_norm": 2.958980357924992, "learning_rate": 5.679695437786675e-06, "loss": 1.3356, "step": 40205 }, { "epoch": 1.185611086539879, "grad_norm": 2.925169460692165, "learning_rate": 5.679143838662381e-06, "loss": 1.3229, "step": 40210 }, { "epoch": 1.1857585139318885, "grad_norm": 3.071410665511455, "learning_rate": 5.678592182770826e-06, "loss": 1.3572, "step": 40215 }, { "epoch": 1.185905941323898, "grad_norm": 2.92361856490783, "learning_rate": 5.678040470128244e-06, "loss": 1.3451, "step": 40220 }, { "epoch": 1.1860533687159074, "grad_norm": 2.9645731377809836, "learning_rate": 5.67748870075087e-06, "loss": 1.3112, "step": 40225 }, { "epoch": 1.1862007961079168, "grad_norm": 2.9230744329804743, "learning_rate": 5.67693687465494e-06, "loss": 1.2721, "step": 40230 }, { "epoch": 1.1863482234999263, "grad_norm": 2.8561334730780574, "learning_rate": 5.676384991856691e-06, "loss": 1.3141, "step": 40235 }, { "epoch": 1.1864956508919358, "grad_norm": 2.870589683245683, "learning_rate": 5.675833052372365e-06, "loss": 1.3037, "step": 40240 }, { "epoch": 1.1866430782839452, "grad_norm": 2.930603357662, "learning_rate": 5.675281056218202e-06, "loss": 1.347, "step": 40245 }, { "epoch": 1.1867905056759547, "grad_norm": 2.838496697640297, "learning_rate": 5.674729003410446e-06, "loss": 1.3259, "step": 40250 }, { "epoch": 1.1869379330679641, "grad_norm": 3.096066715278154, "learning_rate": 5.67417689396534e-06, "loss": 1.2894, "step": 40255 }, { "epoch": 1.1870853604599734, "grad_norm": 2.7492692094755085, "learning_rate": 5.673624727899134e-06, "loss": 1.288, "step": 40260 }, { "epoch": 1.1872327878519828, "grad_norm": 3.002881730766533, "learning_rate": 5.673072505228072e-06, "loss": 1.35, "step": 40265 }, { "epoch": 1.1873802152439923, "grad_norm": 3.0289191112340093, "learning_rate": 5.672520225968405e-06, "loss": 1.3687, "step": 40270 }, { "epoch": 1.1875276426360017, "grad_norm": 2.887689638806928, "learning_rate": 5.671967890136387e-06, "loss": 1.2834, "step": 40275 }, { "epoch": 1.1876750700280112, "grad_norm": 2.8561490198974258, "learning_rate": 5.6714154977482674e-06, "loss": 1.329, "step": 40280 }, { "epoch": 1.1878224974200207, "grad_norm": 3.0520627155917284, "learning_rate": 5.670863048820303e-06, "loss": 1.3932, "step": 40285 }, { "epoch": 1.1879699248120301, "grad_norm": 2.841177836545199, "learning_rate": 5.670310543368751e-06, "loss": 1.2918, "step": 40290 }, { "epoch": 1.1881173522040396, "grad_norm": 2.9151747163798523, "learning_rate": 5.6697579814098685e-06, "loss": 1.2881, "step": 40295 }, { "epoch": 1.188264779596049, "grad_norm": 2.8937312318568087, "learning_rate": 5.669205362959914e-06, "loss": 1.3142, "step": 40300 }, { "epoch": 1.1884122069880583, "grad_norm": 2.865001079161772, "learning_rate": 5.668652688035152e-06, "loss": 1.3137, "step": 40305 }, { "epoch": 1.1885596343800677, "grad_norm": 2.852568597739556, "learning_rate": 5.668099956651843e-06, "loss": 1.3093, "step": 40310 }, { "epoch": 1.1887070617720772, "grad_norm": 2.852888754200441, "learning_rate": 5.667547168826252e-06, "loss": 1.3471, "step": 40315 }, { "epoch": 1.1888544891640866, "grad_norm": 2.9033048527385303, "learning_rate": 5.666994324574646e-06, "loss": 1.3179, "step": 40320 }, { "epoch": 1.189001916556096, "grad_norm": 2.867539491186158, "learning_rate": 5.666441423913294e-06, "loss": 1.3233, "step": 40325 }, { "epoch": 1.1891493439481056, "grad_norm": 3.0031176174619745, "learning_rate": 5.665888466858464e-06, "loss": 1.3271, "step": 40330 }, { "epoch": 1.189296771340115, "grad_norm": 2.881257179674972, "learning_rate": 5.665335453426427e-06, "loss": 1.2962, "step": 40335 }, { "epoch": 1.1894441987321245, "grad_norm": 3.019437225994316, "learning_rate": 5.664782383633459e-06, "loss": 1.2894, "step": 40340 }, { "epoch": 1.189591626124134, "grad_norm": 2.7819458144216997, "learning_rate": 5.664229257495832e-06, "loss": 1.2886, "step": 40345 }, { "epoch": 1.1897390535161434, "grad_norm": 3.164779487912367, "learning_rate": 5.663676075029824e-06, "loss": 1.3111, "step": 40350 }, { "epoch": 1.1898864809081529, "grad_norm": 2.9669420254112326, "learning_rate": 5.663122836251712e-06, "loss": 1.3564, "step": 40355 }, { "epoch": 1.190033908300162, "grad_norm": 2.8562728980561087, "learning_rate": 5.6625695411777754e-06, "loss": 1.2705, "step": 40360 }, { "epoch": 1.1901813356921715, "grad_norm": 2.9382813463453124, "learning_rate": 5.662016189824297e-06, "loss": 1.3686, "step": 40365 }, { "epoch": 1.190328763084181, "grad_norm": 2.893691947618697, "learning_rate": 5.6614627822075584e-06, "loss": 1.2995, "step": 40370 }, { "epoch": 1.1904761904761905, "grad_norm": 2.9727268366022215, "learning_rate": 5.660909318343845e-06, "loss": 1.3041, "step": 40375 }, { "epoch": 1.1906236178682, "grad_norm": 2.883362588504694, "learning_rate": 5.660355798249443e-06, "loss": 1.3284, "step": 40380 }, { "epoch": 1.1907710452602094, "grad_norm": 2.871097191229477, "learning_rate": 5.65980222194064e-06, "loss": 1.3041, "step": 40385 }, { "epoch": 1.1909184726522188, "grad_norm": 2.9864212131052827, "learning_rate": 5.659248589433726e-06, "loss": 1.3398, "step": 40390 }, { "epoch": 1.1910659000442283, "grad_norm": 2.7893408240712207, "learning_rate": 5.658694900744991e-06, "loss": 1.2951, "step": 40395 }, { "epoch": 1.1912133274362375, "grad_norm": 2.8581886857983902, "learning_rate": 5.658141155890731e-06, "loss": 1.338, "step": 40400 }, { "epoch": 1.191360754828247, "grad_norm": 2.9151207374016725, "learning_rate": 5.657587354887238e-06, "loss": 1.284, "step": 40405 }, { "epoch": 1.1915081822202565, "grad_norm": 2.917343066293897, "learning_rate": 5.657033497750808e-06, "loss": 1.3211, "step": 40410 }, { "epoch": 1.191655609612266, "grad_norm": 2.9052804805837122, "learning_rate": 5.65647958449774e-06, "loss": 1.3289, "step": 40415 }, { "epoch": 1.1918030370042754, "grad_norm": 3.068319737512941, "learning_rate": 5.655925615144333e-06, "loss": 1.3532, "step": 40420 }, { "epoch": 1.1919504643962848, "grad_norm": 2.8780887236319357, "learning_rate": 5.65537158970689e-06, "loss": 1.3355, "step": 40425 }, { "epoch": 1.1920978917882943, "grad_norm": 2.933839078791224, "learning_rate": 5.654817508201711e-06, "loss": 1.3153, "step": 40430 }, { "epoch": 1.1922453191803037, "grad_norm": 2.8404816253586844, "learning_rate": 5.654263370645101e-06, "loss": 1.3091, "step": 40435 }, { "epoch": 1.1923927465723132, "grad_norm": 2.9443040782958465, "learning_rate": 5.653709177053368e-06, "loss": 1.3536, "step": 40440 }, { "epoch": 1.1925401739643227, "grad_norm": 2.8849322882286716, "learning_rate": 5.653154927442819e-06, "loss": 1.3447, "step": 40445 }, { "epoch": 1.1926876013563321, "grad_norm": 2.991547861373245, "learning_rate": 5.652600621829762e-06, "loss": 1.2528, "step": 40450 }, { "epoch": 1.1928350287483414, "grad_norm": 3.0276671860676854, "learning_rate": 5.652046260230511e-06, "loss": 1.2981, "step": 40455 }, { "epoch": 1.1929824561403508, "grad_norm": 2.9623551867742486, "learning_rate": 5.651491842661375e-06, "loss": 1.3508, "step": 40460 }, { "epoch": 1.1931298835323603, "grad_norm": 2.9518533709527004, "learning_rate": 5.650937369138671e-06, "loss": 1.3099, "step": 40465 }, { "epoch": 1.1932773109243697, "grad_norm": 2.78273154081923, "learning_rate": 5.650382839678713e-06, "loss": 1.3177, "step": 40470 }, { "epoch": 1.1934247383163792, "grad_norm": 3.1012404698883307, "learning_rate": 5.649828254297822e-06, "loss": 1.3394, "step": 40475 }, { "epoch": 1.1935721657083886, "grad_norm": 2.892132179528489, "learning_rate": 5.649273613012314e-06, "loss": 1.3234, "step": 40480 }, { "epoch": 1.193719593100398, "grad_norm": 2.87054158022448, "learning_rate": 5.648718915838512e-06, "loss": 1.3652, "step": 40485 }, { "epoch": 1.1938670204924076, "grad_norm": 2.9346712799356682, "learning_rate": 5.648164162792737e-06, "loss": 1.3332, "step": 40490 }, { "epoch": 1.194014447884417, "grad_norm": 3.828998938561286, "learning_rate": 5.647609353891314e-06, "loss": 1.3369, "step": 40495 }, { "epoch": 1.1941618752764263, "grad_norm": 2.9565916936831833, "learning_rate": 5.6470544891505695e-06, "loss": 1.3175, "step": 40500 }, { "epoch": 1.1941618752764263, "eval_loss": 1.1211522817611694, "eval_runtime": 4.1506, "eval_samples_per_second": 95.407, "eval_steps_per_second": 3.132, "step": 40500 }, { "epoch": 1.1943093026684357, "grad_norm": 2.90927425850983, "learning_rate": 5.64649956858683e-06, "loss": 1.3793, "step": 40505 }, { "epoch": 1.1944567300604452, "grad_norm": 2.8601965803344656, "learning_rate": 5.645944592216425e-06, "loss": 1.2891, "step": 40510 }, { "epoch": 1.1946041574524546, "grad_norm": 2.9271059462611584, "learning_rate": 5.645389560055687e-06, "loss": 1.279, "step": 40515 }, { "epoch": 1.194751584844464, "grad_norm": 2.947753572414577, "learning_rate": 5.644834472120945e-06, "loss": 1.308, "step": 40520 }, { "epoch": 1.1948990122364735, "grad_norm": 2.8144637411308913, "learning_rate": 5.644279328428536e-06, "loss": 1.3235, "step": 40525 }, { "epoch": 1.195046439628483, "grad_norm": 3.018101763517854, "learning_rate": 5.643724128994793e-06, "loss": 1.3588, "step": 40530 }, { "epoch": 1.1951938670204925, "grad_norm": 2.9617941944304054, "learning_rate": 5.643168873836058e-06, "loss": 1.2918, "step": 40535 }, { "epoch": 1.195341294412502, "grad_norm": 2.856524276387488, "learning_rate": 5.642613562968665e-06, "loss": 1.2988, "step": 40540 }, { "epoch": 1.1954887218045114, "grad_norm": 2.6741572938951212, "learning_rate": 5.642058196408957e-06, "loss": 1.3433, "step": 40545 }, { "epoch": 1.1956361491965208, "grad_norm": 2.9161393711709547, "learning_rate": 5.641502774173276e-06, "loss": 1.2886, "step": 40550 }, { "epoch": 1.19578357658853, "grad_norm": 3.072793526098513, "learning_rate": 5.640947296277967e-06, "loss": 1.3386, "step": 40555 }, { "epoch": 1.1959310039805395, "grad_norm": 2.954641537855923, "learning_rate": 5.640391762739374e-06, "loss": 1.3141, "step": 40560 }, { "epoch": 1.196078431372549, "grad_norm": 2.770308729994204, "learning_rate": 5.639836173573844e-06, "loss": 1.3264, "step": 40565 }, { "epoch": 1.1962258587645584, "grad_norm": 2.8996392607371932, "learning_rate": 5.639280528797727e-06, "loss": 1.356, "step": 40570 }, { "epoch": 1.196373286156568, "grad_norm": 2.976114334871336, "learning_rate": 5.638724828427374e-06, "loss": 1.3442, "step": 40575 }, { "epoch": 1.1965207135485774, "grad_norm": 2.8350340419932643, "learning_rate": 5.638169072479135e-06, "loss": 1.2979, "step": 40580 }, { "epoch": 1.1966681409405868, "grad_norm": 2.8255565292881655, "learning_rate": 5.637613260969367e-06, "loss": 1.3122, "step": 40585 }, { "epoch": 1.1968155683325963, "grad_norm": 3.1052763911708214, "learning_rate": 5.637057393914422e-06, "loss": 1.3177, "step": 40590 }, { "epoch": 1.1969629957246055, "grad_norm": 2.998026664816468, "learning_rate": 5.636501471330658e-06, "loss": 1.2949, "step": 40595 }, { "epoch": 1.197110423116615, "grad_norm": 2.8894811826162345, "learning_rate": 5.6359454932344344e-06, "loss": 1.245, "step": 40600 }, { "epoch": 1.1972578505086244, "grad_norm": 2.8963890448612633, "learning_rate": 5.635389459642111e-06, "loss": 1.3078, "step": 40605 }, { "epoch": 1.197405277900634, "grad_norm": 2.7230686488172573, "learning_rate": 5.634833370570051e-06, "loss": 1.2939, "step": 40610 }, { "epoch": 1.1975527052926433, "grad_norm": 2.950103103961456, "learning_rate": 5.634277226034616e-06, "loss": 1.3359, "step": 40615 }, { "epoch": 1.1977001326846528, "grad_norm": 2.798545308167478, "learning_rate": 5.633721026052172e-06, "loss": 1.3695, "step": 40620 }, { "epoch": 1.1978475600766623, "grad_norm": 2.891191980734181, "learning_rate": 5.633164770639084e-06, "loss": 1.2697, "step": 40625 }, { "epoch": 1.1979949874686717, "grad_norm": 2.848680189656314, "learning_rate": 5.632608459811725e-06, "loss": 1.283, "step": 40630 }, { "epoch": 1.1981424148606812, "grad_norm": 2.8738307299239034, "learning_rate": 5.6320520935864615e-06, "loss": 1.3276, "step": 40635 }, { "epoch": 1.1982898422526906, "grad_norm": 2.841491411168708, "learning_rate": 5.631495671979666e-06, "loss": 1.2716, "step": 40640 }, { "epoch": 1.1984372696447, "grad_norm": 2.990971827590669, "learning_rate": 5.630939195007712e-06, "loss": 1.3247, "step": 40645 }, { "epoch": 1.1985846970367093, "grad_norm": 2.86887730921598, "learning_rate": 5.630382662686974e-06, "loss": 1.3112, "step": 40650 }, { "epoch": 1.1987321244287188, "grad_norm": 2.9912984495366555, "learning_rate": 5.6298260750338285e-06, "loss": 1.327, "step": 40655 }, { "epoch": 1.1988795518207283, "grad_norm": 2.934561026226338, "learning_rate": 5.629269432064655e-06, "loss": 1.3474, "step": 40660 }, { "epoch": 1.1990269792127377, "grad_norm": 2.998741502673737, "learning_rate": 5.628712733795833e-06, "loss": 1.3561, "step": 40665 }, { "epoch": 1.1991744066047472, "grad_norm": 2.8171337642490077, "learning_rate": 5.628155980243743e-06, "loss": 1.2773, "step": 40670 }, { "epoch": 1.1993218339967566, "grad_norm": 3.007112379466013, "learning_rate": 5.627599171424769e-06, "loss": 1.3175, "step": 40675 }, { "epoch": 1.199469261388766, "grad_norm": 2.9751561669437883, "learning_rate": 5.627042307355294e-06, "loss": 1.2695, "step": 40680 }, { "epoch": 1.1996166887807755, "grad_norm": 2.9111327167757786, "learning_rate": 5.626485388051707e-06, "loss": 1.303, "step": 40685 }, { "epoch": 1.199764116172785, "grad_norm": 2.928093470501638, "learning_rate": 5.6259284135303945e-06, "loss": 1.3152, "step": 40690 }, { "epoch": 1.1999115435647942, "grad_norm": 2.867136927964134, "learning_rate": 5.625371383807747e-06, "loss": 1.2805, "step": 40695 }, { "epoch": 1.2000589709568037, "grad_norm": 2.8474187598134115, "learning_rate": 5.624814298900154e-06, "loss": 1.3453, "step": 40700 }, { "epoch": 1.2002063983488132, "grad_norm": 2.8948695067596772, "learning_rate": 5.62425715882401e-06, "loss": 1.3016, "step": 40705 }, { "epoch": 1.2003538257408226, "grad_norm": 2.8644978763975297, "learning_rate": 5.623699963595709e-06, "loss": 1.3118, "step": 40710 }, { "epoch": 1.200501253132832, "grad_norm": 2.99604796901658, "learning_rate": 5.623142713231647e-06, "loss": 1.3337, "step": 40715 }, { "epoch": 1.2006486805248415, "grad_norm": 3.011272061179383, "learning_rate": 5.622585407748222e-06, "loss": 1.292, "step": 40720 }, { "epoch": 1.200796107916851, "grad_norm": 2.819075895633539, "learning_rate": 5.622028047161832e-06, "loss": 1.2775, "step": 40725 }, { "epoch": 1.2009435353088604, "grad_norm": 2.9147254933296396, "learning_rate": 5.62147063148888e-06, "loss": 1.3284, "step": 40730 }, { "epoch": 1.20109096270087, "grad_norm": 2.99180247614652, "learning_rate": 5.620913160745767e-06, "loss": 1.3226, "step": 40735 }, { "epoch": 1.2012383900928794, "grad_norm": 2.8775546900837563, "learning_rate": 5.620355634948899e-06, "loss": 1.2547, "step": 40740 }, { "epoch": 1.2013858174848886, "grad_norm": 3.0072952162620754, "learning_rate": 5.61979805411468e-06, "loss": 1.3159, "step": 40745 }, { "epoch": 1.201533244876898, "grad_norm": 2.8967604147323476, "learning_rate": 5.619240418259518e-06, "loss": 1.3475, "step": 40750 }, { "epoch": 1.2016806722689075, "grad_norm": 2.952711867097639, "learning_rate": 5.618682727399822e-06, "loss": 1.3186, "step": 40755 }, { "epoch": 1.201828099660917, "grad_norm": 2.8677242482786984, "learning_rate": 5.618124981552003e-06, "loss": 1.2684, "step": 40760 }, { "epoch": 1.2019755270529264, "grad_norm": 2.9294743065756963, "learning_rate": 5.6175671807324745e-06, "loss": 1.28, "step": 40765 }, { "epoch": 1.2021229544449359, "grad_norm": 2.831292674518838, "learning_rate": 5.617009324957649e-06, "loss": 1.2781, "step": 40770 }, { "epoch": 1.2022703818369453, "grad_norm": 2.9600801894438424, "learning_rate": 5.616451414243941e-06, "loss": 1.2845, "step": 40775 }, { "epoch": 1.2024178092289548, "grad_norm": 2.8542176665603565, "learning_rate": 5.61589344860777e-06, "loss": 1.312, "step": 40780 }, { "epoch": 1.2025652366209643, "grad_norm": 2.88161620987747, "learning_rate": 5.615335428065553e-06, "loss": 1.3054, "step": 40785 }, { "epoch": 1.2027126640129735, "grad_norm": 2.8746387563393028, "learning_rate": 5.61477735263371e-06, "loss": 1.3667, "step": 40790 }, { "epoch": 1.202860091404983, "grad_norm": 2.85256575467479, "learning_rate": 5.614219222328667e-06, "loss": 1.3046, "step": 40795 }, { "epoch": 1.2030075187969924, "grad_norm": 3.0560638770938424, "learning_rate": 5.613661037166841e-06, "loss": 1.2982, "step": 40800 }, { "epoch": 1.2031549461890019, "grad_norm": 2.913549131644582, "learning_rate": 5.613102797164664e-06, "loss": 1.3264, "step": 40805 }, { "epoch": 1.2033023735810113, "grad_norm": 2.8941417202460484, "learning_rate": 5.612544502338557e-06, "loss": 1.2977, "step": 40810 }, { "epoch": 1.2034498009730208, "grad_norm": 2.9215593168884113, "learning_rate": 5.611986152704954e-06, "loss": 1.3223, "step": 40815 }, { "epoch": 1.2035972283650302, "grad_norm": 2.8632337030855086, "learning_rate": 5.61142774828028e-06, "loss": 1.3127, "step": 40820 }, { "epoch": 1.2037446557570397, "grad_norm": 2.8641596657682107, "learning_rate": 5.61086928908097e-06, "loss": 1.373, "step": 40825 }, { "epoch": 1.2038920831490492, "grad_norm": 3.0472528110018353, "learning_rate": 5.610310775123456e-06, "loss": 1.3641, "step": 40830 }, { "epoch": 1.2040395105410586, "grad_norm": 2.9609965399625144, "learning_rate": 5.6097522064241735e-06, "loss": 1.2973, "step": 40835 }, { "epoch": 1.204186937933068, "grad_norm": 2.9745083315926784, "learning_rate": 5.609193582999559e-06, "loss": 1.3646, "step": 40840 }, { "epoch": 1.2043343653250773, "grad_norm": 2.977625972484306, "learning_rate": 5.608634904866051e-06, "loss": 1.3401, "step": 40845 }, { "epoch": 1.2044817927170868, "grad_norm": 2.806801540489649, "learning_rate": 5.608076172040088e-06, "loss": 1.3009, "step": 40850 }, { "epoch": 1.2046292201090962, "grad_norm": 2.9864137585158206, "learning_rate": 5.60751738453811e-06, "loss": 1.3076, "step": 40855 }, { "epoch": 1.2047766475011057, "grad_norm": 2.9131038118071024, "learning_rate": 5.606958542376563e-06, "loss": 1.3236, "step": 40860 }, { "epoch": 1.2049240748931151, "grad_norm": 2.8207123451034652, "learning_rate": 5.606399645571891e-06, "loss": 1.2924, "step": 40865 }, { "epoch": 1.2050715022851246, "grad_norm": 2.947712950918075, "learning_rate": 5.60584069414054e-06, "loss": 1.3289, "step": 40870 }, { "epoch": 1.205218929677134, "grad_norm": 2.846598689705156, "learning_rate": 5.605281688098957e-06, "loss": 1.2886, "step": 40875 }, { "epoch": 1.2053663570691435, "grad_norm": 2.9170112546087785, "learning_rate": 5.604722627463591e-06, "loss": 1.2919, "step": 40880 }, { "epoch": 1.2055137844611528, "grad_norm": 2.9998355364786446, "learning_rate": 5.604163512250895e-06, "loss": 1.3276, "step": 40885 }, { "epoch": 1.2056612118531622, "grad_norm": 2.942087617469415, "learning_rate": 5.60360434247732e-06, "loss": 1.3117, "step": 40890 }, { "epoch": 1.2058086392451717, "grad_norm": 3.0106660347978687, "learning_rate": 5.603045118159321e-06, "loss": 1.3197, "step": 40895 }, { "epoch": 1.2059560666371811, "grad_norm": 2.861493338960833, "learning_rate": 5.602485839313352e-06, "loss": 1.3274, "step": 40900 }, { "epoch": 1.2061034940291906, "grad_norm": 2.9590274983285525, "learning_rate": 5.601926505955874e-06, "loss": 1.3269, "step": 40905 }, { "epoch": 1.2062509214212, "grad_norm": 2.9317804414220805, "learning_rate": 5.601367118103343e-06, "loss": 1.3514, "step": 40910 }, { "epoch": 1.2063983488132095, "grad_norm": 2.9359094184916352, "learning_rate": 5.60080767577222e-06, "loss": 1.344, "step": 40915 }, { "epoch": 1.206545776205219, "grad_norm": 3.01258039126793, "learning_rate": 5.600248178978968e-06, "loss": 1.3415, "step": 40920 }, { "epoch": 1.2066932035972284, "grad_norm": 2.9403885546102777, "learning_rate": 5.599688627740051e-06, "loss": 1.2473, "step": 40925 }, { "epoch": 1.2068406309892379, "grad_norm": 2.863797810487671, "learning_rate": 5.599129022071933e-06, "loss": 1.3341, "step": 40930 }, { "epoch": 1.2069880583812473, "grad_norm": 2.8098580746475683, "learning_rate": 5.598569361991083e-06, "loss": 1.3038, "step": 40935 }, { "epoch": 1.2071354857732566, "grad_norm": 3.000002014581763, "learning_rate": 5.598009647513968e-06, "loss": 1.3518, "step": 40940 }, { "epoch": 1.207282913165266, "grad_norm": 3.0179200290532653, "learning_rate": 5.59744987865706e-06, "loss": 1.3162, "step": 40945 }, { "epoch": 1.2074303405572755, "grad_norm": 2.9403065220432, "learning_rate": 5.596890055436829e-06, "loss": 1.3179, "step": 40950 }, { "epoch": 1.207577767949285, "grad_norm": 2.9288693136146473, "learning_rate": 5.596330177869749e-06, "loss": 1.3431, "step": 40955 }, { "epoch": 1.2077251953412944, "grad_norm": 2.8162561637666923, "learning_rate": 5.595770245972295e-06, "loss": 1.3253, "step": 40960 }, { "epoch": 1.2078726227333039, "grad_norm": 2.981912619382808, "learning_rate": 5.595210259760944e-06, "loss": 1.2948, "step": 40965 }, { "epoch": 1.2080200501253133, "grad_norm": 2.8356649690434743, "learning_rate": 5.594650219252175e-06, "loss": 1.273, "step": 40970 }, { "epoch": 1.2081674775173228, "grad_norm": 2.935306078222377, "learning_rate": 5.594090124462467e-06, "loss": 1.2823, "step": 40975 }, { "epoch": 1.2083149049093322, "grad_norm": 2.985874637391676, "learning_rate": 5.593529975408301e-06, "loss": 1.3022, "step": 40980 }, { "epoch": 1.2084623323013415, "grad_norm": 2.7828672954937326, "learning_rate": 5.59296977210616e-06, "loss": 1.3122, "step": 40985 }, { "epoch": 1.208609759693351, "grad_norm": 2.8728446006319794, "learning_rate": 5.592409514572529e-06, "loss": 1.331, "step": 40990 }, { "epoch": 1.2087571870853604, "grad_norm": 2.9797329200435847, "learning_rate": 5.591849202823894e-06, "loss": 1.3793, "step": 40995 }, { "epoch": 1.2089046144773699, "grad_norm": 2.6814686550810007, "learning_rate": 5.591288836876743e-06, "loss": 1.2516, "step": 41000 }, { "epoch": 1.2089046144773699, "eval_loss": 1.1191432476043701, "eval_runtime": 4.2958, "eval_samples_per_second": 92.183, "eval_steps_per_second": 3.026, "step": 41000 }, { "epoch": 1.2090520418693793, "grad_norm": 2.9832908634741293, "learning_rate": 5.590728416747566e-06, "loss": 1.3339, "step": 41005 }, { "epoch": 1.2091994692613888, "grad_norm": 2.9745089211404654, "learning_rate": 5.590167942452854e-06, "loss": 1.3154, "step": 41010 }, { "epoch": 1.2093468966533982, "grad_norm": 2.874989681261423, "learning_rate": 5.589607414009097e-06, "loss": 1.2876, "step": 41015 }, { "epoch": 1.2094943240454077, "grad_norm": 2.768909981739345, "learning_rate": 5.589046831432793e-06, "loss": 1.3144, "step": 41020 }, { "epoch": 1.2096417514374171, "grad_norm": 2.855062201583074, "learning_rate": 5.588486194740435e-06, "loss": 1.3525, "step": 41025 }, { "epoch": 1.2097891788294266, "grad_norm": 2.8678129763645064, "learning_rate": 5.587925503948522e-06, "loss": 1.3617, "step": 41030 }, { "epoch": 1.209936606221436, "grad_norm": 3.035214590485729, "learning_rate": 5.587364759073553e-06, "loss": 1.3427, "step": 41035 }, { "epoch": 1.2100840336134453, "grad_norm": 2.994470600330275, "learning_rate": 5.586803960132027e-06, "loss": 1.3485, "step": 41040 }, { "epoch": 1.2102314610054548, "grad_norm": 2.944913248911863, "learning_rate": 5.586243107140447e-06, "loss": 1.3547, "step": 41045 }, { "epoch": 1.2103788883974642, "grad_norm": 2.8346428273801174, "learning_rate": 5.585682200115317e-06, "loss": 1.2973, "step": 41050 }, { "epoch": 1.2105263157894737, "grad_norm": 2.866061678046686, "learning_rate": 5.585121239073143e-06, "loss": 1.3683, "step": 41055 }, { "epoch": 1.2106737431814831, "grad_norm": 3.053597046352469, "learning_rate": 5.58456022403043e-06, "loss": 1.3284, "step": 41060 }, { "epoch": 1.2108211705734926, "grad_norm": 2.8844848082117247, "learning_rate": 5.583999155003688e-06, "loss": 1.3089, "step": 41065 }, { "epoch": 1.210968597965502, "grad_norm": 2.846513974397226, "learning_rate": 5.5834380320094254e-06, "loss": 1.269, "step": 41070 }, { "epoch": 1.2111160253575115, "grad_norm": 2.853734772295808, "learning_rate": 5.582876855064157e-06, "loss": 1.3046, "step": 41075 }, { "epoch": 1.2112634527495207, "grad_norm": 2.9618071160431136, "learning_rate": 5.582315624184393e-06, "loss": 1.2726, "step": 41080 }, { "epoch": 1.2114108801415302, "grad_norm": 2.879029210671854, "learning_rate": 5.5817543393866495e-06, "loss": 1.3308, "step": 41085 }, { "epoch": 1.2115583075335397, "grad_norm": 2.8142171013853914, "learning_rate": 5.5811930006874435e-06, "loss": 1.3183, "step": 41090 }, { "epoch": 1.2117057349255491, "grad_norm": 2.8849850835608803, "learning_rate": 5.580631608103292e-06, "loss": 1.3262, "step": 41095 }, { "epoch": 1.2118531623175586, "grad_norm": 2.7361553261826996, "learning_rate": 5.580070161650716e-06, "loss": 1.3319, "step": 41100 }, { "epoch": 1.212000589709568, "grad_norm": 2.7856096942953843, "learning_rate": 5.579508661346235e-06, "loss": 1.3186, "step": 41105 }, { "epoch": 1.2121480171015775, "grad_norm": 2.8841290388398315, "learning_rate": 5.578947107206373e-06, "loss": 1.3161, "step": 41110 }, { "epoch": 1.212295444493587, "grad_norm": 2.9440212923156683, "learning_rate": 5.578385499247654e-06, "loss": 1.3034, "step": 41115 }, { "epoch": 1.2124428718855964, "grad_norm": 2.8833262733561376, "learning_rate": 5.577823837486603e-06, "loss": 1.3549, "step": 41120 }, { "epoch": 1.2125902992776059, "grad_norm": 2.916862335876757, "learning_rate": 5.577262121939748e-06, "loss": 1.3205, "step": 41125 }, { "epoch": 1.2127377266696153, "grad_norm": 2.962816405245559, "learning_rate": 5.57670035262362e-06, "loss": 1.3779, "step": 41130 }, { "epoch": 1.2128851540616246, "grad_norm": 2.9366118013088776, "learning_rate": 5.576138529554748e-06, "loss": 1.3527, "step": 41135 }, { "epoch": 1.213032581453634, "grad_norm": 2.955059315773395, "learning_rate": 5.575576652749664e-06, "loss": 1.292, "step": 41140 }, { "epoch": 1.2131800088456435, "grad_norm": 2.88066999078864, "learning_rate": 5.575014722224902e-06, "loss": 1.3487, "step": 41145 }, { "epoch": 1.213327436237653, "grad_norm": 3.136388978588441, "learning_rate": 5.574452737996999e-06, "loss": 1.3388, "step": 41150 }, { "epoch": 1.2134748636296624, "grad_norm": 2.9491782418165275, "learning_rate": 5.573890700082491e-06, "loss": 1.3096, "step": 41155 }, { "epoch": 1.2136222910216719, "grad_norm": 2.8667409114283195, "learning_rate": 5.573328608497915e-06, "loss": 1.3445, "step": 41160 }, { "epoch": 1.2137697184136813, "grad_norm": 2.9716260821581297, "learning_rate": 5.572766463259813e-06, "loss": 1.3131, "step": 41165 }, { "epoch": 1.2139171458056908, "grad_norm": 2.868851231454626, "learning_rate": 5.5722042643847274e-06, "loss": 1.2765, "step": 41170 }, { "epoch": 1.2140645731977002, "grad_norm": 2.942052000478504, "learning_rate": 5.571642011889201e-06, "loss": 1.3386, "step": 41175 }, { "epoch": 1.2142120005897095, "grad_norm": 2.975907235740973, "learning_rate": 5.571079705789777e-06, "loss": 1.2798, "step": 41180 }, { "epoch": 1.214359427981719, "grad_norm": 2.82957321683824, "learning_rate": 5.570517346103004e-06, "loss": 1.3355, "step": 41185 }, { "epoch": 1.2145068553737284, "grad_norm": 2.912185731448847, "learning_rate": 5.569954932845429e-06, "loss": 1.3029, "step": 41190 }, { "epoch": 1.2146542827657378, "grad_norm": 2.948594982675328, "learning_rate": 5.569392466033602e-06, "loss": 1.3178, "step": 41195 }, { "epoch": 1.2148017101577473, "grad_norm": 2.80027874957884, "learning_rate": 5.568829945684074e-06, "loss": 1.2541, "step": 41200 }, { "epoch": 1.2149491375497568, "grad_norm": 2.8830221333567447, "learning_rate": 5.568267371813397e-06, "loss": 1.3079, "step": 41205 }, { "epoch": 1.2150965649417662, "grad_norm": 2.7525009995785275, "learning_rate": 5.567704744438127e-06, "loss": 1.2767, "step": 41210 }, { "epoch": 1.2152439923337757, "grad_norm": 2.9793199938848347, "learning_rate": 5.5671420635748205e-06, "loss": 1.36, "step": 41215 }, { "epoch": 1.2153914197257851, "grad_norm": 2.966630571777551, "learning_rate": 5.566579329240033e-06, "loss": 1.3063, "step": 41220 }, { "epoch": 1.2155388471177946, "grad_norm": 2.903237476337549, "learning_rate": 5.566016541450324e-06, "loss": 1.2944, "step": 41225 }, { "epoch": 1.215686274509804, "grad_norm": 3.01875076974289, "learning_rate": 5.565453700222255e-06, "loss": 1.2876, "step": 41230 }, { "epoch": 1.2158337019018133, "grad_norm": 2.9536542008004263, "learning_rate": 5.564890805572389e-06, "loss": 1.3047, "step": 41235 }, { "epoch": 1.2159811292938227, "grad_norm": 3.118306475335625, "learning_rate": 5.564327857517286e-06, "loss": 1.3475, "step": 41240 }, { "epoch": 1.2161285566858322, "grad_norm": 2.894441822048577, "learning_rate": 5.563764856073516e-06, "loss": 1.375, "step": 41245 }, { "epoch": 1.2162759840778417, "grad_norm": 3.0883745268712572, "learning_rate": 5.563201801257643e-06, "loss": 1.3305, "step": 41250 }, { "epoch": 1.2164234114698511, "grad_norm": 2.9091922286409395, "learning_rate": 5.562638693086237e-06, "loss": 1.2264, "step": 41255 }, { "epoch": 1.2165708388618606, "grad_norm": 2.8530493479645664, "learning_rate": 5.562075531575868e-06, "loss": 1.2715, "step": 41260 }, { "epoch": 1.21671826625387, "grad_norm": 3.0181829931192383, "learning_rate": 5.561512316743107e-06, "loss": 1.2903, "step": 41265 }, { "epoch": 1.2168656936458795, "grad_norm": 2.9136434549620174, "learning_rate": 5.560949048604528e-06, "loss": 1.308, "step": 41270 }, { "epoch": 1.2170131210378887, "grad_norm": 3.006454777590658, "learning_rate": 5.560385727176704e-06, "loss": 1.3488, "step": 41275 }, { "epoch": 1.2171605484298982, "grad_norm": 2.9762210656078043, "learning_rate": 5.559822352476213e-06, "loss": 1.3427, "step": 41280 }, { "epoch": 1.2173079758219076, "grad_norm": 2.890240730936503, "learning_rate": 5.559258924519634e-06, "loss": 1.2505, "step": 41285 }, { "epoch": 1.217455403213917, "grad_norm": 2.7036806559954742, "learning_rate": 5.558695443323544e-06, "loss": 1.3271, "step": 41290 }, { "epoch": 1.2176028306059266, "grad_norm": 2.899525187195032, "learning_rate": 5.5581319089045265e-06, "loss": 1.311, "step": 41295 }, { "epoch": 1.217750257997936, "grad_norm": 2.9222925356313616, "learning_rate": 5.557568321279162e-06, "loss": 1.2873, "step": 41300 }, { "epoch": 1.2178976853899455, "grad_norm": 2.9930857464389375, "learning_rate": 5.5570046804640355e-06, "loss": 1.3338, "step": 41305 }, { "epoch": 1.218045112781955, "grad_norm": 2.8465640614654064, "learning_rate": 5.556440986475734e-06, "loss": 1.3025, "step": 41310 }, { "epoch": 1.2181925401739644, "grad_norm": 2.8394118962298416, "learning_rate": 5.555877239330842e-06, "loss": 1.2651, "step": 41315 }, { "epoch": 1.2183399675659738, "grad_norm": 3.0190336196511853, "learning_rate": 5.555313439045952e-06, "loss": 1.3273, "step": 41320 }, { "epoch": 1.2184873949579833, "grad_norm": 2.86380837523256, "learning_rate": 5.554749585637652e-06, "loss": 1.2919, "step": 41325 }, { "epoch": 1.2186348223499925, "grad_norm": 2.7903474986681727, "learning_rate": 5.554185679122534e-06, "loss": 1.3242, "step": 41330 }, { "epoch": 1.218782249742002, "grad_norm": 3.060055704853857, "learning_rate": 5.553621719517193e-06, "loss": 1.3268, "step": 41335 }, { "epoch": 1.2189296771340115, "grad_norm": 2.833734050148475, "learning_rate": 5.553057706838223e-06, "loss": 1.2652, "step": 41340 }, { "epoch": 1.219077104526021, "grad_norm": 2.9991342758539328, "learning_rate": 5.55249364110222e-06, "loss": 1.2911, "step": 41345 }, { "epoch": 1.2192245319180304, "grad_norm": 2.888120817216578, "learning_rate": 5.5519295223257844e-06, "loss": 1.3109, "step": 41350 }, { "epoch": 1.2193719593100398, "grad_norm": 3.009593070603992, "learning_rate": 5.551365350525514e-06, "loss": 1.3198, "step": 41355 }, { "epoch": 1.2195193867020493, "grad_norm": 2.9706031874712955, "learning_rate": 5.550801125718013e-06, "loss": 1.3174, "step": 41360 }, { "epoch": 1.2196668140940587, "grad_norm": 3.114625843635556, "learning_rate": 5.550236847919882e-06, "loss": 1.3063, "step": 41365 }, { "epoch": 1.2198142414860682, "grad_norm": 2.840330402461545, "learning_rate": 5.5496725171477265e-06, "loss": 1.3206, "step": 41370 }, { "epoch": 1.2199616688780774, "grad_norm": 2.9965411048869544, "learning_rate": 5.549108133418151e-06, "loss": 1.3625, "step": 41375 }, { "epoch": 1.220109096270087, "grad_norm": 2.790930347428734, "learning_rate": 5.548543696747765e-06, "loss": 1.3528, "step": 41380 }, { "epoch": 1.2202565236620964, "grad_norm": 3.006129226868937, "learning_rate": 5.547979207153177e-06, "loss": 1.3341, "step": 41385 }, { "epoch": 1.2204039510541058, "grad_norm": 2.9252610216609813, "learning_rate": 5.5474146646509985e-06, "loss": 1.3418, "step": 41390 }, { "epoch": 1.2205513784461153, "grad_norm": 2.976464648592688, "learning_rate": 5.54685006925784e-06, "loss": 1.3361, "step": 41395 }, { "epoch": 1.2206988058381247, "grad_norm": 3.0908500227790765, "learning_rate": 5.546285420990316e-06, "loss": 1.3737, "step": 41400 }, { "epoch": 1.2208462332301342, "grad_norm": 2.88553858731325, "learning_rate": 5.545720719865042e-06, "loss": 1.3447, "step": 41405 }, { "epoch": 1.2209936606221437, "grad_norm": 2.8465754646862593, "learning_rate": 5.545155965898637e-06, "loss": 1.2611, "step": 41410 }, { "epoch": 1.221141088014153, "grad_norm": 2.783328129224822, "learning_rate": 5.544591159107717e-06, "loss": 1.2733, "step": 41415 }, { "epoch": 1.2212885154061626, "grad_norm": 3.0189832288029748, "learning_rate": 5.544026299508903e-06, "loss": 1.3392, "step": 41420 }, { "epoch": 1.221435942798172, "grad_norm": 2.714419326973098, "learning_rate": 5.543461387118817e-06, "loss": 1.3132, "step": 41425 }, { "epoch": 1.2215833701901813, "grad_norm": 2.9415845347530047, "learning_rate": 5.542896421954081e-06, "loss": 1.3137, "step": 41430 }, { "epoch": 1.2217307975821907, "grad_norm": 2.907523538107659, "learning_rate": 5.542331404031322e-06, "loss": 1.302, "step": 41435 }, { "epoch": 1.2218782249742002, "grad_norm": 3.1143738555129175, "learning_rate": 5.541766333367163e-06, "loss": 1.311, "step": 41440 }, { "epoch": 1.2220256523662096, "grad_norm": 2.896728856795406, "learning_rate": 5.541201209978235e-06, "loss": 1.3204, "step": 41445 }, { "epoch": 1.222173079758219, "grad_norm": 2.795280911847289, "learning_rate": 5.540636033881165e-06, "loss": 1.2931, "step": 41450 }, { "epoch": 1.2223205071502286, "grad_norm": 2.9777897446712527, "learning_rate": 5.540070805092585e-06, "loss": 1.2955, "step": 41455 }, { "epoch": 1.222467934542238, "grad_norm": 2.8765360656614836, "learning_rate": 5.539505523629128e-06, "loss": 1.3218, "step": 41460 }, { "epoch": 1.2226153619342475, "grad_norm": 2.994553384722694, "learning_rate": 5.538940189507428e-06, "loss": 1.3487, "step": 41465 }, { "epoch": 1.2227627893262567, "grad_norm": 2.9624808679536323, "learning_rate": 5.538374802744119e-06, "loss": 1.3576, "step": 41470 }, { "epoch": 1.2229102167182662, "grad_norm": 2.839470213657204, "learning_rate": 5.537809363355841e-06, "loss": 1.2742, "step": 41475 }, { "epoch": 1.2230576441102756, "grad_norm": 2.9328759676602956, "learning_rate": 5.537243871359229e-06, "loss": 1.3336, "step": 41480 }, { "epoch": 1.223205071502285, "grad_norm": 2.912147200912782, "learning_rate": 5.536678326770926e-06, "loss": 1.3708, "step": 41485 }, { "epoch": 1.2233524988942945, "grad_norm": 2.8614104012528654, "learning_rate": 5.536112729607574e-06, "loss": 1.2854, "step": 41490 }, { "epoch": 1.223499926286304, "grad_norm": 2.9867584520899517, "learning_rate": 5.535547079885816e-06, "loss": 1.3488, "step": 41495 }, { "epoch": 1.2236473536783135, "grad_norm": 3.0639422941383594, "learning_rate": 5.534981377622294e-06, "loss": 1.3323, "step": 41500 }, { "epoch": 1.2236473536783135, "eval_loss": 1.1179852485656738, "eval_runtime": 4.1518, "eval_samples_per_second": 95.379, "eval_steps_per_second": 3.131, "step": 41500 }, { "epoch": 1.223794781070323, "grad_norm": 2.9154759968804247, "learning_rate": 5.5344156228336575e-06, "loss": 1.316, "step": 41505 }, { "epoch": 1.2239422084623324, "grad_norm": 2.939861756410793, "learning_rate": 5.533849815536555e-06, "loss": 1.3045, "step": 41510 }, { "epoch": 1.2240896358543418, "grad_norm": 2.9336598612307294, "learning_rate": 5.533283955747633e-06, "loss": 1.3229, "step": 41515 }, { "epoch": 1.2242370632463513, "grad_norm": 3.032030918763865, "learning_rate": 5.532718043483546e-06, "loss": 1.3626, "step": 41520 }, { "epoch": 1.2243844906383605, "grad_norm": 2.8531358463854963, "learning_rate": 5.532152078760943e-06, "loss": 1.3119, "step": 41525 }, { "epoch": 1.22453191803037, "grad_norm": 2.9679763081489763, "learning_rate": 5.531586061596481e-06, "loss": 1.3408, "step": 41530 }, { "epoch": 1.2246793454223794, "grad_norm": 3.000977670495012, "learning_rate": 5.531019992006815e-06, "loss": 1.333, "step": 41535 }, { "epoch": 1.224826772814389, "grad_norm": 3.1673079294106707, "learning_rate": 5.530453870008601e-06, "loss": 1.3463, "step": 41540 }, { "epoch": 1.2249742002063984, "grad_norm": 2.8074642356472617, "learning_rate": 5.5298876956185e-06, "loss": 1.3186, "step": 41545 }, { "epoch": 1.2251216275984078, "grad_norm": 2.9049527910455497, "learning_rate": 5.5293214688531715e-06, "loss": 1.3268, "step": 41550 }, { "epoch": 1.2252690549904173, "grad_norm": 3.0757299693822486, "learning_rate": 5.528755189729276e-06, "loss": 1.3873, "step": 41555 }, { "epoch": 1.2254164823824267, "grad_norm": 3.087071878917389, "learning_rate": 5.528188858263479e-06, "loss": 1.3866, "step": 41560 }, { "epoch": 1.225563909774436, "grad_norm": 2.8244550697708037, "learning_rate": 5.527622474472443e-06, "loss": 1.3626, "step": 41565 }, { "epoch": 1.2257113371664454, "grad_norm": 2.9499035923573076, "learning_rate": 5.527056038372837e-06, "loss": 1.2962, "step": 41570 }, { "epoch": 1.2258587645584549, "grad_norm": 3.0416575293212804, "learning_rate": 5.526489549981327e-06, "loss": 1.3144, "step": 41575 }, { "epoch": 1.2260061919504643, "grad_norm": 2.818455082886469, "learning_rate": 5.5259230093145845e-06, "loss": 1.3165, "step": 41580 }, { "epoch": 1.2261536193424738, "grad_norm": 2.8007913234256336, "learning_rate": 5.5253564163892785e-06, "loss": 1.3174, "step": 41585 }, { "epoch": 1.2263010467344833, "grad_norm": 2.927954499580427, "learning_rate": 5.524789771222082e-06, "loss": 1.3163, "step": 41590 }, { "epoch": 1.2264484741264927, "grad_norm": 2.9191156960246265, "learning_rate": 5.524223073829672e-06, "loss": 1.3246, "step": 41595 }, { "epoch": 1.2265959015185022, "grad_norm": 2.936069487954452, "learning_rate": 5.523656324228721e-06, "loss": 1.2925, "step": 41600 }, { "epoch": 1.2267433289105116, "grad_norm": 3.038378162177573, "learning_rate": 5.5230895224359065e-06, "loss": 1.2827, "step": 41605 }, { "epoch": 1.226890756302521, "grad_norm": 2.756512762665468, "learning_rate": 5.522522668467909e-06, "loss": 1.2723, "step": 41610 }, { "epoch": 1.2270381836945305, "grad_norm": 2.9541541536996516, "learning_rate": 5.521955762341408e-06, "loss": 1.2752, "step": 41615 }, { "epoch": 1.2271856110865398, "grad_norm": 2.9938206640876923, "learning_rate": 5.521388804073085e-06, "loss": 1.3069, "step": 41620 }, { "epoch": 1.2273330384785492, "grad_norm": 2.9145678767414966, "learning_rate": 5.520821793679626e-06, "loss": 1.2962, "step": 41625 }, { "epoch": 1.2274804658705587, "grad_norm": 2.917978476590178, "learning_rate": 5.5202547311777114e-06, "loss": 1.3086, "step": 41630 }, { "epoch": 1.2276278932625682, "grad_norm": 3.03772742546375, "learning_rate": 5.51968761658403e-06, "loss": 1.2859, "step": 41635 }, { "epoch": 1.2277753206545776, "grad_norm": 3.1210030044324926, "learning_rate": 5.519120449915269e-06, "loss": 1.3025, "step": 41640 }, { "epoch": 1.227922748046587, "grad_norm": 3.107729606639877, "learning_rate": 5.5185532311881195e-06, "loss": 1.315, "step": 41645 }, { "epoch": 1.2280701754385965, "grad_norm": 2.852727908316262, "learning_rate": 5.517985960419272e-06, "loss": 1.2969, "step": 41650 }, { "epoch": 1.228217602830606, "grad_norm": 2.9772380152957854, "learning_rate": 5.517418637625419e-06, "loss": 1.3136, "step": 41655 }, { "epoch": 1.2283650302226155, "grad_norm": 3.110800044557021, "learning_rate": 5.5168512628232534e-06, "loss": 1.3032, "step": 41660 }, { "epoch": 1.2285124576146247, "grad_norm": 2.7766441582467327, "learning_rate": 5.516283836029472e-06, "loss": 1.3257, "step": 41665 }, { "epoch": 1.2286598850066341, "grad_norm": 3.050821429196367, "learning_rate": 5.515716357260773e-06, "loss": 1.318, "step": 41670 }, { "epoch": 1.2288073123986436, "grad_norm": 2.723208445467562, "learning_rate": 5.515148826533853e-06, "loss": 1.3321, "step": 41675 }, { "epoch": 1.228954739790653, "grad_norm": 2.844317200423807, "learning_rate": 5.514581243865413e-06, "loss": 1.3265, "step": 41680 }, { "epoch": 1.2291021671826625, "grad_norm": 2.8893939457141324, "learning_rate": 5.514013609272156e-06, "loss": 1.3251, "step": 41685 }, { "epoch": 1.229249594574672, "grad_norm": 2.985819470133204, "learning_rate": 5.513445922770784e-06, "loss": 1.3444, "step": 41690 }, { "epoch": 1.2293970219666814, "grad_norm": 2.8435010222983066, "learning_rate": 5.512878184378001e-06, "loss": 1.296, "step": 41695 }, { "epoch": 1.229544449358691, "grad_norm": 2.915050440402629, "learning_rate": 5.512310394110515e-06, "loss": 1.3419, "step": 41700 }, { "epoch": 1.2296918767507004, "grad_norm": 2.8991624181254982, "learning_rate": 5.511742551985035e-06, "loss": 1.3501, "step": 41705 }, { "epoch": 1.2298393041427098, "grad_norm": 2.8778096699894067, "learning_rate": 5.511174658018266e-06, "loss": 1.2944, "step": 41710 }, { "epoch": 1.2299867315347193, "grad_norm": 2.8365616961371547, "learning_rate": 5.510606712226923e-06, "loss": 1.3185, "step": 41715 }, { "epoch": 1.2301341589267285, "grad_norm": 2.9665041896592403, "learning_rate": 5.510038714627717e-06, "loss": 1.3534, "step": 41720 }, { "epoch": 1.230281586318738, "grad_norm": 2.973438230310968, "learning_rate": 5.509470665237362e-06, "loss": 1.3613, "step": 41725 }, { "epoch": 1.2304290137107474, "grad_norm": 2.9638385643873213, "learning_rate": 5.508902564072573e-06, "loss": 1.3561, "step": 41730 }, { "epoch": 1.2305764411027569, "grad_norm": 3.0545980272152176, "learning_rate": 5.508334411150069e-06, "loss": 1.3971, "step": 41735 }, { "epoch": 1.2307238684947663, "grad_norm": 2.794000135319694, "learning_rate": 5.507766206486566e-06, "loss": 1.3661, "step": 41740 }, { "epoch": 1.2308712958867758, "grad_norm": 3.0024285819228775, "learning_rate": 5.507197950098785e-06, "loss": 1.3018, "step": 41745 }, { "epoch": 1.2310187232787853, "grad_norm": 2.908828944515556, "learning_rate": 5.506629642003449e-06, "loss": 1.2859, "step": 41750 }, { "epoch": 1.2311661506707947, "grad_norm": 2.921786323209833, "learning_rate": 5.506061282217279e-06, "loss": 1.3285, "step": 41755 }, { "epoch": 1.231313578062804, "grad_norm": 2.947317873965702, "learning_rate": 5.505492870757001e-06, "loss": 1.3078, "step": 41760 }, { "epoch": 1.2314610054548134, "grad_norm": 2.8548267903954234, "learning_rate": 5.504924407639341e-06, "loss": 1.3204, "step": 41765 }, { "epoch": 1.2316084328468229, "grad_norm": 2.8670915039240152, "learning_rate": 5.504355892881025e-06, "loss": 1.3111, "step": 41770 }, { "epoch": 1.2317558602388323, "grad_norm": 3.0547550914793318, "learning_rate": 5.5037873264987835e-06, "loss": 1.3563, "step": 41775 }, { "epoch": 1.2319032876308418, "grad_norm": 3.0906394508099755, "learning_rate": 5.50321870850935e-06, "loss": 1.3542, "step": 41780 }, { "epoch": 1.2320507150228512, "grad_norm": 2.8751446527342908, "learning_rate": 5.502650038929452e-06, "loss": 1.3231, "step": 41785 }, { "epoch": 1.2321981424148607, "grad_norm": 3.0222033312504974, "learning_rate": 5.502081317775825e-06, "loss": 1.3293, "step": 41790 }, { "epoch": 1.2323455698068702, "grad_norm": 2.996124978703292, "learning_rate": 5.5015125450652044e-06, "loss": 1.336, "step": 41795 }, { "epoch": 1.2324929971988796, "grad_norm": 2.8681822494182563, "learning_rate": 5.500943720814328e-06, "loss": 1.3192, "step": 41800 }, { "epoch": 1.232640424590889, "grad_norm": 2.977611202646797, "learning_rate": 5.500374845039934e-06, "loss": 1.3108, "step": 41805 }, { "epoch": 1.2327878519828985, "grad_norm": 2.8335408763707406, "learning_rate": 5.499805917758761e-06, "loss": 1.3584, "step": 41810 }, { "epoch": 1.2329352793749078, "grad_norm": 3.1795988338314856, "learning_rate": 5.499236938987549e-06, "loss": 1.3037, "step": 41815 }, { "epoch": 1.2330827067669172, "grad_norm": 3.03270545316, "learning_rate": 5.498667908743045e-06, "loss": 1.3033, "step": 41820 }, { "epoch": 1.2332301341589267, "grad_norm": 2.8843473486989706, "learning_rate": 5.49809882704199e-06, "loss": 1.3306, "step": 41825 }, { "epoch": 1.2333775615509361, "grad_norm": 2.926395294647912, "learning_rate": 5.497529693901131e-06, "loss": 1.2864, "step": 41830 }, { "epoch": 1.2335249889429456, "grad_norm": 2.9865193064684794, "learning_rate": 5.4969605093372155e-06, "loss": 1.3506, "step": 41835 }, { "epoch": 1.233672416334955, "grad_norm": 2.8650381226390027, "learning_rate": 5.496391273366992e-06, "loss": 1.3211, "step": 41840 }, { "epoch": 1.2338198437269645, "grad_norm": 2.7545928415343157, "learning_rate": 5.495821986007211e-06, "loss": 1.31, "step": 41845 }, { "epoch": 1.233967271118974, "grad_norm": 2.869566997163451, "learning_rate": 5.495252647274624e-06, "loss": 1.3178, "step": 41850 }, { "epoch": 1.2341146985109834, "grad_norm": 3.008743361528379, "learning_rate": 5.494683257185985e-06, "loss": 1.3298, "step": 41855 }, { "epoch": 1.2342621259029927, "grad_norm": 2.675775113725598, "learning_rate": 5.494113815758051e-06, "loss": 1.2572, "step": 41860 }, { "epoch": 1.2344095532950021, "grad_norm": 2.909302506250961, "learning_rate": 5.493544323007574e-06, "loss": 1.3023, "step": 41865 }, { "epoch": 1.2345569806870116, "grad_norm": 3.002900519300161, "learning_rate": 5.492974778951315e-06, "loss": 1.3685, "step": 41870 }, { "epoch": 1.234704408079021, "grad_norm": 3.0432105454472147, "learning_rate": 5.492405183606033e-06, "loss": 1.3486, "step": 41875 }, { "epoch": 1.2348518354710305, "grad_norm": 2.731951355578481, "learning_rate": 5.49183553698849e-06, "loss": 1.3055, "step": 41880 }, { "epoch": 1.23499926286304, "grad_norm": 2.99072604110379, "learning_rate": 5.491265839115447e-06, "loss": 1.3074, "step": 41885 }, { "epoch": 1.2351466902550494, "grad_norm": 2.9150799659386677, "learning_rate": 5.4906960900036675e-06, "loss": 1.2735, "step": 41890 }, { "epoch": 1.2352941176470589, "grad_norm": 2.981488815332928, "learning_rate": 5.490126289669918e-06, "loss": 1.3574, "step": 41895 }, { "epoch": 1.2354415450390683, "grad_norm": 2.7809364942279187, "learning_rate": 5.489556438130965e-06, "loss": 1.3036, "step": 41900 }, { "epoch": 1.2355889724310778, "grad_norm": 2.86630394830732, "learning_rate": 5.4889865354035784e-06, "loss": 1.3442, "step": 41905 }, { "epoch": 1.2357363998230873, "grad_norm": 3.1574268633559006, "learning_rate": 5.488416581504527e-06, "loss": 1.3371, "step": 41910 }, { "epoch": 1.2358838272150965, "grad_norm": 2.8933273194275047, "learning_rate": 5.487846576450582e-06, "loss": 1.3355, "step": 41915 }, { "epoch": 1.236031254607106, "grad_norm": 2.894852721504027, "learning_rate": 5.487276520258518e-06, "loss": 1.2914, "step": 41920 }, { "epoch": 1.2361786819991154, "grad_norm": 2.9267146390334715, "learning_rate": 5.486706412945108e-06, "loss": 1.3246, "step": 41925 }, { "epoch": 1.2363261093911249, "grad_norm": 2.8227223292865657, "learning_rate": 5.486136254527128e-06, "loss": 1.371, "step": 41930 }, { "epoch": 1.2364735367831343, "grad_norm": 2.9201368693998573, "learning_rate": 5.485566045021357e-06, "loss": 1.315, "step": 41935 }, { "epoch": 1.2366209641751438, "grad_norm": 3.0066032461006515, "learning_rate": 5.4849957844445745e-06, "loss": 1.2859, "step": 41940 }, { "epoch": 1.2367683915671532, "grad_norm": 2.8873341391735567, "learning_rate": 5.484425472813559e-06, "loss": 1.3144, "step": 41945 }, { "epoch": 1.2369158189591627, "grad_norm": 2.9988451775998, "learning_rate": 5.483855110145093e-06, "loss": 1.318, "step": 41950 }, { "epoch": 1.237063246351172, "grad_norm": 2.924266832465468, "learning_rate": 5.4832846964559604e-06, "loss": 1.2481, "step": 41955 }, { "epoch": 1.2372106737431814, "grad_norm": 2.8876645656171522, "learning_rate": 5.482714231762947e-06, "loss": 1.3471, "step": 41960 }, { "epoch": 1.2373581011351908, "grad_norm": 2.9312328071842235, "learning_rate": 5.482143716082838e-06, "loss": 1.2972, "step": 41965 }, { "epoch": 1.2375055285272003, "grad_norm": 2.877058321806719, "learning_rate": 5.481573149432424e-06, "loss": 1.3338, "step": 41970 }, { "epoch": 1.2376529559192098, "grad_norm": 3.1021234977161036, "learning_rate": 5.481002531828491e-06, "loss": 1.3695, "step": 41975 }, { "epoch": 1.2378003833112192, "grad_norm": 2.9879464349218163, "learning_rate": 5.480431863287833e-06, "loss": 1.3438, "step": 41980 }, { "epoch": 1.2379478107032287, "grad_norm": 3.1985878204029445, "learning_rate": 5.47986114382724e-06, "loss": 1.3456, "step": 41985 }, { "epoch": 1.2380952380952381, "grad_norm": 2.7703548979453174, "learning_rate": 5.479290373463509e-06, "loss": 1.3041, "step": 41990 }, { "epoch": 1.2382426654872476, "grad_norm": 2.890768657230077, "learning_rate": 5.478719552213434e-06, "loss": 1.3055, "step": 41995 }, { "epoch": 1.238390092879257, "grad_norm": 3.0039372675794054, "learning_rate": 5.478148680093812e-06, "loss": 1.3046, "step": 42000 }, { "epoch": 1.238390092879257, "eval_loss": 1.117411494255066, "eval_runtime": 4.2655, "eval_samples_per_second": 92.839, "eval_steps_per_second": 3.048, "step": 42000 }, { "epoch": 1.2385375202712665, "grad_norm": 2.857330480261903, "learning_rate": 5.477577757121441e-06, "loss": 1.2446, "step": 42005 }, { "epoch": 1.2386849476632757, "grad_norm": 2.9900766350425396, "learning_rate": 5.4770067833131235e-06, "loss": 1.2994, "step": 42010 }, { "epoch": 1.2388323750552852, "grad_norm": 2.76234649452097, "learning_rate": 5.476435758685658e-06, "loss": 1.2956, "step": 42015 }, { "epoch": 1.2389798024472947, "grad_norm": 2.922918088346929, "learning_rate": 5.47586468325585e-06, "loss": 1.2937, "step": 42020 }, { "epoch": 1.2391272298393041, "grad_norm": 3.094976821269643, "learning_rate": 5.475293557040502e-06, "loss": 1.3177, "step": 42025 }, { "epoch": 1.2392746572313136, "grad_norm": 3.0414966130931496, "learning_rate": 5.4747223800564214e-06, "loss": 1.3116, "step": 42030 }, { "epoch": 1.239422084623323, "grad_norm": 2.94347324426813, "learning_rate": 5.474151152320415e-06, "loss": 1.3406, "step": 42035 }, { "epoch": 1.2395695120153325, "grad_norm": 2.8371055583478406, "learning_rate": 5.4735798738492926e-06, "loss": 1.3148, "step": 42040 }, { "epoch": 1.239716939407342, "grad_norm": 2.845189389984292, "learning_rate": 5.473008544659863e-06, "loss": 1.3092, "step": 42045 }, { "epoch": 1.2398643667993514, "grad_norm": 3.0615870749435508, "learning_rate": 5.472437164768941e-06, "loss": 1.2795, "step": 42050 }, { "epoch": 1.2400117941913607, "grad_norm": 2.8629870437542535, "learning_rate": 5.4718657341933375e-06, "loss": 1.3701, "step": 42055 }, { "epoch": 1.24015922158337, "grad_norm": 2.825769583956192, "learning_rate": 5.47129425294987e-06, "loss": 1.3228, "step": 42060 }, { "epoch": 1.2403066489753796, "grad_norm": 2.845924424794292, "learning_rate": 5.470722721055353e-06, "loss": 1.2947, "step": 42065 }, { "epoch": 1.240454076367389, "grad_norm": 3.0633446524255143, "learning_rate": 5.470151138526605e-06, "loss": 1.3528, "step": 42070 }, { "epoch": 1.2406015037593985, "grad_norm": 2.945784827287823, "learning_rate": 5.469579505380445e-06, "loss": 1.3129, "step": 42075 }, { "epoch": 1.240748931151408, "grad_norm": 2.8979500030183134, "learning_rate": 5.469007821633695e-06, "loss": 1.2693, "step": 42080 }, { "epoch": 1.2408963585434174, "grad_norm": 2.9037970827469706, "learning_rate": 5.468436087303178e-06, "loss": 1.3165, "step": 42085 }, { "epoch": 1.2410437859354269, "grad_norm": 2.7589627209944236, "learning_rate": 5.4678643024057145e-06, "loss": 1.2562, "step": 42090 }, { "epoch": 1.2411912133274363, "grad_norm": 3.036605438323928, "learning_rate": 5.467292466958135e-06, "loss": 1.2891, "step": 42095 }, { "epoch": 1.2413386407194458, "grad_norm": 2.931804809761287, "learning_rate": 5.466720580977261e-06, "loss": 1.3425, "step": 42100 }, { "epoch": 1.2414860681114552, "grad_norm": 2.987962663736277, "learning_rate": 5.4661486444799245e-06, "loss": 1.3205, "step": 42105 }, { "epoch": 1.2416334955034645, "grad_norm": 2.7923697455378496, "learning_rate": 5.4655766574829555e-06, "loss": 1.3368, "step": 42110 }, { "epoch": 1.241780922895474, "grad_norm": 3.118776236307272, "learning_rate": 5.465004620003184e-06, "loss": 1.3382, "step": 42115 }, { "epoch": 1.2419283502874834, "grad_norm": 3.0676494265239147, "learning_rate": 5.464432532057444e-06, "loss": 1.3657, "step": 42120 }, { "epoch": 1.2420757776794928, "grad_norm": 2.75938444872715, "learning_rate": 5.463860393662567e-06, "loss": 1.3016, "step": 42125 }, { "epoch": 1.2422232050715023, "grad_norm": 2.898848557193391, "learning_rate": 5.463288204835392e-06, "loss": 1.3045, "step": 42130 }, { "epoch": 1.2423706324635118, "grad_norm": 3.2052342054770016, "learning_rate": 5.462715965592755e-06, "loss": 1.2651, "step": 42135 }, { "epoch": 1.2425180598555212, "grad_norm": 2.7605277278036553, "learning_rate": 5.462143675951496e-06, "loss": 1.2671, "step": 42140 }, { "epoch": 1.2426654872475307, "grad_norm": 2.8306227292473776, "learning_rate": 5.4615713359284525e-06, "loss": 1.2918, "step": 42145 }, { "epoch": 1.24281291463954, "grad_norm": 3.0594305070023995, "learning_rate": 5.46099894554047e-06, "loss": 1.3261, "step": 42150 }, { "epoch": 1.2429603420315494, "grad_norm": 3.004116617618241, "learning_rate": 5.460426504804389e-06, "loss": 1.3204, "step": 42155 }, { "epoch": 1.2431077694235588, "grad_norm": 2.919550711344634, "learning_rate": 5.459854013737053e-06, "loss": 1.2814, "step": 42160 }, { "epoch": 1.2432551968155683, "grad_norm": 2.8639309468306986, "learning_rate": 5.459281472355313e-06, "loss": 1.3205, "step": 42165 }, { "epoch": 1.2434026242075777, "grad_norm": 2.986588735549436, "learning_rate": 5.458708880676013e-06, "loss": 1.3305, "step": 42170 }, { "epoch": 1.2435500515995872, "grad_norm": 2.9184383178470807, "learning_rate": 5.458136238716003e-06, "loss": 1.3337, "step": 42175 }, { "epoch": 1.2436974789915967, "grad_norm": 3.0055284654081764, "learning_rate": 5.4575635464921335e-06, "loss": 1.3435, "step": 42180 }, { "epoch": 1.2438449063836061, "grad_norm": 2.921276383474725, "learning_rate": 5.456990804021256e-06, "loss": 1.3381, "step": 42185 }, { "epoch": 1.2439923337756156, "grad_norm": 2.969206024094805, "learning_rate": 5.456418011320226e-06, "loss": 1.3844, "step": 42190 }, { "epoch": 1.244139761167625, "grad_norm": 2.8247369162517275, "learning_rate": 5.455845168405898e-06, "loss": 1.3406, "step": 42195 }, { "epoch": 1.2442871885596345, "grad_norm": 3.0148286554979866, "learning_rate": 5.455272275295127e-06, "loss": 1.3195, "step": 42200 }, { "epoch": 1.2444346159516437, "grad_norm": 2.8770341790725715, "learning_rate": 5.454699332004772e-06, "loss": 1.3359, "step": 42205 }, { "epoch": 1.2445820433436532, "grad_norm": 2.8681841742753864, "learning_rate": 5.4541263385516925e-06, "loss": 1.2591, "step": 42210 }, { "epoch": 1.2447294707356626, "grad_norm": 2.8435848759179576, "learning_rate": 5.453553294952751e-06, "loss": 1.3117, "step": 42215 }, { "epoch": 1.244876898127672, "grad_norm": 2.9397566628476697, "learning_rate": 5.452980201224807e-06, "loss": 1.3267, "step": 42220 }, { "epoch": 1.2450243255196816, "grad_norm": 3.0267479820815515, "learning_rate": 5.452407057384728e-06, "loss": 1.295, "step": 42225 }, { "epoch": 1.245171752911691, "grad_norm": 2.8355571873433223, "learning_rate": 5.451833863449376e-06, "loss": 1.2919, "step": 42230 }, { "epoch": 1.2453191803037005, "grad_norm": 2.899502493788513, "learning_rate": 5.451260619435618e-06, "loss": 1.2999, "step": 42235 }, { "epoch": 1.24546660769571, "grad_norm": 2.798813634341105, "learning_rate": 5.450687325360325e-06, "loss": 1.2941, "step": 42240 }, { "epoch": 1.2456140350877192, "grad_norm": 2.98473466306745, "learning_rate": 5.4501139812403654e-06, "loss": 1.3527, "step": 42245 }, { "epoch": 1.2457614624797286, "grad_norm": 2.9567174605119817, "learning_rate": 5.449540587092611e-06, "loss": 1.3033, "step": 42250 }, { "epoch": 1.245908889871738, "grad_norm": 2.866133514105907, "learning_rate": 5.448967142933933e-06, "loss": 1.3168, "step": 42255 }, { "epoch": 1.2460563172637475, "grad_norm": 2.861376499139745, "learning_rate": 5.448393648781208e-06, "loss": 1.302, "step": 42260 }, { "epoch": 1.246203744655757, "grad_norm": 2.8978347690838775, "learning_rate": 5.44782010465131e-06, "loss": 1.3752, "step": 42265 }, { "epoch": 1.2463511720477665, "grad_norm": 2.884469238174768, "learning_rate": 5.447246510561117e-06, "loss": 1.3222, "step": 42270 }, { "epoch": 1.246498599439776, "grad_norm": 2.7829613539054936, "learning_rate": 5.446672866527507e-06, "loss": 1.3141, "step": 42275 }, { "epoch": 1.2466460268317854, "grad_norm": 2.960972507479958, "learning_rate": 5.4460991725673615e-06, "loss": 1.3216, "step": 42280 }, { "epoch": 1.2467934542237948, "grad_norm": 2.724728837748935, "learning_rate": 5.44552542869756e-06, "loss": 1.3204, "step": 42285 }, { "epoch": 1.2469408816158043, "grad_norm": 2.91835822667148, "learning_rate": 5.444951634934986e-06, "loss": 1.3389, "step": 42290 }, { "epoch": 1.2470883090078138, "grad_norm": 3.0470122477842163, "learning_rate": 5.444377791296525e-06, "loss": 1.2916, "step": 42295 }, { "epoch": 1.247235736399823, "grad_norm": 2.913433818876007, "learning_rate": 5.443803897799065e-06, "loss": 1.3069, "step": 42300 }, { "epoch": 1.2473831637918324, "grad_norm": 2.937721801616452, "learning_rate": 5.443229954459489e-06, "loss": 1.3099, "step": 42305 }, { "epoch": 1.247530591183842, "grad_norm": 3.033774425717223, "learning_rate": 5.442655961294688e-06, "loss": 1.3322, "step": 42310 }, { "epoch": 1.2476780185758514, "grad_norm": 2.952350370783742, "learning_rate": 5.442081918321553e-06, "loss": 1.3417, "step": 42315 }, { "epoch": 1.2478254459678608, "grad_norm": 2.839708491765947, "learning_rate": 5.441507825556976e-06, "loss": 1.3126, "step": 42320 }, { "epoch": 1.2479728733598703, "grad_norm": 2.815740367898091, "learning_rate": 5.44093368301785e-06, "loss": 1.2621, "step": 42325 }, { "epoch": 1.2481203007518797, "grad_norm": 2.9876728118017337, "learning_rate": 5.440359490721069e-06, "loss": 1.3347, "step": 42330 }, { "epoch": 1.2482677281438892, "grad_norm": 2.9535229978544613, "learning_rate": 5.439785248683529e-06, "loss": 1.3459, "step": 42335 }, { "epoch": 1.2484151555358987, "grad_norm": 2.7228773936891777, "learning_rate": 5.439210956922129e-06, "loss": 1.3307, "step": 42340 }, { "epoch": 1.248562582927908, "grad_norm": 3.1949087316353575, "learning_rate": 5.4386366154537685e-06, "loss": 1.296, "step": 42345 }, { "epoch": 1.2487100103199174, "grad_norm": 2.9872270106594727, "learning_rate": 5.438062224295346e-06, "loss": 1.2995, "step": 42350 }, { "epoch": 1.2488574377119268, "grad_norm": 2.9990844051115735, "learning_rate": 5.437487783463767e-06, "loss": 1.3392, "step": 42355 }, { "epoch": 1.2490048651039363, "grad_norm": 3.0268117165701143, "learning_rate": 5.436913292975931e-06, "loss": 1.3424, "step": 42360 }, { "epoch": 1.2491522924959457, "grad_norm": 2.9724201988222436, "learning_rate": 5.436338752848746e-06, "loss": 1.3786, "step": 42365 }, { "epoch": 1.2492997198879552, "grad_norm": 2.622869479561184, "learning_rate": 5.435764163099117e-06, "loss": 1.2895, "step": 42370 }, { "epoch": 1.2494471472799646, "grad_norm": 2.7865584377381323, "learning_rate": 5.435189523743953e-06, "loss": 1.3361, "step": 42375 }, { "epoch": 1.249594574671974, "grad_norm": 2.750355033697953, "learning_rate": 5.434614834800163e-06, "loss": 1.3267, "step": 42380 }, { "epoch": 1.2497420020639836, "grad_norm": 2.868305097917261, "learning_rate": 5.434040096284659e-06, "loss": 1.2867, "step": 42385 }, { "epoch": 1.249889429455993, "grad_norm": 2.9927345679882253, "learning_rate": 5.433465308214351e-06, "loss": 1.3258, "step": 42390 }, { "epoch": 1.2500368568480025, "grad_norm": 3.1159153501282058, "learning_rate": 5.432890470606153e-06, "loss": 1.277, "step": 42395 }, { "epoch": 1.2501842842400117, "grad_norm": 2.929828106783337, "learning_rate": 5.432315583476983e-06, "loss": 1.2956, "step": 42400 }, { "epoch": 1.2503317116320212, "grad_norm": 3.05802652421425, "learning_rate": 5.431740646843755e-06, "loss": 1.3442, "step": 42405 }, { "epoch": 1.2504791390240306, "grad_norm": 2.925211996511857, "learning_rate": 5.431165660723389e-06, "loss": 1.334, "step": 42410 }, { "epoch": 1.25062656641604, "grad_norm": 2.857259455144434, "learning_rate": 5.430590625132803e-06, "loss": 1.3691, "step": 42415 }, { "epoch": 1.2507739938080495, "grad_norm": 2.843965684604264, "learning_rate": 5.430015540088919e-06, "loss": 1.2933, "step": 42420 }, { "epoch": 1.250921421200059, "grad_norm": 2.8470764612772856, "learning_rate": 5.429440405608659e-06, "loss": 1.3372, "step": 42425 }, { "epoch": 1.2510688485920685, "grad_norm": 2.9082866396622626, "learning_rate": 5.4288652217089485e-06, "loss": 1.3031, "step": 42430 }, { "epoch": 1.251216275984078, "grad_norm": 2.8469987696877044, "learning_rate": 5.42828998840671e-06, "loss": 1.3418, "step": 42435 }, { "epoch": 1.2513637033760872, "grad_norm": 2.90473155965297, "learning_rate": 5.4277147057188735e-06, "loss": 1.313, "step": 42440 }, { "epoch": 1.2515111307680966, "grad_norm": 2.858629263679027, "learning_rate": 5.427139373662366e-06, "loss": 1.3268, "step": 42445 }, { "epoch": 1.251658558160106, "grad_norm": 2.9751137426281304, "learning_rate": 5.426563992254116e-06, "loss": 1.3476, "step": 42450 }, { "epoch": 1.2518059855521155, "grad_norm": 3.003935402863472, "learning_rate": 5.425988561511058e-06, "loss": 1.3188, "step": 42455 }, { "epoch": 1.251953412944125, "grad_norm": 2.8784118678081807, "learning_rate": 5.425413081450121e-06, "loss": 1.3188, "step": 42460 }, { "epoch": 1.2521008403361344, "grad_norm": 2.855627467454535, "learning_rate": 5.424837552088242e-06, "loss": 1.3114, "step": 42465 }, { "epoch": 1.252248267728144, "grad_norm": 2.8867875751546745, "learning_rate": 5.424261973442356e-06, "loss": 1.3233, "step": 42470 }, { "epoch": 1.2523956951201534, "grad_norm": 2.9131895399868344, "learning_rate": 5.423686345529399e-06, "loss": 1.3048, "step": 42475 }, { "epoch": 1.2525431225121628, "grad_norm": 2.9304792622319, "learning_rate": 5.42311066836631e-06, "loss": 1.3501, "step": 42480 }, { "epoch": 1.2526905499041723, "grad_norm": 2.780018389730464, "learning_rate": 5.4225349419700295e-06, "loss": 1.3256, "step": 42485 }, { "epoch": 1.2528379772961817, "grad_norm": 2.939474016069868, "learning_rate": 5.421959166357499e-06, "loss": 1.3214, "step": 42490 }, { "epoch": 1.2529854046881912, "grad_norm": 2.8645020261341703, "learning_rate": 5.421383341545659e-06, "loss": 1.2933, "step": 42495 }, { "epoch": 1.2531328320802004, "grad_norm": 3.1105269434623497, "learning_rate": 5.4208074675514565e-06, "loss": 1.3659, "step": 42500 }, { "epoch": 1.2531328320802004, "eval_loss": 1.1151139736175537, "eval_runtime": 4.1927, "eval_samples_per_second": 94.45, "eval_steps_per_second": 3.101, "step": 42500 }, { "epoch": 1.25328025947221, "grad_norm": 2.989489437832552, "learning_rate": 5.420231544391837e-06, "loss": 1.3646, "step": 42505 }, { "epoch": 1.2534276868642193, "grad_norm": 2.9797318686265446, "learning_rate": 5.419655572083747e-06, "loss": 1.307, "step": 42510 }, { "epoch": 1.2535751142562288, "grad_norm": 2.8844644237524775, "learning_rate": 5.419079550644135e-06, "loss": 1.3119, "step": 42515 }, { "epoch": 1.2537225416482383, "grad_norm": 3.0372806689541147, "learning_rate": 5.4185034800899505e-06, "loss": 1.3477, "step": 42520 }, { "epoch": 1.2538699690402477, "grad_norm": 2.936090613820008, "learning_rate": 5.417927360438148e-06, "loss": 1.3128, "step": 42525 }, { "epoch": 1.2540173964322572, "grad_norm": 2.9618576298482617, "learning_rate": 5.417351191705677e-06, "loss": 1.3686, "step": 42530 }, { "epoch": 1.2541648238242664, "grad_norm": 2.9217764330459577, "learning_rate": 5.416774973909493e-06, "loss": 1.2992, "step": 42535 }, { "epoch": 1.2543122512162759, "grad_norm": 3.008287283405077, "learning_rate": 5.4161987070665535e-06, "loss": 1.3366, "step": 42540 }, { "epoch": 1.2544596786082853, "grad_norm": 3.056749989923566, "learning_rate": 5.415622391193813e-06, "loss": 1.3529, "step": 42545 }, { "epoch": 1.2546071060002948, "grad_norm": 2.877792744545462, "learning_rate": 5.415046026308232e-06, "loss": 1.3534, "step": 42550 }, { "epoch": 1.2547545333923042, "grad_norm": 3.0319806431456553, "learning_rate": 5.41446961242677e-06, "loss": 1.3003, "step": 42555 }, { "epoch": 1.2549019607843137, "grad_norm": 2.820194342832218, "learning_rate": 5.413893149566391e-06, "loss": 1.3048, "step": 42560 }, { "epoch": 1.2550493881763232, "grad_norm": 2.844580861331109, "learning_rate": 5.413316637744054e-06, "loss": 1.3425, "step": 42565 }, { "epoch": 1.2551968155683326, "grad_norm": 2.986947129092742, "learning_rate": 5.4127400769767255e-06, "loss": 1.3288, "step": 42570 }, { "epoch": 1.255344242960342, "grad_norm": 2.8192377241948687, "learning_rate": 5.412163467281372e-06, "loss": 1.3246, "step": 42575 }, { "epoch": 1.2554916703523515, "grad_norm": 2.9411047466159825, "learning_rate": 5.411586808674961e-06, "loss": 1.3583, "step": 42580 }, { "epoch": 1.255639097744361, "grad_norm": 2.9837928759461403, "learning_rate": 5.411010101174461e-06, "loss": 1.3149, "step": 42585 }, { "epoch": 1.2557865251363705, "grad_norm": 2.9298825273070896, "learning_rate": 5.410433344796842e-06, "loss": 1.379, "step": 42590 }, { "epoch": 1.2559339525283797, "grad_norm": 2.9270214938511323, "learning_rate": 5.409856539559075e-06, "loss": 1.3379, "step": 42595 }, { "epoch": 1.2560813799203892, "grad_norm": 2.860270388017642, "learning_rate": 5.409279685478135e-06, "loss": 1.3118, "step": 42600 }, { "epoch": 1.2562288073123986, "grad_norm": 2.9459213687641697, "learning_rate": 5.408702782570996e-06, "loss": 1.2861, "step": 42605 }, { "epoch": 1.256376234704408, "grad_norm": 3.0219908634230026, "learning_rate": 5.4081258308546315e-06, "loss": 1.3032, "step": 42610 }, { "epoch": 1.2565236620964175, "grad_norm": 2.949017625675625, "learning_rate": 5.407548830346023e-06, "loss": 1.3699, "step": 42615 }, { "epoch": 1.256671089488427, "grad_norm": 2.876428111802795, "learning_rate": 5.406971781062146e-06, "loss": 1.3181, "step": 42620 }, { "epoch": 1.2568185168804364, "grad_norm": 2.946250551636512, "learning_rate": 5.406394683019983e-06, "loss": 1.3436, "step": 42625 }, { "epoch": 1.256965944272446, "grad_norm": 2.847439687199971, "learning_rate": 5.405817536236515e-06, "loss": 1.3189, "step": 42630 }, { "epoch": 1.2571133716644551, "grad_norm": 2.87304839795143, "learning_rate": 5.405240340728725e-06, "loss": 1.2949, "step": 42635 }, { "epoch": 1.2572607990564646, "grad_norm": 2.9206047116528704, "learning_rate": 5.404663096513599e-06, "loss": 1.309, "step": 42640 }, { "epoch": 1.257408226448474, "grad_norm": 2.8822112456588695, "learning_rate": 5.4040858036081204e-06, "loss": 1.3611, "step": 42645 }, { "epoch": 1.2575556538404835, "grad_norm": 2.967823149846891, "learning_rate": 5.403508462029279e-06, "loss": 1.3047, "step": 42650 }, { "epoch": 1.257703081232493, "grad_norm": 3.060524447004178, "learning_rate": 5.402931071794063e-06, "loss": 1.3322, "step": 42655 }, { "epoch": 1.2578505086245024, "grad_norm": 2.9554347120517885, "learning_rate": 5.402353632919463e-06, "loss": 1.3468, "step": 42660 }, { "epoch": 1.2579979360165119, "grad_norm": 3.001702252417205, "learning_rate": 5.40177614542247e-06, "loss": 1.3531, "step": 42665 }, { "epoch": 1.2581453634085213, "grad_norm": 2.8952737179816195, "learning_rate": 5.401198609320079e-06, "loss": 1.3134, "step": 42670 }, { "epoch": 1.2582927908005308, "grad_norm": 2.8822080347451657, "learning_rate": 5.400621024629282e-06, "loss": 1.3411, "step": 42675 }, { "epoch": 1.2584402181925403, "grad_norm": 2.8905466838894442, "learning_rate": 5.4000433913670765e-06, "loss": 1.3081, "step": 42680 }, { "epoch": 1.2585876455845497, "grad_norm": 2.7617058633579616, "learning_rate": 5.399465709550461e-06, "loss": 1.2854, "step": 42685 }, { "epoch": 1.2587350729765592, "grad_norm": 2.8965377119859794, "learning_rate": 5.3988879791964335e-06, "loss": 1.2892, "step": 42690 }, { "epoch": 1.2588825003685684, "grad_norm": 2.9222685192978983, "learning_rate": 5.398310200321994e-06, "loss": 1.316, "step": 42695 }, { "epoch": 1.2590299277605779, "grad_norm": 2.9963234736863447, "learning_rate": 5.3977323729441445e-06, "loss": 1.3246, "step": 42700 }, { "epoch": 1.2591773551525873, "grad_norm": 2.9434190611863604, "learning_rate": 5.397154497079888e-06, "loss": 1.3784, "step": 42705 }, { "epoch": 1.2593247825445968, "grad_norm": 3.1058328747899218, "learning_rate": 5.39657657274623e-06, "loss": 1.3012, "step": 42710 }, { "epoch": 1.2594722099366062, "grad_norm": 2.8742451808026375, "learning_rate": 5.395998599960177e-06, "loss": 1.3186, "step": 42715 }, { "epoch": 1.2596196373286157, "grad_norm": 2.930947410428594, "learning_rate": 5.3954205787387355e-06, "loss": 1.3174, "step": 42720 }, { "epoch": 1.2597670647206252, "grad_norm": 2.8875836396536188, "learning_rate": 5.3948425090989135e-06, "loss": 1.3188, "step": 42725 }, { "epoch": 1.2599144921126344, "grad_norm": 2.819237560315597, "learning_rate": 5.394264391057723e-06, "loss": 1.2975, "step": 42730 }, { "epoch": 1.2600619195046439, "grad_norm": 2.958975900632541, "learning_rate": 5.393686224632175e-06, "loss": 1.337, "step": 42735 }, { "epoch": 1.2602093468966533, "grad_norm": 2.886550940440333, "learning_rate": 5.393108009839283e-06, "loss": 1.2881, "step": 42740 }, { "epoch": 1.2603567742886628, "grad_norm": 3.161029812875266, "learning_rate": 5.392529746696063e-06, "loss": 1.2916, "step": 42745 }, { "epoch": 1.2605042016806722, "grad_norm": 3.1112465027982785, "learning_rate": 5.391951435219528e-06, "loss": 1.3094, "step": 42750 }, { "epoch": 1.2606516290726817, "grad_norm": 2.868581197096648, "learning_rate": 5.3913730754266975e-06, "loss": 1.386, "step": 42755 }, { "epoch": 1.2607990564646911, "grad_norm": 3.0745690629877442, "learning_rate": 5.39079466733459e-06, "loss": 1.3488, "step": 42760 }, { "epoch": 1.2609464838567006, "grad_norm": 2.9716512833827267, "learning_rate": 5.390216210960226e-06, "loss": 1.2519, "step": 42765 }, { "epoch": 1.26109391124871, "grad_norm": 2.8810101872308485, "learning_rate": 5.389637706320628e-06, "loss": 1.3598, "step": 42770 }, { "epoch": 1.2612413386407195, "grad_norm": 2.8786812077865584, "learning_rate": 5.389059153432816e-06, "loss": 1.3597, "step": 42775 }, { "epoch": 1.261388766032729, "grad_norm": 2.916948035209413, "learning_rate": 5.388480552313818e-06, "loss": 1.329, "step": 42780 }, { "epoch": 1.2615361934247384, "grad_norm": 2.7953749576056124, "learning_rate": 5.387901902980658e-06, "loss": 1.3317, "step": 42785 }, { "epoch": 1.2616836208167477, "grad_norm": 2.6949289867541144, "learning_rate": 5.387323205450366e-06, "loss": 1.3007, "step": 42790 }, { "epoch": 1.2618310482087571, "grad_norm": 3.0149760503571916, "learning_rate": 5.386744459739967e-06, "loss": 1.2879, "step": 42795 }, { "epoch": 1.2619784756007666, "grad_norm": 2.918689685526972, "learning_rate": 5.386165665866496e-06, "loss": 1.3466, "step": 42800 }, { "epoch": 1.262125902992776, "grad_norm": 2.9850508386100816, "learning_rate": 5.38558682384698e-06, "loss": 1.2979, "step": 42805 }, { "epoch": 1.2622733303847855, "grad_norm": 2.9243564141466076, "learning_rate": 5.385007933698454e-06, "loss": 1.3382, "step": 42810 }, { "epoch": 1.262420757776795, "grad_norm": 2.8839998677371255, "learning_rate": 5.384428995437952e-06, "loss": 1.3055, "step": 42815 }, { "epoch": 1.2625681851688044, "grad_norm": 2.8740603252722536, "learning_rate": 5.383850009082513e-06, "loss": 1.3218, "step": 42820 }, { "epoch": 1.2627156125608137, "grad_norm": 2.8897101795324196, "learning_rate": 5.383270974649169e-06, "loss": 1.2693, "step": 42825 }, { "epoch": 1.2628630399528231, "grad_norm": 2.7772615493193227, "learning_rate": 5.382691892154964e-06, "loss": 1.2614, "step": 42830 }, { "epoch": 1.2630104673448326, "grad_norm": 2.8926456500953175, "learning_rate": 5.3821127616169345e-06, "loss": 1.3476, "step": 42835 }, { "epoch": 1.263157894736842, "grad_norm": 2.956814145954603, "learning_rate": 5.381533583052123e-06, "loss": 1.3367, "step": 42840 }, { "epoch": 1.2633053221288515, "grad_norm": 2.8457319303704973, "learning_rate": 5.380954356477573e-06, "loss": 1.259, "step": 42845 }, { "epoch": 1.263452749520861, "grad_norm": 2.9129885452131794, "learning_rate": 5.380375081910329e-06, "loss": 1.3223, "step": 42850 }, { "epoch": 1.2636001769128704, "grad_norm": 3.075656400676337, "learning_rate": 5.379795759367435e-06, "loss": 1.2386, "step": 42855 }, { "epoch": 1.2637476043048799, "grad_norm": 2.7707087064228433, "learning_rate": 5.379216388865942e-06, "loss": 1.2737, "step": 42860 }, { "epoch": 1.2638950316968893, "grad_norm": 2.9027070230321077, "learning_rate": 5.378636970422896e-06, "loss": 1.3372, "step": 42865 }, { "epoch": 1.2640424590888988, "grad_norm": 2.802074613258611, "learning_rate": 5.378057504055346e-06, "loss": 1.3362, "step": 42870 }, { "epoch": 1.2641898864809082, "grad_norm": 2.7931168192729463, "learning_rate": 5.377477989780347e-06, "loss": 1.2679, "step": 42875 }, { "epoch": 1.2643373138729177, "grad_norm": 2.9751069360673634, "learning_rate": 5.376898427614949e-06, "loss": 1.312, "step": 42880 }, { "epoch": 1.2644847412649272, "grad_norm": 3.02050321873014, "learning_rate": 5.376318817576206e-06, "loss": 1.3487, "step": 42885 }, { "epoch": 1.2646321686569364, "grad_norm": 2.8352108459647347, "learning_rate": 5.375739159681175e-06, "loss": 1.3379, "step": 42890 }, { "epoch": 1.2647795960489459, "grad_norm": 2.9501545104970517, "learning_rate": 5.375159453946914e-06, "loss": 1.2702, "step": 42895 }, { "epoch": 1.2649270234409553, "grad_norm": 2.9029114768400723, "learning_rate": 5.37457970039048e-06, "loss": 1.3095, "step": 42900 }, { "epoch": 1.2650744508329648, "grad_norm": 2.8178507349276147, "learning_rate": 5.373999899028932e-06, "loss": 1.3306, "step": 42905 }, { "epoch": 1.2652218782249742, "grad_norm": 2.9618363829081975, "learning_rate": 5.373420049879333e-06, "loss": 1.4074, "step": 42910 }, { "epoch": 1.2653693056169837, "grad_norm": 3.0187195094538057, "learning_rate": 5.372840152958746e-06, "loss": 1.3797, "step": 42915 }, { "epoch": 1.2655167330089931, "grad_norm": 2.943267092046461, "learning_rate": 5.3722602082842345e-06, "loss": 1.3559, "step": 42920 }, { "epoch": 1.2656641604010024, "grad_norm": 2.8460511048257504, "learning_rate": 5.371680215872863e-06, "loss": 1.2936, "step": 42925 }, { "epoch": 1.2658115877930118, "grad_norm": 2.819354376167344, "learning_rate": 5.371100175741701e-06, "loss": 1.2893, "step": 42930 }, { "epoch": 1.2659590151850213, "grad_norm": 2.881896836694769, "learning_rate": 5.370520087907814e-06, "loss": 1.3213, "step": 42935 }, { "epoch": 1.2661064425770308, "grad_norm": 2.9889195536708644, "learning_rate": 5.369939952388272e-06, "loss": 1.319, "step": 42940 }, { "epoch": 1.2662538699690402, "grad_norm": 2.92205526277992, "learning_rate": 5.369359769200149e-06, "loss": 1.2818, "step": 42945 }, { "epoch": 1.2664012973610497, "grad_norm": 2.8794492557935936, "learning_rate": 5.368779538360515e-06, "loss": 1.3549, "step": 42950 }, { "epoch": 1.2665487247530591, "grad_norm": 2.7914773207095678, "learning_rate": 5.368199259886444e-06, "loss": 1.3098, "step": 42955 }, { "epoch": 1.2666961521450686, "grad_norm": 2.913412609871779, "learning_rate": 5.3676189337950125e-06, "loss": 1.345, "step": 42960 }, { "epoch": 1.266843579537078, "grad_norm": 2.8652874328701827, "learning_rate": 5.367038560103296e-06, "loss": 1.3118, "step": 42965 }, { "epoch": 1.2669910069290875, "grad_norm": 2.815211449059798, "learning_rate": 5.366458138828374e-06, "loss": 1.283, "step": 42970 }, { "epoch": 1.267138434321097, "grad_norm": 2.9294120117523557, "learning_rate": 5.365877669987327e-06, "loss": 1.3366, "step": 42975 }, { "epoch": 1.2672858617131064, "grad_norm": 2.8754792729306455, "learning_rate": 5.365297153597232e-06, "loss": 1.2828, "step": 42980 }, { "epoch": 1.2674332891051157, "grad_norm": 2.7866385960694604, "learning_rate": 5.3647165896751755e-06, "loss": 1.2944, "step": 42985 }, { "epoch": 1.2675807164971251, "grad_norm": 3.0478277374802283, "learning_rate": 5.364135978238238e-06, "loss": 1.3305, "step": 42990 }, { "epoch": 1.2677281438891346, "grad_norm": 2.9172248218665047, "learning_rate": 5.363555319303507e-06, "loss": 1.2965, "step": 42995 }, { "epoch": 1.267875571281144, "grad_norm": 3.1420545414258516, "learning_rate": 5.362974612888069e-06, "loss": 1.3582, "step": 43000 }, { "epoch": 1.267875571281144, "eval_loss": 1.1137293577194214, "eval_runtime": 4.3271, "eval_samples_per_second": 91.517, "eval_steps_per_second": 3.004, "step": 43000 }, { "epoch": 1.2680229986731535, "grad_norm": 3.1117481540199017, "learning_rate": 5.362393859009011e-06, "loss": 1.3555, "step": 43005 }, { "epoch": 1.268170426065163, "grad_norm": 2.8375899673555076, "learning_rate": 5.361813057683422e-06, "loss": 1.3007, "step": 43010 }, { "epoch": 1.2683178534571724, "grad_norm": 2.744178612311521, "learning_rate": 5.361232208928393e-06, "loss": 1.3369, "step": 43015 }, { "epoch": 1.2684652808491816, "grad_norm": 2.940760129867753, "learning_rate": 5.3606513127610165e-06, "loss": 1.3494, "step": 43020 }, { "epoch": 1.268612708241191, "grad_norm": 2.955736986809661, "learning_rate": 5.360070369198386e-06, "loss": 1.2964, "step": 43025 }, { "epoch": 1.2687601356332006, "grad_norm": 3.0626602600410773, "learning_rate": 5.3594893782575974e-06, "loss": 1.323, "step": 43030 }, { "epoch": 1.26890756302521, "grad_norm": 2.9572644937919903, "learning_rate": 5.358908339955745e-06, "loss": 1.3464, "step": 43035 }, { "epoch": 1.2690549904172195, "grad_norm": 3.01932381205025, "learning_rate": 5.358327254309927e-06, "loss": 1.3009, "step": 43040 }, { "epoch": 1.269202417809229, "grad_norm": 2.9711538635351253, "learning_rate": 5.357746121337244e-06, "loss": 1.3666, "step": 43045 }, { "epoch": 1.2693498452012384, "grad_norm": 2.7924000663554898, "learning_rate": 5.357164941054795e-06, "loss": 1.327, "step": 43050 }, { "epoch": 1.2694972725932478, "grad_norm": 2.772485946169405, "learning_rate": 5.3565837134796834e-06, "loss": 1.2981, "step": 43055 }, { "epoch": 1.2696446999852573, "grad_norm": 2.8421783348911505, "learning_rate": 5.35600243862901e-06, "loss": 1.3484, "step": 43060 }, { "epoch": 1.2697921273772668, "grad_norm": 3.0260635135318403, "learning_rate": 5.355421116519882e-06, "loss": 1.33, "step": 43065 }, { "epoch": 1.2699395547692762, "grad_norm": 2.9293184986271656, "learning_rate": 5.354839747169404e-06, "loss": 1.322, "step": 43070 }, { "epoch": 1.2700869821612857, "grad_norm": 2.9405569290942797, "learning_rate": 5.354258330594684e-06, "loss": 1.3291, "step": 43075 }, { "epoch": 1.2702344095532951, "grad_norm": 2.9261109874448237, "learning_rate": 5.353676866812831e-06, "loss": 1.313, "step": 43080 }, { "epoch": 1.2703818369453044, "grad_norm": 2.9139740934650025, "learning_rate": 5.353095355840955e-06, "loss": 1.3502, "step": 43085 }, { "epoch": 1.2705292643373138, "grad_norm": 2.851008666182043, "learning_rate": 5.352513797696167e-06, "loss": 1.2775, "step": 43090 }, { "epoch": 1.2706766917293233, "grad_norm": 2.8901005416565098, "learning_rate": 5.351932192395582e-06, "loss": 1.2735, "step": 43095 }, { "epoch": 1.2708241191213328, "grad_norm": 2.8262664170675307, "learning_rate": 5.351350539956311e-06, "loss": 1.2933, "step": 43100 }, { "epoch": 1.2709715465133422, "grad_norm": 2.8672763021670833, "learning_rate": 5.350768840395474e-06, "loss": 1.3155, "step": 43105 }, { "epoch": 1.2711189739053517, "grad_norm": 2.965070578808663, "learning_rate": 5.350187093730184e-06, "loss": 1.3576, "step": 43110 }, { "epoch": 1.2712664012973611, "grad_norm": 2.8967743662937697, "learning_rate": 5.349605299977563e-06, "loss": 1.3204, "step": 43115 }, { "epoch": 1.2714138286893704, "grad_norm": 2.9656604080928717, "learning_rate": 5.349023459154728e-06, "loss": 1.3062, "step": 43120 }, { "epoch": 1.2715612560813798, "grad_norm": 2.889230393958603, "learning_rate": 5.348441571278802e-06, "loss": 1.3031, "step": 43125 }, { "epoch": 1.2717086834733893, "grad_norm": 2.9041394713174262, "learning_rate": 5.347859636366909e-06, "loss": 1.2947, "step": 43130 }, { "epoch": 1.2718561108653987, "grad_norm": 2.9861495738100294, "learning_rate": 5.347277654436172e-06, "loss": 1.3609, "step": 43135 }, { "epoch": 1.2720035382574082, "grad_norm": 2.877399635641539, "learning_rate": 5.346695625503715e-06, "loss": 1.2968, "step": 43140 }, { "epoch": 1.2721509656494177, "grad_norm": 2.887225417412392, "learning_rate": 5.3461135495866655e-06, "loss": 1.2554, "step": 43145 }, { "epoch": 1.2722983930414271, "grad_norm": 2.948095384586044, "learning_rate": 5.345531426702152e-06, "loss": 1.3298, "step": 43150 }, { "epoch": 1.2724458204334366, "grad_norm": 2.8527958130890396, "learning_rate": 5.344949256867305e-06, "loss": 1.3285, "step": 43155 }, { "epoch": 1.272593247825446, "grad_norm": 2.8100141318261698, "learning_rate": 5.344367040099256e-06, "loss": 1.2614, "step": 43160 }, { "epoch": 1.2727406752174555, "grad_norm": 2.988236337257089, "learning_rate": 5.343784776415135e-06, "loss": 1.3425, "step": 43165 }, { "epoch": 1.272888102609465, "grad_norm": 2.866807737770798, "learning_rate": 5.343202465832077e-06, "loss": 1.3124, "step": 43170 }, { "epoch": 1.2730355300014744, "grad_norm": 2.8754880270738257, "learning_rate": 5.3426201083672176e-06, "loss": 1.3711, "step": 43175 }, { "epoch": 1.2731829573934836, "grad_norm": 2.9165321637037427, "learning_rate": 5.342037704037693e-06, "loss": 1.2735, "step": 43180 }, { "epoch": 1.273330384785493, "grad_norm": 2.8137121132233363, "learning_rate": 5.341455252860641e-06, "loss": 1.2953, "step": 43185 }, { "epoch": 1.2734778121775026, "grad_norm": 2.7483003124661374, "learning_rate": 5.3408727548532e-06, "loss": 1.3018, "step": 43190 }, { "epoch": 1.273625239569512, "grad_norm": 2.979609595527757, "learning_rate": 5.3402902100325136e-06, "loss": 1.2963, "step": 43195 }, { "epoch": 1.2737726669615215, "grad_norm": 3.0406443050876435, "learning_rate": 5.339707618415721e-06, "loss": 1.3214, "step": 43200 }, { "epoch": 1.273920094353531, "grad_norm": 2.879775056353849, "learning_rate": 5.339124980019965e-06, "loss": 1.2911, "step": 43205 }, { "epoch": 1.2740675217455404, "grad_norm": 2.8429935641863207, "learning_rate": 5.338542294862394e-06, "loss": 1.2794, "step": 43210 }, { "epoch": 1.2742149491375496, "grad_norm": 2.965880569874426, "learning_rate": 5.33795956296015e-06, "loss": 1.2689, "step": 43215 }, { "epoch": 1.274362376529559, "grad_norm": 2.8994492040319404, "learning_rate": 5.337376784330385e-06, "loss": 1.3329, "step": 43220 }, { "epoch": 1.2745098039215685, "grad_norm": 2.9512061849557734, "learning_rate": 5.336793958990243e-06, "loss": 1.3095, "step": 43225 }, { "epoch": 1.274657231313578, "grad_norm": 2.8840541105341266, "learning_rate": 5.3362110869568775e-06, "loss": 1.3393, "step": 43230 }, { "epoch": 1.2748046587055875, "grad_norm": 2.997454010566236, "learning_rate": 5.33562816824744e-06, "loss": 1.3469, "step": 43235 }, { "epoch": 1.274952086097597, "grad_norm": 2.874254098882858, "learning_rate": 5.335045202879084e-06, "loss": 1.2709, "step": 43240 }, { "epoch": 1.2750995134896064, "grad_norm": 2.888596574351054, "learning_rate": 5.3344621908689616e-06, "loss": 1.2927, "step": 43245 }, { "epoch": 1.2752469408816158, "grad_norm": 3.1561683398365172, "learning_rate": 5.33387913223423e-06, "loss": 1.3246, "step": 43250 }, { "epoch": 1.2753943682736253, "grad_norm": 2.8924555458987733, "learning_rate": 5.333296026992047e-06, "loss": 1.3397, "step": 43255 }, { "epoch": 1.2755417956656347, "grad_norm": 2.8755599354081527, "learning_rate": 5.332712875159571e-06, "loss": 1.3159, "step": 43260 }, { "epoch": 1.2756892230576442, "grad_norm": 2.8682460908306835, "learning_rate": 5.332129676753961e-06, "loss": 1.3228, "step": 43265 }, { "epoch": 1.2758366504496537, "grad_norm": 2.924058037162083, "learning_rate": 5.331546431792377e-06, "loss": 1.2913, "step": 43270 }, { "epoch": 1.275984077841663, "grad_norm": 2.936961513573556, "learning_rate": 5.330963140291985e-06, "loss": 1.3163, "step": 43275 }, { "epoch": 1.2761315052336724, "grad_norm": 2.9255769123487525, "learning_rate": 5.330379802269947e-06, "loss": 1.3279, "step": 43280 }, { "epoch": 1.2762789326256818, "grad_norm": 3.0771183359889465, "learning_rate": 5.329796417743428e-06, "loss": 1.3355, "step": 43285 }, { "epoch": 1.2764263600176913, "grad_norm": 3.038087220551301, "learning_rate": 5.329212986729597e-06, "loss": 1.3449, "step": 43290 }, { "epoch": 1.2765737874097007, "grad_norm": 2.989160178797202, "learning_rate": 5.328629509245619e-06, "loss": 1.3467, "step": 43295 }, { "epoch": 1.2767212148017102, "grad_norm": 2.8736084102391004, "learning_rate": 5.3280459853086655e-06, "loss": 1.3249, "step": 43300 }, { "epoch": 1.2768686421937196, "grad_norm": 2.838191858851142, "learning_rate": 5.327462414935907e-06, "loss": 1.273, "step": 43305 }, { "epoch": 1.277016069585729, "grad_norm": 2.964491208979367, "learning_rate": 5.326878798144516e-06, "loss": 1.3335, "step": 43310 }, { "epoch": 1.2771634969777383, "grad_norm": 2.892971276511245, "learning_rate": 5.326295134951666e-06, "loss": 1.2846, "step": 43315 }, { "epoch": 1.2773109243697478, "grad_norm": 2.791271359141157, "learning_rate": 5.325711425374531e-06, "loss": 1.288, "step": 43320 }, { "epoch": 1.2774583517617573, "grad_norm": 2.8583889271886207, "learning_rate": 5.325127669430289e-06, "loss": 1.3393, "step": 43325 }, { "epoch": 1.2776057791537667, "grad_norm": 2.887262577855155, "learning_rate": 5.324543867136115e-06, "loss": 1.2917, "step": 43330 }, { "epoch": 1.2777532065457762, "grad_norm": 2.8215042976081155, "learning_rate": 5.323960018509191e-06, "loss": 1.299, "step": 43335 }, { "epoch": 1.2779006339377856, "grad_norm": 2.8098618201830465, "learning_rate": 5.323376123566697e-06, "loss": 1.3082, "step": 43340 }, { "epoch": 1.278048061329795, "grad_norm": 3.012568709873359, "learning_rate": 5.322792182325813e-06, "loss": 1.3456, "step": 43345 }, { "epoch": 1.2781954887218046, "grad_norm": 2.8290555915352904, "learning_rate": 5.322208194803722e-06, "loss": 1.2791, "step": 43350 }, { "epoch": 1.278342916113814, "grad_norm": 2.82146552773007, "learning_rate": 5.3216241610176105e-06, "loss": 1.3273, "step": 43355 }, { "epoch": 1.2784903435058235, "grad_norm": 2.928911406665922, "learning_rate": 5.321040080984663e-06, "loss": 1.3123, "step": 43360 }, { "epoch": 1.278637770897833, "grad_norm": 2.8820476868570806, "learning_rate": 5.320455954722069e-06, "loss": 1.3489, "step": 43365 }, { "epoch": 1.2787851982898424, "grad_norm": 3.018488698080991, "learning_rate": 5.3198717822470135e-06, "loss": 1.2804, "step": 43370 }, { "epoch": 1.2789326256818516, "grad_norm": 2.9510905797138083, "learning_rate": 5.319287563576689e-06, "loss": 1.3877, "step": 43375 }, { "epoch": 1.279080053073861, "grad_norm": 3.0747946656185143, "learning_rate": 5.318703298728286e-06, "loss": 1.3079, "step": 43380 }, { "epoch": 1.2792274804658705, "grad_norm": 2.8488824100661363, "learning_rate": 5.318118987718997e-06, "loss": 1.3007, "step": 43385 }, { "epoch": 1.27937490785788, "grad_norm": 2.852386670643889, "learning_rate": 5.317534630566015e-06, "loss": 1.3013, "step": 43390 }, { "epoch": 1.2795223352498895, "grad_norm": 2.9267284586072875, "learning_rate": 5.3169502272865394e-06, "loss": 1.3433, "step": 43395 }, { "epoch": 1.279669762641899, "grad_norm": 2.9977815004926907, "learning_rate": 5.316365777897761e-06, "loss": 1.3032, "step": 43400 }, { "epoch": 1.2798171900339084, "grad_norm": 3.0444615637439925, "learning_rate": 5.315781282416883e-06, "loss": 1.3278, "step": 43405 }, { "epoch": 1.2799646174259176, "grad_norm": 2.91604165130961, "learning_rate": 5.315196740861102e-06, "loss": 1.3083, "step": 43410 }, { "epoch": 1.280112044817927, "grad_norm": 2.832573983110568, "learning_rate": 5.31461215324762e-06, "loss": 1.3326, "step": 43415 }, { "epoch": 1.2802594722099365, "grad_norm": 2.962857848713919, "learning_rate": 5.314027519593639e-06, "loss": 1.3032, "step": 43420 }, { "epoch": 1.280406899601946, "grad_norm": 2.829828790376282, "learning_rate": 5.313442839916362e-06, "loss": 1.2632, "step": 43425 }, { "epoch": 1.2805543269939554, "grad_norm": 2.8781158514546363, "learning_rate": 5.312858114232993e-06, "loss": 1.3297, "step": 43430 }, { "epoch": 1.280701754385965, "grad_norm": 3.002269166419398, "learning_rate": 5.312273342560741e-06, "loss": 1.3924, "step": 43435 }, { "epoch": 1.2808491817779744, "grad_norm": 2.890273013600503, "learning_rate": 5.311688524916811e-06, "loss": 1.3789, "step": 43440 }, { "epoch": 1.2809966091699838, "grad_norm": 2.7707414700442063, "learning_rate": 5.3111036613184136e-06, "loss": 1.3102, "step": 43445 }, { "epoch": 1.2811440365619933, "grad_norm": 2.827492959551878, "learning_rate": 5.310518751782758e-06, "loss": 1.2911, "step": 43450 }, { "epoch": 1.2812914639540027, "grad_norm": 2.8324307083092983, "learning_rate": 5.309933796327057e-06, "loss": 1.3027, "step": 43455 }, { "epoch": 1.2814388913460122, "grad_norm": 2.925275038599175, "learning_rate": 5.309348794968522e-06, "loss": 1.3159, "step": 43460 }, { "epoch": 1.2815863187380216, "grad_norm": 2.8007885065857376, "learning_rate": 5.308763747724367e-06, "loss": 1.3077, "step": 43465 }, { "epoch": 1.2817337461300309, "grad_norm": 2.99723308207539, "learning_rate": 5.308178654611811e-06, "loss": 1.347, "step": 43470 }, { "epoch": 1.2818811735220403, "grad_norm": 2.8627381908635905, "learning_rate": 5.3075935156480675e-06, "loss": 1.3472, "step": 43475 }, { "epoch": 1.2820286009140498, "grad_norm": 2.989176893575712, "learning_rate": 5.307008330850356e-06, "loss": 1.3117, "step": 43480 }, { "epoch": 1.2821760283060593, "grad_norm": 2.88683075452363, "learning_rate": 5.306423100235897e-06, "loss": 1.3274, "step": 43485 }, { "epoch": 1.2823234556980687, "grad_norm": 2.7419025346508348, "learning_rate": 5.305837823821911e-06, "loss": 1.3105, "step": 43490 }, { "epoch": 1.2824708830900782, "grad_norm": 2.972683949209302, "learning_rate": 5.305252501625621e-06, "loss": 1.3247, "step": 43495 }, { "epoch": 1.2826183104820876, "grad_norm": 2.9698004478723843, "learning_rate": 5.3046671336642495e-06, "loss": 1.2981, "step": 43500 }, { "epoch": 1.2826183104820876, "eval_loss": 1.1128203868865967, "eval_runtime": 4.1968, "eval_samples_per_second": 94.357, "eval_steps_per_second": 3.098, "step": 43500 }, { "epoch": 1.282765737874097, "grad_norm": 2.8974746550705204, "learning_rate": 5.304081719955023e-06, "loss": 1.3173, "step": 43505 }, { "epoch": 1.2829131652661063, "grad_norm": 2.942647074288078, "learning_rate": 5.303496260515166e-06, "loss": 1.2647, "step": 43510 }, { "epoch": 1.2830605926581158, "grad_norm": 2.7022051298284655, "learning_rate": 5.302910755361908e-06, "loss": 1.2541, "step": 43515 }, { "epoch": 1.2832080200501252, "grad_norm": 3.0078260269315074, "learning_rate": 5.30232520451248e-06, "loss": 1.341, "step": 43520 }, { "epoch": 1.2833554474421347, "grad_norm": 2.9765359477091287, "learning_rate": 5.301739607984109e-06, "loss": 1.3332, "step": 43525 }, { "epoch": 1.2835028748341442, "grad_norm": 2.9013428399750856, "learning_rate": 5.301153965794026e-06, "loss": 1.3264, "step": 43530 }, { "epoch": 1.2836503022261536, "grad_norm": 2.955383621264382, "learning_rate": 5.300568277959469e-06, "loss": 1.2939, "step": 43535 }, { "epoch": 1.283797729618163, "grad_norm": 3.0280466561615134, "learning_rate": 5.299982544497669e-06, "loss": 1.3166, "step": 43540 }, { "epoch": 1.2839451570101725, "grad_norm": 2.9092685792450563, "learning_rate": 5.2993967654258615e-06, "loss": 1.2969, "step": 43545 }, { "epoch": 1.284092584402182, "grad_norm": 3.1907323180262455, "learning_rate": 5.298810940761287e-06, "loss": 1.2952, "step": 43550 }, { "epoch": 1.2842400117941914, "grad_norm": 2.9116390582401404, "learning_rate": 5.298225070521182e-06, "loss": 1.3441, "step": 43555 }, { "epoch": 1.284387439186201, "grad_norm": 2.993072417846721, "learning_rate": 5.2976391547227845e-06, "loss": 1.298, "step": 43560 }, { "epoch": 1.2845348665782104, "grad_norm": 3.0313971442943486, "learning_rate": 5.297053193383339e-06, "loss": 1.367, "step": 43565 }, { "epoch": 1.2846822939702196, "grad_norm": 2.8043555693172926, "learning_rate": 5.296467186520086e-06, "loss": 1.3137, "step": 43570 }, { "epoch": 1.284829721362229, "grad_norm": 2.828016620332547, "learning_rate": 5.295881134150271e-06, "loss": 1.3219, "step": 43575 }, { "epoch": 1.2849771487542385, "grad_norm": 2.990875239466988, "learning_rate": 5.295295036291138e-06, "loss": 1.3104, "step": 43580 }, { "epoch": 1.285124576146248, "grad_norm": 2.911470899221843, "learning_rate": 5.294708892959934e-06, "loss": 1.3032, "step": 43585 }, { "epoch": 1.2852720035382574, "grad_norm": 2.9080961623201893, "learning_rate": 5.294122704173907e-06, "loss": 1.2977, "step": 43590 }, { "epoch": 1.285419430930267, "grad_norm": 2.7443626066702707, "learning_rate": 5.293536469950305e-06, "loss": 1.3105, "step": 43595 }, { "epoch": 1.2855668583222764, "grad_norm": 2.7756603449123425, "learning_rate": 5.292950190306382e-06, "loss": 1.2628, "step": 43600 }, { "epoch": 1.2857142857142856, "grad_norm": 3.0027506579744307, "learning_rate": 5.292363865259386e-06, "loss": 1.319, "step": 43605 }, { "epoch": 1.285861713106295, "grad_norm": 2.822092100812722, "learning_rate": 5.2917774948265735e-06, "loss": 1.3348, "step": 43610 }, { "epoch": 1.2860091404983045, "grad_norm": 2.9900333659195004, "learning_rate": 5.291191079025197e-06, "loss": 1.3471, "step": 43615 }, { "epoch": 1.286156567890314, "grad_norm": 2.880344047490397, "learning_rate": 5.2906046178725125e-06, "loss": 1.2844, "step": 43620 }, { "epoch": 1.2863039952823234, "grad_norm": 2.8008003858435186, "learning_rate": 5.29001811138578e-06, "loss": 1.3191, "step": 43625 }, { "epoch": 1.2864514226743329, "grad_norm": 2.9086982413565967, "learning_rate": 5.289431559582254e-06, "loss": 1.3271, "step": 43630 }, { "epoch": 1.2865988500663423, "grad_norm": 3.254799721522414, "learning_rate": 5.2888449624791965e-06, "loss": 1.3035, "step": 43635 }, { "epoch": 1.2867462774583518, "grad_norm": 2.986018048443392, "learning_rate": 5.288258320093869e-06, "loss": 1.3249, "step": 43640 }, { "epoch": 1.2868937048503613, "grad_norm": 2.9432643192766323, "learning_rate": 5.287671632443535e-06, "loss": 1.3744, "step": 43645 }, { "epoch": 1.2870411322423707, "grad_norm": 3.106933459364704, "learning_rate": 5.287084899545457e-06, "loss": 1.3179, "step": 43650 }, { "epoch": 1.2871885596343802, "grad_norm": 3.2570891887447186, "learning_rate": 5.286498121416901e-06, "loss": 1.278, "step": 43655 }, { "epoch": 1.2873359870263896, "grad_norm": 2.900358528430846, "learning_rate": 5.2859112980751324e-06, "loss": 1.3196, "step": 43660 }, { "epoch": 1.2874834144183989, "grad_norm": 2.7850558766583253, "learning_rate": 5.285324429537421e-06, "loss": 1.2698, "step": 43665 }, { "epoch": 1.2876308418104083, "grad_norm": 2.8101749130480056, "learning_rate": 5.2847375158210335e-06, "loss": 1.2988, "step": 43670 }, { "epoch": 1.2877782692024178, "grad_norm": 2.978197073874224, "learning_rate": 5.284150556943244e-06, "loss": 1.3275, "step": 43675 }, { "epoch": 1.2879256965944272, "grad_norm": 2.8172106657854394, "learning_rate": 5.283563552921322e-06, "loss": 1.2798, "step": 43680 }, { "epoch": 1.2880731239864367, "grad_norm": 3.0119163534284734, "learning_rate": 5.28297650377254e-06, "loss": 1.295, "step": 43685 }, { "epoch": 1.2882205513784462, "grad_norm": 2.8577395474362635, "learning_rate": 5.282389409514175e-06, "loss": 1.3112, "step": 43690 }, { "epoch": 1.2883679787704556, "grad_norm": 2.9612299193499965, "learning_rate": 5.281802270163501e-06, "loss": 1.3191, "step": 43695 }, { "epoch": 1.2885154061624648, "grad_norm": 2.895738373537081, "learning_rate": 5.281215085737798e-06, "loss": 1.3244, "step": 43700 }, { "epoch": 1.2886628335544743, "grad_norm": 2.974704292895211, "learning_rate": 5.28062785625434e-06, "loss": 1.3503, "step": 43705 }, { "epoch": 1.2888102609464838, "grad_norm": 2.816358935508686, "learning_rate": 5.280040581730411e-06, "loss": 1.2499, "step": 43710 }, { "epoch": 1.2889576883384932, "grad_norm": 3.2152576066231866, "learning_rate": 5.279453262183291e-06, "loss": 1.3332, "step": 43715 }, { "epoch": 1.2891051157305027, "grad_norm": 2.8291351630424475, "learning_rate": 5.278865897630262e-06, "loss": 1.3582, "step": 43720 }, { "epoch": 1.2892525431225121, "grad_norm": 2.9183315067190807, "learning_rate": 5.278278488088608e-06, "loss": 1.3284, "step": 43725 }, { "epoch": 1.2893999705145216, "grad_norm": 2.871693827837933, "learning_rate": 5.277691033575615e-06, "loss": 1.3307, "step": 43730 }, { "epoch": 1.289547397906531, "grad_norm": 2.991542183897079, "learning_rate": 5.277103534108569e-06, "loss": 1.3604, "step": 43735 }, { "epoch": 1.2896948252985405, "grad_norm": 2.8956296404435236, "learning_rate": 5.2765159897047565e-06, "loss": 1.2723, "step": 43740 }, { "epoch": 1.28984225269055, "grad_norm": 2.850738147056294, "learning_rate": 5.2759284003814675e-06, "loss": 1.3026, "step": 43745 }, { "epoch": 1.2899896800825594, "grad_norm": 2.9860224427604978, "learning_rate": 5.275340766155995e-06, "loss": 1.3199, "step": 43750 }, { "epoch": 1.290137107474569, "grad_norm": 2.875805501069425, "learning_rate": 5.274753087045628e-06, "loss": 1.3059, "step": 43755 }, { "epoch": 1.2902845348665783, "grad_norm": 3.0105224364947305, "learning_rate": 5.274165363067659e-06, "loss": 1.352, "step": 43760 }, { "epoch": 1.2904319622585876, "grad_norm": 2.8258085257187995, "learning_rate": 5.2735775942393855e-06, "loss": 1.3311, "step": 43765 }, { "epoch": 1.290579389650597, "grad_norm": 2.8010702956196507, "learning_rate": 5.2729897805781e-06, "loss": 1.2547, "step": 43770 }, { "epoch": 1.2907268170426065, "grad_norm": 3.0205706824881133, "learning_rate": 5.2724019221011015e-06, "loss": 1.317, "step": 43775 }, { "epoch": 1.290874244434616, "grad_norm": 2.8659216426001493, "learning_rate": 5.271814018825689e-06, "loss": 1.3031, "step": 43780 }, { "epoch": 1.2910216718266254, "grad_norm": 2.9016701414746735, "learning_rate": 5.271226070769161e-06, "loss": 1.3184, "step": 43785 }, { "epoch": 1.2911690992186349, "grad_norm": 2.941742260696916, "learning_rate": 5.270638077948819e-06, "loss": 1.2694, "step": 43790 }, { "epoch": 1.2913165266106443, "grad_norm": 2.7704007727982938, "learning_rate": 5.270050040381964e-06, "loss": 1.3326, "step": 43795 }, { "epoch": 1.2914639540026536, "grad_norm": 2.8543295530233754, "learning_rate": 5.269461958085902e-06, "loss": 1.2648, "step": 43800 }, { "epoch": 1.291611381394663, "grad_norm": 2.9748045978222306, "learning_rate": 5.2688738310779365e-06, "loss": 1.2624, "step": 43805 }, { "epoch": 1.2917588087866725, "grad_norm": 2.915096483384289, "learning_rate": 5.2682856593753734e-06, "loss": 1.3386, "step": 43810 }, { "epoch": 1.291906236178682, "grad_norm": 2.7970454885857703, "learning_rate": 5.267697442995522e-06, "loss": 1.2745, "step": 43815 }, { "epoch": 1.2920536635706914, "grad_norm": 2.8769551326623075, "learning_rate": 5.2671091819556904e-06, "loss": 1.3038, "step": 43820 }, { "epoch": 1.2922010909627009, "grad_norm": 2.8924612760191364, "learning_rate": 5.266520876273188e-06, "loss": 1.314, "step": 43825 }, { "epoch": 1.2923485183547103, "grad_norm": 2.8222940938411623, "learning_rate": 5.265932525965329e-06, "loss": 1.3068, "step": 43830 }, { "epoch": 1.2924959457467198, "grad_norm": 3.063343813278949, "learning_rate": 5.265344131049421e-06, "loss": 1.299, "step": 43835 }, { "epoch": 1.2926433731387292, "grad_norm": 2.954635983566279, "learning_rate": 5.264755691542784e-06, "loss": 1.3398, "step": 43840 }, { "epoch": 1.2927908005307387, "grad_norm": 2.893818433432319, "learning_rate": 5.264167207462731e-06, "loss": 1.2805, "step": 43845 }, { "epoch": 1.2929382279227482, "grad_norm": 2.921395876441712, "learning_rate": 5.263578678826579e-06, "loss": 1.3162, "step": 43850 }, { "epoch": 1.2930856553147576, "grad_norm": 2.9031898291405107, "learning_rate": 5.262990105651646e-06, "loss": 1.3238, "step": 43855 }, { "epoch": 1.2932330827067668, "grad_norm": 2.9269970882538323, "learning_rate": 5.262401487955251e-06, "loss": 1.3612, "step": 43860 }, { "epoch": 1.2933805100987763, "grad_norm": 2.847549768826495, "learning_rate": 5.261812825754715e-06, "loss": 1.3299, "step": 43865 }, { "epoch": 1.2935279374907858, "grad_norm": 2.974155501181657, "learning_rate": 5.26122411906736e-06, "loss": 1.3542, "step": 43870 }, { "epoch": 1.2936753648827952, "grad_norm": 2.8532968593773416, "learning_rate": 5.260635367910511e-06, "loss": 1.3207, "step": 43875 }, { "epoch": 1.2938227922748047, "grad_norm": 2.8826904548816397, "learning_rate": 5.26004657230149e-06, "loss": 1.3193, "step": 43880 }, { "epoch": 1.2939702196668141, "grad_norm": 2.7801127447217735, "learning_rate": 5.259457732257626e-06, "loss": 1.3006, "step": 43885 }, { "epoch": 1.2941176470588236, "grad_norm": 2.991389306245732, "learning_rate": 5.258868847796243e-06, "loss": 1.3088, "step": 43890 }, { "epoch": 1.2942650744508328, "grad_norm": 2.8738122706171714, "learning_rate": 5.2582799189346715e-06, "loss": 1.2733, "step": 43895 }, { "epoch": 1.2944125018428423, "grad_norm": 2.996141478372797, "learning_rate": 5.2576909456902405e-06, "loss": 1.3124, "step": 43900 }, { "epoch": 1.2945599292348517, "grad_norm": 2.859728084293042, "learning_rate": 5.257101928080284e-06, "loss": 1.3282, "step": 43905 }, { "epoch": 1.2947073566268612, "grad_norm": 3.0357778236122326, "learning_rate": 5.2565128661221306e-06, "loss": 1.3718, "step": 43910 }, { "epoch": 1.2948547840188707, "grad_norm": 2.980818300543497, "learning_rate": 5.255923759833117e-06, "loss": 1.319, "step": 43915 }, { "epoch": 1.2950022114108801, "grad_norm": 3.061991498537033, "learning_rate": 5.255334609230576e-06, "loss": 1.352, "step": 43920 }, { "epoch": 1.2951496388028896, "grad_norm": 2.866547643117132, "learning_rate": 5.254745414331846e-06, "loss": 1.322, "step": 43925 }, { "epoch": 1.295297066194899, "grad_norm": 2.92752923706996, "learning_rate": 5.254156175154263e-06, "loss": 1.3221, "step": 43930 }, { "epoch": 1.2954444935869085, "grad_norm": 2.7853856811855318, "learning_rate": 5.253566891715168e-06, "loss": 1.3158, "step": 43935 }, { "epoch": 1.295591920978918, "grad_norm": 3.0429239634760394, "learning_rate": 5.2529775640319e-06, "loss": 1.3495, "step": 43940 }, { "epoch": 1.2957393483709274, "grad_norm": 2.9555526203316145, "learning_rate": 5.2523881921218e-06, "loss": 1.3338, "step": 43945 }, { "epoch": 1.2958867757629369, "grad_norm": 3.053445532981693, "learning_rate": 5.251798776002213e-06, "loss": 1.2585, "step": 43950 }, { "epoch": 1.296034203154946, "grad_norm": 2.998179999625007, "learning_rate": 5.251209315690482e-06, "loss": 1.33, "step": 43955 }, { "epoch": 1.2961816305469556, "grad_norm": 3.0175656274351637, "learning_rate": 5.250619811203953e-06, "loss": 1.2924, "step": 43960 }, { "epoch": 1.296329057938965, "grad_norm": 2.9303328194085156, "learning_rate": 5.250030262559972e-06, "loss": 1.3051, "step": 43965 }, { "epoch": 1.2964764853309745, "grad_norm": 2.844293165809458, "learning_rate": 5.249440669775888e-06, "loss": 1.3253, "step": 43970 }, { "epoch": 1.296623912722984, "grad_norm": 2.80134346860458, "learning_rate": 5.24885103286905e-06, "loss": 1.3408, "step": 43975 }, { "epoch": 1.2967713401149934, "grad_norm": 2.7234429366690223, "learning_rate": 5.248261351856809e-06, "loss": 1.3135, "step": 43980 }, { "epoch": 1.2969187675070029, "grad_norm": 2.863319482076904, "learning_rate": 5.247671626756516e-06, "loss": 1.3046, "step": 43985 }, { "epoch": 1.2970661948990123, "grad_norm": 2.8220129087481634, "learning_rate": 5.247081857585527e-06, "loss": 1.3146, "step": 43990 }, { "epoch": 1.2972136222910216, "grad_norm": 2.8770886400272397, "learning_rate": 5.2464920443611935e-06, "loss": 1.3425, "step": 43995 }, { "epoch": 1.297361049683031, "grad_norm": 2.8904885873090866, "learning_rate": 5.245902187100873e-06, "loss": 1.3262, "step": 44000 }, { "epoch": 1.297361049683031, "eval_loss": 1.111588478088379, "eval_runtime": 4.2832, "eval_samples_per_second": 92.454, "eval_steps_per_second": 3.035, "step": 44000 }, { "epoch": 1.2975084770750405, "grad_norm": 2.926486107093969, "learning_rate": 5.245312285821922e-06, "loss": 1.304, "step": 44005 }, { "epoch": 1.29765590446705, "grad_norm": 2.7343663908920175, "learning_rate": 5.2447223405417e-06, "loss": 1.3003, "step": 44010 }, { "epoch": 1.2978033318590594, "grad_norm": 2.924251625238292, "learning_rate": 5.244132351277566e-06, "loss": 1.2833, "step": 44015 }, { "epoch": 1.2979507592510688, "grad_norm": 3.131537492071789, "learning_rate": 5.243542318046882e-06, "loss": 1.3359, "step": 44020 }, { "epoch": 1.2980981866430783, "grad_norm": 2.8803212774092315, "learning_rate": 5.242952240867009e-06, "loss": 1.3526, "step": 44025 }, { "epoch": 1.2982456140350878, "grad_norm": 2.8448436255445495, "learning_rate": 5.242362119755312e-06, "loss": 1.3269, "step": 44030 }, { "epoch": 1.2983930414270972, "grad_norm": 2.9874454201580365, "learning_rate": 5.2417719547291565e-06, "loss": 1.2842, "step": 44035 }, { "epoch": 1.2985404688191067, "grad_norm": 2.8211915187680052, "learning_rate": 5.241181745805908e-06, "loss": 1.3134, "step": 44040 }, { "epoch": 1.2986878962111161, "grad_norm": 3.0081214596629477, "learning_rate": 5.240591493002933e-06, "loss": 1.3031, "step": 44045 }, { "epoch": 1.2988353236031256, "grad_norm": 3.0404896569936866, "learning_rate": 5.2400011963376006e-06, "loss": 1.3191, "step": 44050 }, { "epoch": 1.2989827509951348, "grad_norm": 2.8247857210726943, "learning_rate": 5.239410855827282e-06, "loss": 1.319, "step": 44055 }, { "epoch": 1.2991301783871443, "grad_norm": 2.73248466769048, "learning_rate": 5.238820471489349e-06, "loss": 1.3403, "step": 44060 }, { "epoch": 1.2992776057791537, "grad_norm": 2.9543877249827695, "learning_rate": 5.238230043341172e-06, "loss": 1.2724, "step": 44065 }, { "epoch": 1.2994250331711632, "grad_norm": 2.9013989567874003, "learning_rate": 5.237639571400128e-06, "loss": 1.25, "step": 44070 }, { "epoch": 1.2995724605631727, "grad_norm": 2.838698728954663, "learning_rate": 5.237049055683591e-06, "loss": 1.3214, "step": 44075 }, { "epoch": 1.2997198879551821, "grad_norm": 2.787376809061474, "learning_rate": 5.236458496208936e-06, "loss": 1.3157, "step": 44080 }, { "epoch": 1.2998673153471916, "grad_norm": 2.8628160050544484, "learning_rate": 5.235867892993543e-06, "loss": 1.3492, "step": 44085 }, { "epoch": 1.3000147427392008, "grad_norm": 2.912359911805233, "learning_rate": 5.235277246054792e-06, "loss": 1.3062, "step": 44090 }, { "epoch": 1.3001621701312103, "grad_norm": 2.8264764496717567, "learning_rate": 5.234686555410061e-06, "loss": 1.2869, "step": 44095 }, { "epoch": 1.3003095975232197, "grad_norm": 2.98650697784353, "learning_rate": 5.234095821076732e-06, "loss": 1.3036, "step": 44100 }, { "epoch": 1.3004570249152292, "grad_norm": 2.8462986716010303, "learning_rate": 5.23350504307219e-06, "loss": 1.3129, "step": 44105 }, { "epoch": 1.3006044523072386, "grad_norm": 2.946020442228462, "learning_rate": 5.232914221413817e-06, "loss": 1.3771, "step": 44110 }, { "epoch": 1.300751879699248, "grad_norm": 2.697907708723344, "learning_rate": 5.232323356119001e-06, "loss": 1.2355, "step": 44115 }, { "epoch": 1.3008993070912576, "grad_norm": 2.8978094949381283, "learning_rate": 5.2317324472051275e-06, "loss": 1.3509, "step": 44120 }, { "epoch": 1.301046734483267, "grad_norm": 2.8192416661513673, "learning_rate": 5.231141494689584e-06, "loss": 1.2937, "step": 44125 }, { "epoch": 1.3011941618752765, "grad_norm": 2.794104355037783, "learning_rate": 5.230550498589761e-06, "loss": 1.3161, "step": 44130 }, { "epoch": 1.301341589267286, "grad_norm": 2.7511894641744035, "learning_rate": 5.229959458923048e-06, "loss": 1.3059, "step": 44135 }, { "epoch": 1.3014890166592954, "grad_norm": 2.8716319301820823, "learning_rate": 5.229368375706839e-06, "loss": 1.3356, "step": 44140 }, { "epoch": 1.3016364440513049, "grad_norm": 2.966339251175064, "learning_rate": 5.2287772489585265e-06, "loss": 1.3109, "step": 44145 }, { "epoch": 1.301783871443314, "grad_norm": 2.9081015063501985, "learning_rate": 5.2281860786955046e-06, "loss": 1.3424, "step": 44150 }, { "epoch": 1.3019312988353235, "grad_norm": 2.8376703175578184, "learning_rate": 5.22759486493517e-06, "loss": 1.2951, "step": 44155 }, { "epoch": 1.302078726227333, "grad_norm": 2.801603174739388, "learning_rate": 5.227003607694918e-06, "loss": 1.2583, "step": 44160 }, { "epoch": 1.3022261536193425, "grad_norm": 2.961103956343017, "learning_rate": 5.226412306992149e-06, "loss": 1.3573, "step": 44165 }, { "epoch": 1.302373581011352, "grad_norm": 2.953092402127313, "learning_rate": 5.225820962844263e-06, "loss": 1.2931, "step": 44170 }, { "epoch": 1.3025210084033614, "grad_norm": 2.8652539614435772, "learning_rate": 5.225229575268659e-06, "loss": 1.2973, "step": 44175 }, { "epoch": 1.3026684357953708, "grad_norm": 2.8571508503060112, "learning_rate": 5.22463814428274e-06, "loss": 1.2838, "step": 44180 }, { "epoch": 1.3028158631873803, "grad_norm": 2.946940039054255, "learning_rate": 5.224046669903909e-06, "loss": 1.3572, "step": 44185 }, { "epoch": 1.3029632905793895, "grad_norm": 2.7143573820023823, "learning_rate": 5.2234551521495725e-06, "loss": 1.322, "step": 44190 }, { "epoch": 1.303110717971399, "grad_norm": 2.8213315856914094, "learning_rate": 5.222863591037135e-06, "loss": 1.302, "step": 44195 }, { "epoch": 1.3032581453634084, "grad_norm": 3.170460197993724, "learning_rate": 5.222271986584006e-06, "loss": 1.2976, "step": 44200 }, { "epoch": 1.303405572755418, "grad_norm": 2.8657658363531007, "learning_rate": 5.221680338807592e-06, "loss": 1.3283, "step": 44205 }, { "epoch": 1.3035530001474274, "grad_norm": 2.888764082796274, "learning_rate": 5.221088647725302e-06, "loss": 1.2836, "step": 44210 }, { "epoch": 1.3037004275394368, "grad_norm": 2.8696539348119234, "learning_rate": 5.220496913354551e-06, "loss": 1.2806, "step": 44215 }, { "epoch": 1.3038478549314463, "grad_norm": 2.7812693205261683, "learning_rate": 5.219905135712747e-06, "loss": 1.3107, "step": 44220 }, { "epoch": 1.3039952823234557, "grad_norm": 2.995885578894327, "learning_rate": 5.219313314817308e-06, "loss": 1.3778, "step": 44225 }, { "epoch": 1.3041427097154652, "grad_norm": 3.0199718367782302, "learning_rate": 5.2187214506856454e-06, "loss": 1.36, "step": 44230 }, { "epoch": 1.3042901371074747, "grad_norm": 3.0178998279677973, "learning_rate": 5.218129543335177e-06, "loss": 1.3084, "step": 44235 }, { "epoch": 1.3044375644994841, "grad_norm": 2.899512896360383, "learning_rate": 5.217537592783322e-06, "loss": 1.2862, "step": 44240 }, { "epoch": 1.3045849918914936, "grad_norm": 2.9523393822554325, "learning_rate": 5.216945599047496e-06, "loss": 1.3459, "step": 44245 }, { "epoch": 1.3047324192835028, "grad_norm": 2.7974539224977324, "learning_rate": 5.216353562145123e-06, "loss": 1.3169, "step": 44250 }, { "epoch": 1.3048798466755123, "grad_norm": 2.7923845058065737, "learning_rate": 5.215761482093619e-06, "loss": 1.3103, "step": 44255 }, { "epoch": 1.3050272740675217, "grad_norm": 2.785993948297979, "learning_rate": 5.215169358910411e-06, "loss": 1.3172, "step": 44260 }, { "epoch": 1.3051747014595312, "grad_norm": 2.827845529720462, "learning_rate": 5.214577192612921e-06, "loss": 1.2998, "step": 44265 }, { "epoch": 1.3053221288515406, "grad_norm": 2.7877059037767715, "learning_rate": 5.213984983218576e-06, "loss": 1.3269, "step": 44270 }, { "epoch": 1.30546955624355, "grad_norm": 2.8498837986348113, "learning_rate": 5.213392730744801e-06, "loss": 1.3325, "step": 44275 }, { "epoch": 1.3056169836355596, "grad_norm": 2.8634164293050737, "learning_rate": 5.2128004352090235e-06, "loss": 1.346, "step": 44280 }, { "epoch": 1.3057644110275688, "grad_norm": 2.9426998459509965, "learning_rate": 5.212208096628671e-06, "loss": 1.3516, "step": 44285 }, { "epoch": 1.3059118384195783, "grad_norm": 3.144494308365816, "learning_rate": 5.211615715021177e-06, "loss": 1.3228, "step": 44290 }, { "epoch": 1.3060592658115877, "grad_norm": 2.806629645542391, "learning_rate": 5.211023290403972e-06, "loss": 1.2979, "step": 44295 }, { "epoch": 1.3062066932035972, "grad_norm": 2.8958813082115924, "learning_rate": 5.2104308227944884e-06, "loss": 1.3224, "step": 44300 }, { "epoch": 1.3063541205956066, "grad_norm": 2.9287710481145925, "learning_rate": 5.2098383122101604e-06, "loss": 1.2939, "step": 44305 }, { "epoch": 1.306501547987616, "grad_norm": 3.101518156961266, "learning_rate": 5.2092457586684236e-06, "loss": 1.3102, "step": 44310 }, { "epoch": 1.3066489753796255, "grad_norm": 2.7623133887076095, "learning_rate": 5.208653162186713e-06, "loss": 1.2937, "step": 44315 }, { "epoch": 1.306796402771635, "grad_norm": 2.937585047849902, "learning_rate": 5.208060522782467e-06, "loss": 1.3411, "step": 44320 }, { "epoch": 1.3069438301636445, "grad_norm": 2.8816749233717593, "learning_rate": 5.207467840473125e-06, "loss": 1.3677, "step": 44325 }, { "epoch": 1.307091257555654, "grad_norm": 2.9090615121091608, "learning_rate": 5.20687511527613e-06, "loss": 1.3485, "step": 44330 }, { "epoch": 1.3072386849476634, "grad_norm": 3.007056217692039, "learning_rate": 5.206282347208919e-06, "loss": 1.3557, "step": 44335 }, { "epoch": 1.3073861123396728, "grad_norm": 2.9016354744603996, "learning_rate": 5.2056895362889375e-06, "loss": 1.2633, "step": 44340 }, { "epoch": 1.307533539731682, "grad_norm": 2.759420845210278, "learning_rate": 5.205096682533629e-06, "loss": 1.3066, "step": 44345 }, { "epoch": 1.3076809671236915, "grad_norm": 2.7153391687534865, "learning_rate": 5.20450378596044e-06, "loss": 1.3275, "step": 44350 }, { "epoch": 1.307828394515701, "grad_norm": 3.026342683337778, "learning_rate": 5.203910846586814e-06, "loss": 1.3719, "step": 44355 }, { "epoch": 1.3079758219077104, "grad_norm": 3.231412569652049, "learning_rate": 5.203317864430204e-06, "loss": 1.3245, "step": 44360 }, { "epoch": 1.30812324929972, "grad_norm": 2.73322643894996, "learning_rate": 5.202724839508054e-06, "loss": 1.2756, "step": 44365 }, { "epoch": 1.3082706766917294, "grad_norm": 3.0162068062498673, "learning_rate": 5.202131771837819e-06, "loss": 1.3334, "step": 44370 }, { "epoch": 1.3084181040837388, "grad_norm": 2.892385065332884, "learning_rate": 5.201538661436947e-06, "loss": 1.3445, "step": 44375 }, { "epoch": 1.308565531475748, "grad_norm": 2.8086058914682845, "learning_rate": 5.200945508322893e-06, "loss": 1.2985, "step": 44380 }, { "epoch": 1.3087129588677575, "grad_norm": 2.8098268524701346, "learning_rate": 5.2003523125131106e-06, "loss": 1.3204, "step": 44385 }, { "epoch": 1.308860386259767, "grad_norm": 2.9237208550930602, "learning_rate": 5.1997590740250545e-06, "loss": 1.2994, "step": 44390 }, { "epoch": 1.3090078136517764, "grad_norm": 2.8760661662763165, "learning_rate": 5.1991657928761824e-06, "loss": 1.343, "step": 44395 }, { "epoch": 1.3091552410437859, "grad_norm": 2.927804877678883, "learning_rate": 5.198572469083952e-06, "loss": 1.3074, "step": 44400 }, { "epoch": 1.3093026684357953, "grad_norm": 2.911667825994581, "learning_rate": 5.197979102665824e-06, "loss": 1.3091, "step": 44405 }, { "epoch": 1.3094500958278048, "grad_norm": 2.921140546863329, "learning_rate": 5.197385693639256e-06, "loss": 1.3413, "step": 44410 }, { "epoch": 1.3095975232198143, "grad_norm": 2.9918612290992366, "learning_rate": 5.1967922420217115e-06, "loss": 1.3304, "step": 44415 }, { "epoch": 1.3097449506118237, "grad_norm": 2.880454032653907, "learning_rate": 5.196198747830652e-06, "loss": 1.2934, "step": 44420 }, { "epoch": 1.3098923780038332, "grad_norm": 2.8188228486308193, "learning_rate": 5.195605211083545e-06, "loss": 1.3136, "step": 44425 }, { "epoch": 1.3100398053958426, "grad_norm": 2.8182979518023936, "learning_rate": 5.195011631797853e-06, "loss": 1.3454, "step": 44430 }, { "epoch": 1.310187232787852, "grad_norm": 2.9810214719086985, "learning_rate": 5.194418009991045e-06, "loss": 1.3354, "step": 44435 }, { "epoch": 1.3103346601798616, "grad_norm": 2.882054624328432, "learning_rate": 5.1938243456805866e-06, "loss": 1.3027, "step": 44440 }, { "epoch": 1.3104820875718708, "grad_norm": 2.9852257945441556, "learning_rate": 5.1932306388839476e-06, "loss": 1.3312, "step": 44445 }, { "epoch": 1.3106295149638802, "grad_norm": 2.7442932106573097, "learning_rate": 5.1926368896185995e-06, "loss": 1.3353, "step": 44450 }, { "epoch": 1.3107769423558897, "grad_norm": 2.8328000726075455, "learning_rate": 5.192043097902013e-06, "loss": 1.3907, "step": 44455 }, { "epoch": 1.3109243697478992, "grad_norm": 2.7532057776084837, "learning_rate": 5.1914492637516634e-06, "loss": 1.2871, "step": 44460 }, { "epoch": 1.3110717971399086, "grad_norm": 3.0750932670422206, "learning_rate": 5.190855387185021e-06, "loss": 1.2786, "step": 44465 }, { "epoch": 1.311219224531918, "grad_norm": 2.9233895601867834, "learning_rate": 5.190261468219566e-06, "loss": 1.3306, "step": 44470 }, { "epoch": 1.3113666519239275, "grad_norm": 2.725716425592353, "learning_rate": 5.189667506872771e-06, "loss": 1.3311, "step": 44475 }, { "epoch": 1.3115140793159368, "grad_norm": 2.9150247099339874, "learning_rate": 5.189073503162116e-06, "loss": 1.303, "step": 44480 }, { "epoch": 1.3116615067079462, "grad_norm": 2.8626250789323646, "learning_rate": 5.188479457105081e-06, "loss": 1.3151, "step": 44485 }, { "epoch": 1.3118089340999557, "grad_norm": 2.713955386571639, "learning_rate": 5.187885368719144e-06, "loss": 1.2523, "step": 44490 }, { "epoch": 1.3119563614919652, "grad_norm": 3.0122882832545916, "learning_rate": 5.187291238021789e-06, "loss": 1.3203, "step": 44495 }, { "epoch": 1.3121037888839746, "grad_norm": 2.926251680122388, "learning_rate": 5.186697065030498e-06, "loss": 1.326, "step": 44500 }, { "epoch": 1.3121037888839746, "eval_loss": 1.1101247072219849, "eval_runtime": 4.2097, "eval_samples_per_second": 94.068, "eval_steps_per_second": 3.088, "step": 44500 }, { "epoch": 1.312251216275984, "grad_norm": 3.093926068749459, "learning_rate": 5.186102849762755e-06, "loss": 1.3102, "step": 44505 }, { "epoch": 1.3123986436679935, "grad_norm": 2.763138710075161, "learning_rate": 5.1855085922360465e-06, "loss": 1.2843, "step": 44510 }, { "epoch": 1.312546071060003, "grad_norm": 2.796108945405319, "learning_rate": 5.184914292467858e-06, "loss": 1.327, "step": 44515 }, { "epoch": 1.3126934984520124, "grad_norm": 3.0676340972453273, "learning_rate": 5.184319950475677e-06, "loss": 1.3608, "step": 44520 }, { "epoch": 1.312840925844022, "grad_norm": 2.879148283088265, "learning_rate": 5.183725566276995e-06, "loss": 1.3435, "step": 44525 }, { "epoch": 1.3129883532360314, "grad_norm": 2.906670686320148, "learning_rate": 5.1831311398893e-06, "loss": 1.3309, "step": 44530 }, { "epoch": 1.3131357806280408, "grad_norm": 2.89470399902556, "learning_rate": 5.182536671330086e-06, "loss": 1.3347, "step": 44535 }, { "epoch": 1.31328320802005, "grad_norm": 2.665591200186678, "learning_rate": 5.181942160616843e-06, "loss": 1.319, "step": 44540 }, { "epoch": 1.3134306354120595, "grad_norm": 2.83455691920516, "learning_rate": 5.181347607767068e-06, "loss": 1.33, "step": 44545 }, { "epoch": 1.313578062804069, "grad_norm": 2.8307196998745856, "learning_rate": 5.180753012798255e-06, "loss": 1.2996, "step": 44550 }, { "epoch": 1.3137254901960784, "grad_norm": 2.9330810851491025, "learning_rate": 5.180158375727901e-06, "loss": 1.3018, "step": 44555 }, { "epoch": 1.3138729175880879, "grad_norm": 2.8763354898718934, "learning_rate": 5.179563696573503e-06, "loss": 1.3701, "step": 44560 }, { "epoch": 1.3140203449800973, "grad_norm": 2.756415232947367, "learning_rate": 5.178968975352564e-06, "loss": 1.3757, "step": 44565 }, { "epoch": 1.3141677723721068, "grad_norm": 2.7987434433095086, "learning_rate": 5.1783742120825776e-06, "loss": 1.3276, "step": 44570 }, { "epoch": 1.314315199764116, "grad_norm": 2.9639748608627046, "learning_rate": 5.17777940678105e-06, "loss": 1.2769, "step": 44575 }, { "epoch": 1.3144626271561255, "grad_norm": 2.8345702110203552, "learning_rate": 5.177184559465482e-06, "loss": 1.3382, "step": 44580 }, { "epoch": 1.314610054548135, "grad_norm": 2.8416799056646007, "learning_rate": 5.17658967015338e-06, "loss": 1.3275, "step": 44585 }, { "epoch": 1.3147574819401444, "grad_norm": 2.82254337223937, "learning_rate": 5.175994738862248e-06, "loss": 1.3112, "step": 44590 }, { "epoch": 1.3149049093321539, "grad_norm": 2.8124535057976674, "learning_rate": 5.175399765609592e-06, "loss": 1.317, "step": 44595 }, { "epoch": 1.3150523367241633, "grad_norm": 2.912509763963976, "learning_rate": 5.17480475041292e-06, "loss": 1.3407, "step": 44600 }, { "epoch": 1.3151997641161728, "grad_norm": 2.876454052663234, "learning_rate": 5.174209693289741e-06, "loss": 1.2808, "step": 44605 }, { "epoch": 1.3153471915081822, "grad_norm": 3.018699622100178, "learning_rate": 5.173614594257567e-06, "loss": 1.3174, "step": 44610 }, { "epoch": 1.3154946189001917, "grad_norm": 2.944277237295084, "learning_rate": 5.173019453333907e-06, "loss": 1.3563, "step": 44615 }, { "epoch": 1.3156420462922012, "grad_norm": 2.8863404588319446, "learning_rate": 5.172424270536274e-06, "loss": 1.2868, "step": 44620 }, { "epoch": 1.3157894736842106, "grad_norm": 2.828208083816011, "learning_rate": 5.1718290458821835e-06, "loss": 1.3236, "step": 44625 }, { "epoch": 1.31593690107622, "grad_norm": 2.8291067125438167, "learning_rate": 5.17123377938915e-06, "loss": 1.3041, "step": 44630 }, { "epoch": 1.3160843284682293, "grad_norm": 2.8768820656672127, "learning_rate": 5.170638471074689e-06, "loss": 1.2915, "step": 44635 }, { "epoch": 1.3162317558602388, "grad_norm": 2.830466008548401, "learning_rate": 5.170043120956319e-06, "loss": 1.3277, "step": 44640 }, { "epoch": 1.3163791832522482, "grad_norm": 2.6970743528610663, "learning_rate": 5.169447729051559e-06, "loss": 1.3254, "step": 44645 }, { "epoch": 1.3165266106442577, "grad_norm": 2.8694110995999824, "learning_rate": 5.168852295377929e-06, "loss": 1.273, "step": 44650 }, { "epoch": 1.3166740380362671, "grad_norm": 2.7465582700216817, "learning_rate": 5.168256819952949e-06, "loss": 1.2862, "step": 44655 }, { "epoch": 1.3168214654282766, "grad_norm": 2.928540174438906, "learning_rate": 5.167661302794142e-06, "loss": 1.3134, "step": 44660 }, { "epoch": 1.316968892820286, "grad_norm": 2.795772183221152, "learning_rate": 5.167065743919034e-06, "loss": 1.2708, "step": 44665 }, { "epoch": 1.3171163202122955, "grad_norm": 2.948049611641663, "learning_rate": 5.166470143345148e-06, "loss": 1.3296, "step": 44670 }, { "epoch": 1.3172637476043048, "grad_norm": 2.942098075019376, "learning_rate": 5.1658745010900105e-06, "loss": 1.2727, "step": 44675 }, { "epoch": 1.3174111749963142, "grad_norm": 3.0157328464309106, "learning_rate": 5.165278817171148e-06, "loss": 1.3387, "step": 44680 }, { "epoch": 1.3175586023883237, "grad_norm": 2.801067517026398, "learning_rate": 5.164683091606092e-06, "loss": 1.289, "step": 44685 }, { "epoch": 1.3177060297803331, "grad_norm": 2.910437554372092, "learning_rate": 5.164087324412371e-06, "loss": 1.3188, "step": 44690 }, { "epoch": 1.3178534571723426, "grad_norm": 2.7780451121431917, "learning_rate": 5.1634915156075145e-06, "loss": 1.3157, "step": 44695 }, { "epoch": 1.318000884564352, "grad_norm": 2.880979959440419, "learning_rate": 5.162895665209056e-06, "loss": 1.2933, "step": 44700 }, { "epoch": 1.3181483119563615, "grad_norm": 2.7775077767430147, "learning_rate": 5.162299773234529e-06, "loss": 1.3131, "step": 44705 }, { "epoch": 1.318295739348371, "grad_norm": 2.899635412036098, "learning_rate": 5.161703839701469e-06, "loss": 1.3564, "step": 44710 }, { "epoch": 1.3184431667403804, "grad_norm": 3.031750671685257, "learning_rate": 5.161107864627411e-06, "loss": 1.3373, "step": 44715 }, { "epoch": 1.3185905941323899, "grad_norm": 2.8613130281070793, "learning_rate": 5.160511848029893e-06, "loss": 1.3298, "step": 44720 }, { "epoch": 1.3187380215243993, "grad_norm": 2.9468712490209743, "learning_rate": 5.159915789926454e-06, "loss": 1.3186, "step": 44725 }, { "epoch": 1.3188854489164088, "grad_norm": 2.9480968781847015, "learning_rate": 5.159319690334632e-06, "loss": 1.3224, "step": 44730 }, { "epoch": 1.319032876308418, "grad_norm": 2.921269573365652, "learning_rate": 5.158723549271969e-06, "loss": 1.3233, "step": 44735 }, { "epoch": 1.3191803037004275, "grad_norm": 2.897816605653113, "learning_rate": 5.158127366756007e-06, "loss": 1.3256, "step": 44740 }, { "epoch": 1.319327731092437, "grad_norm": 2.8725778211743767, "learning_rate": 5.15753114280429e-06, "loss": 1.3101, "step": 44745 }, { "epoch": 1.3194751584844464, "grad_norm": 2.9308191046799603, "learning_rate": 5.156934877434361e-06, "loss": 1.314, "step": 44750 }, { "epoch": 1.3196225858764559, "grad_norm": 2.823602545356308, "learning_rate": 5.156338570663766e-06, "loss": 1.3541, "step": 44755 }, { "epoch": 1.3197700132684653, "grad_norm": 2.947408189666072, "learning_rate": 5.155742222510055e-06, "loss": 1.3349, "step": 44760 }, { "epoch": 1.3199174406604748, "grad_norm": 2.947704126995886, "learning_rate": 5.155145832990772e-06, "loss": 1.3359, "step": 44765 }, { "epoch": 1.320064868052484, "grad_norm": 3.121585997267202, "learning_rate": 5.15454940212347e-06, "loss": 1.3614, "step": 44770 }, { "epoch": 1.3202122954444935, "grad_norm": 3.054712950854767, "learning_rate": 5.153952929925697e-06, "loss": 1.3372, "step": 44775 }, { "epoch": 1.320359722836503, "grad_norm": 3.0027602533213265, "learning_rate": 5.1533564164150054e-06, "loss": 1.316, "step": 44780 }, { "epoch": 1.3205071502285124, "grad_norm": 3.055924278231767, "learning_rate": 5.152759861608949e-06, "loss": 1.279, "step": 44785 }, { "epoch": 1.3206545776205219, "grad_norm": 2.919526857072536, "learning_rate": 5.152163265525081e-06, "loss": 1.3545, "step": 44790 }, { "epoch": 1.3208020050125313, "grad_norm": 2.9511928714598548, "learning_rate": 5.1515666281809606e-06, "loss": 1.3208, "step": 44795 }, { "epoch": 1.3209494324045408, "grad_norm": 2.928515918402254, "learning_rate": 5.150969949594141e-06, "loss": 1.323, "step": 44800 }, { "epoch": 1.3210968597965502, "grad_norm": 2.9994268653828478, "learning_rate": 5.150373229782178e-06, "loss": 1.2744, "step": 44805 }, { "epoch": 1.3212442871885597, "grad_norm": 2.898973880171699, "learning_rate": 5.149776468762636e-06, "loss": 1.2983, "step": 44810 }, { "epoch": 1.3213917145805691, "grad_norm": 2.753845474782173, "learning_rate": 5.149179666553073e-06, "loss": 1.3253, "step": 44815 }, { "epoch": 1.3215391419725786, "grad_norm": 2.7717757836098547, "learning_rate": 5.148582823171051e-06, "loss": 1.33, "step": 44820 }, { "epoch": 1.321686569364588, "grad_norm": 2.935302049814009, "learning_rate": 5.147985938634132e-06, "loss": 1.3219, "step": 44825 }, { "epoch": 1.3218339967565973, "grad_norm": 2.8561011189609387, "learning_rate": 5.147389012959879e-06, "loss": 1.2774, "step": 44830 }, { "epoch": 1.3219814241486068, "grad_norm": 2.971630989292533, "learning_rate": 5.14679204616586e-06, "loss": 1.3378, "step": 44835 }, { "epoch": 1.3221288515406162, "grad_norm": 2.7564184077177782, "learning_rate": 5.14619503826964e-06, "loss": 1.2964, "step": 44840 }, { "epoch": 1.3222762789326257, "grad_norm": 2.8347208056180353, "learning_rate": 5.145597989288786e-06, "loss": 1.3017, "step": 44845 }, { "epoch": 1.3224237063246351, "grad_norm": 2.9018018339238227, "learning_rate": 5.145000899240869e-06, "loss": 1.3763, "step": 44850 }, { "epoch": 1.3225711337166446, "grad_norm": 2.9602716256505466, "learning_rate": 5.144403768143456e-06, "loss": 1.3677, "step": 44855 }, { "epoch": 1.322718561108654, "grad_norm": 3.0204440498196456, "learning_rate": 5.143806596014121e-06, "loss": 1.2778, "step": 44860 }, { "epoch": 1.3228659885006635, "grad_norm": 3.0143985781973437, "learning_rate": 5.143209382870435e-06, "loss": 1.2915, "step": 44865 }, { "epoch": 1.3230134158926727, "grad_norm": 2.83407405681203, "learning_rate": 5.142612128729972e-06, "loss": 1.3016, "step": 44870 }, { "epoch": 1.3231608432846822, "grad_norm": 2.8713721677146524, "learning_rate": 5.142014833610307e-06, "loss": 1.2986, "step": 44875 }, { "epoch": 1.3233082706766917, "grad_norm": 2.914523204944686, "learning_rate": 5.141417497529016e-06, "loss": 1.3137, "step": 44880 }, { "epoch": 1.3234556980687011, "grad_norm": 2.957921073930078, "learning_rate": 5.140820120503676e-06, "loss": 1.3385, "step": 44885 }, { "epoch": 1.3236031254607106, "grad_norm": 2.8663577993910643, "learning_rate": 5.1402227025518664e-06, "loss": 1.2938, "step": 44890 }, { "epoch": 1.32375055285272, "grad_norm": 2.957691968488141, "learning_rate": 5.139625243691166e-06, "loss": 1.3572, "step": 44895 }, { "epoch": 1.3238979802447295, "grad_norm": 2.740249376779972, "learning_rate": 5.139027743939156e-06, "loss": 1.2677, "step": 44900 }, { "epoch": 1.324045407636739, "grad_norm": 2.829942797492076, "learning_rate": 5.1384302033134185e-06, "loss": 1.3134, "step": 44905 }, { "epoch": 1.3241928350287484, "grad_norm": 2.9712839719785444, "learning_rate": 5.137832621831537e-06, "loss": 1.3717, "step": 44910 }, { "epoch": 1.3243402624207579, "grad_norm": 2.9746826493166303, "learning_rate": 5.137234999511096e-06, "loss": 1.3346, "step": 44915 }, { "epoch": 1.3244876898127673, "grad_norm": 2.924524088154139, "learning_rate": 5.13663733636968e-06, "loss": 1.3359, "step": 44920 }, { "epoch": 1.3246351172047768, "grad_norm": 2.8484779852236164, "learning_rate": 5.136039632424879e-06, "loss": 1.2963, "step": 44925 }, { "epoch": 1.324782544596786, "grad_norm": 2.7097405770041636, "learning_rate": 5.1354418876942765e-06, "loss": 1.3619, "step": 44930 }, { "epoch": 1.3249299719887955, "grad_norm": 2.778248549837725, "learning_rate": 5.134844102195467e-06, "loss": 1.2985, "step": 44935 }, { "epoch": 1.325077399380805, "grad_norm": 2.956172064156228, "learning_rate": 5.134246275946037e-06, "loss": 1.3022, "step": 44940 }, { "epoch": 1.3252248267728144, "grad_norm": 2.9374182558026636, "learning_rate": 5.133648408963579e-06, "loss": 1.3016, "step": 44945 }, { "epoch": 1.3253722541648238, "grad_norm": 2.970213868663759, "learning_rate": 5.133050501265689e-06, "loss": 1.334, "step": 44950 }, { "epoch": 1.3255196815568333, "grad_norm": 2.769460734322287, "learning_rate": 5.132452552869957e-06, "loss": 1.3168, "step": 44955 }, { "epoch": 1.3256671089488428, "grad_norm": 2.7913757479487185, "learning_rate": 5.13185456379398e-06, "loss": 1.27, "step": 44960 }, { "epoch": 1.325814536340852, "grad_norm": 2.826330117751777, "learning_rate": 5.131256534055356e-06, "loss": 1.2964, "step": 44965 }, { "epoch": 1.3259619637328615, "grad_norm": 2.9691020264979167, "learning_rate": 5.130658463671679e-06, "loss": 1.3511, "step": 44970 }, { "epoch": 1.326109391124871, "grad_norm": 2.9796408109416985, "learning_rate": 5.1300603526605514e-06, "loss": 1.3417, "step": 44975 }, { "epoch": 1.3262568185168804, "grad_norm": 2.8453149893766816, "learning_rate": 5.129462201039572e-06, "loss": 1.3101, "step": 44980 }, { "epoch": 1.3264042459088898, "grad_norm": 3.002433342177515, "learning_rate": 5.1288640088263425e-06, "loss": 1.3284, "step": 44985 }, { "epoch": 1.3265516733008993, "grad_norm": 2.9655053997165104, "learning_rate": 5.128265776038465e-06, "loss": 1.3699, "step": 44990 }, { "epoch": 1.3266991006929088, "grad_norm": 2.8522420715865326, "learning_rate": 5.127667502693542e-06, "loss": 1.3018, "step": 44995 }, { "epoch": 1.3268465280849182, "grad_norm": 2.728715180015117, "learning_rate": 5.12706918880918e-06, "loss": 1.3025, "step": 45000 }, { "epoch": 1.3268465280849182, "eval_loss": 1.1105644702911377, "eval_runtime": 4.2813, "eval_samples_per_second": 92.496, "eval_steps_per_second": 3.036, "step": 45000 }, { "epoch": 1.3269939554769277, "grad_norm": 3.0298256751001382, "learning_rate": 5.126470834402986e-06, "loss": 1.2741, "step": 45005 }, { "epoch": 1.3271413828689371, "grad_norm": 3.0389076900397867, "learning_rate": 5.125872439492565e-06, "loss": 1.3976, "step": 45010 }, { "epoch": 1.3272888102609466, "grad_norm": 2.906318230233326, "learning_rate": 5.125274004095527e-06, "loss": 1.3063, "step": 45015 }, { "epoch": 1.327436237652956, "grad_norm": 2.7903615086482074, "learning_rate": 5.124675528229481e-06, "loss": 1.277, "step": 45020 }, { "epoch": 1.3275836650449653, "grad_norm": 2.8549315526487287, "learning_rate": 5.124077011912037e-06, "loss": 1.3476, "step": 45025 }, { "epoch": 1.3277310924369747, "grad_norm": 2.916830819130525, "learning_rate": 5.12347845516081e-06, "loss": 1.3009, "step": 45030 }, { "epoch": 1.3278785198289842, "grad_norm": 2.948588241337105, "learning_rate": 5.122879857993408e-06, "loss": 1.2843, "step": 45035 }, { "epoch": 1.3280259472209937, "grad_norm": 2.92244481273732, "learning_rate": 5.122281220427449e-06, "loss": 1.3687, "step": 45040 }, { "epoch": 1.328173374613003, "grad_norm": 2.825533285349375, "learning_rate": 5.12168254248055e-06, "loss": 1.2521, "step": 45045 }, { "epoch": 1.3283208020050126, "grad_norm": 2.8438384833705843, "learning_rate": 5.121083824170324e-06, "loss": 1.3014, "step": 45050 }, { "epoch": 1.328468229397022, "grad_norm": 2.919710013552683, "learning_rate": 5.120485065514393e-06, "loss": 1.3487, "step": 45055 }, { "epoch": 1.3286156567890315, "grad_norm": 2.886708011680144, "learning_rate": 5.119886266530372e-06, "loss": 1.2922, "step": 45060 }, { "epoch": 1.3287630841810407, "grad_norm": 2.923015002190777, "learning_rate": 5.119287427235885e-06, "loss": 1.3776, "step": 45065 }, { "epoch": 1.3289105115730502, "grad_norm": 2.6859654850149948, "learning_rate": 5.11868854764855e-06, "loss": 1.2915, "step": 45070 }, { "epoch": 1.3290579389650596, "grad_norm": 2.878810178963613, "learning_rate": 5.118089627785992e-06, "loss": 1.29, "step": 45075 }, { "epoch": 1.329205366357069, "grad_norm": 2.807032606878893, "learning_rate": 5.117490667665836e-06, "loss": 1.2995, "step": 45080 }, { "epoch": 1.3293527937490786, "grad_norm": 2.887251217961538, "learning_rate": 5.116891667305705e-06, "loss": 1.3533, "step": 45085 }, { "epoch": 1.329500221141088, "grad_norm": 2.9486025379349634, "learning_rate": 5.116292626723226e-06, "loss": 1.3235, "step": 45090 }, { "epoch": 1.3296476485330975, "grad_norm": 2.8921165686029897, "learning_rate": 5.115693545936026e-06, "loss": 1.285, "step": 45095 }, { "epoch": 1.329795075925107, "grad_norm": 3.006478430548971, "learning_rate": 5.1150944249617335e-06, "loss": 1.321, "step": 45100 }, { "epoch": 1.3299425033171164, "grad_norm": 2.9392079262082507, "learning_rate": 5.114495263817979e-06, "loss": 1.3503, "step": 45105 }, { "epoch": 1.3300899307091258, "grad_norm": 2.8155601992486985, "learning_rate": 5.113896062522395e-06, "loss": 1.2827, "step": 45110 }, { "epoch": 1.3302373581011353, "grad_norm": 3.1044572131713206, "learning_rate": 5.1132968210926095e-06, "loss": 1.3457, "step": 45115 }, { "epoch": 1.3303847854931448, "grad_norm": 2.866013549402472, "learning_rate": 5.112697539546259e-06, "loss": 1.3106, "step": 45120 }, { "epoch": 1.330532212885154, "grad_norm": 3.0076047351365913, "learning_rate": 5.1120982179009774e-06, "loss": 1.305, "step": 45125 }, { "epoch": 1.3306796402771635, "grad_norm": 2.9300113086569097, "learning_rate": 5.1114988561744e-06, "loss": 1.2781, "step": 45130 }, { "epoch": 1.330827067669173, "grad_norm": 2.801826157920945, "learning_rate": 5.1108994543841654e-06, "loss": 1.3503, "step": 45135 }, { "epoch": 1.3309744950611824, "grad_norm": 2.806831935483237, "learning_rate": 5.11030001254791e-06, "loss": 1.3031, "step": 45140 }, { "epoch": 1.3311219224531918, "grad_norm": 2.9418970287386004, "learning_rate": 5.109700530683272e-06, "loss": 1.3522, "step": 45145 }, { "epoch": 1.3312693498452013, "grad_norm": 2.8103896503157126, "learning_rate": 5.109101008807895e-06, "loss": 1.326, "step": 45150 }, { "epoch": 1.3314167772372107, "grad_norm": 2.9733066685835774, "learning_rate": 5.108501446939419e-06, "loss": 1.3588, "step": 45155 }, { "epoch": 1.33156420462922, "grad_norm": 2.9440402645414543, "learning_rate": 5.1079018450954865e-06, "loss": 1.3525, "step": 45160 }, { "epoch": 1.3317116320212294, "grad_norm": 3.0559119689637417, "learning_rate": 5.10730220329374e-06, "loss": 1.3312, "step": 45165 }, { "epoch": 1.331859059413239, "grad_norm": 2.888015483330649, "learning_rate": 5.1067025215518275e-06, "loss": 1.3693, "step": 45170 }, { "epoch": 1.3320064868052484, "grad_norm": 2.9737092887014422, "learning_rate": 5.106102799887393e-06, "loss": 1.315, "step": 45175 }, { "epoch": 1.3321539141972578, "grad_norm": 2.79835531769525, "learning_rate": 5.105503038318086e-06, "loss": 1.2983, "step": 45180 }, { "epoch": 1.3323013415892673, "grad_norm": 2.8353890993347144, "learning_rate": 5.104903236861556e-06, "loss": 1.2744, "step": 45185 }, { "epoch": 1.3324487689812767, "grad_norm": 2.9332720618952934, "learning_rate": 5.104303395535448e-06, "loss": 1.2713, "step": 45190 }, { "epoch": 1.3325961963732862, "grad_norm": 2.9257248062618744, "learning_rate": 5.103703514357417e-06, "loss": 1.3187, "step": 45195 }, { "epoch": 1.3327436237652956, "grad_norm": 3.0811315221189166, "learning_rate": 5.103103593345115e-06, "loss": 1.3075, "step": 45200 }, { "epoch": 1.332891051157305, "grad_norm": 2.8325185473348564, "learning_rate": 5.102503632516195e-06, "loss": 1.2787, "step": 45205 }, { "epoch": 1.3330384785493146, "grad_norm": 2.958476020231988, "learning_rate": 5.101903631888312e-06, "loss": 1.3258, "step": 45210 }, { "epoch": 1.333185905941324, "grad_norm": 3.000959280878001, "learning_rate": 5.101303591479119e-06, "loss": 1.3568, "step": 45215 }, { "epoch": 1.3333333333333333, "grad_norm": 2.8443894375088297, "learning_rate": 5.100703511306276e-06, "loss": 1.3145, "step": 45220 }, { "epoch": 1.3334807607253427, "grad_norm": 2.743431699249008, "learning_rate": 5.1001033913874394e-06, "loss": 1.2748, "step": 45225 }, { "epoch": 1.3336281881173522, "grad_norm": 2.8301980162192946, "learning_rate": 5.099503231740268e-06, "loss": 1.3379, "step": 45230 }, { "epoch": 1.3337756155093616, "grad_norm": 2.7707547757870117, "learning_rate": 5.098903032382424e-06, "loss": 1.3029, "step": 45235 }, { "epoch": 1.333923042901371, "grad_norm": 2.777292777009934, "learning_rate": 5.098302793331568e-06, "loss": 1.318, "step": 45240 }, { "epoch": 1.3340704702933806, "grad_norm": 2.9482554220173998, "learning_rate": 5.0977025146053625e-06, "loss": 1.3703, "step": 45245 }, { "epoch": 1.33421789768539, "grad_norm": 2.799318243585282, "learning_rate": 5.097102196221472e-06, "loss": 1.296, "step": 45250 }, { "epoch": 1.3343653250773992, "grad_norm": 2.932021248459084, "learning_rate": 5.09650183819756e-06, "loss": 1.3124, "step": 45255 }, { "epoch": 1.3345127524694087, "grad_norm": 2.89526254547961, "learning_rate": 5.095901440551295e-06, "loss": 1.3545, "step": 45260 }, { "epoch": 1.3346601798614182, "grad_norm": 2.85390311798159, "learning_rate": 5.095301003300343e-06, "loss": 1.3228, "step": 45265 }, { "epoch": 1.3348076072534276, "grad_norm": 2.8029451372152003, "learning_rate": 5.0947005264623725e-06, "loss": 1.2767, "step": 45270 }, { "epoch": 1.334955034645437, "grad_norm": 2.7994296085169856, "learning_rate": 5.094100010055054e-06, "loss": 1.3015, "step": 45275 }, { "epoch": 1.3351024620374465, "grad_norm": 2.9751831295479763, "learning_rate": 5.0934994540960564e-06, "loss": 1.2935, "step": 45280 }, { "epoch": 1.335249889429456, "grad_norm": 2.927370365659613, "learning_rate": 5.092898858603055e-06, "loss": 1.3283, "step": 45285 }, { "epoch": 1.3353973168214655, "grad_norm": 2.727470123936911, "learning_rate": 5.092298223593722e-06, "loss": 1.2827, "step": 45290 }, { "epoch": 1.335544744213475, "grad_norm": 2.86689602339389, "learning_rate": 5.091697549085729e-06, "loss": 1.3067, "step": 45295 }, { "epoch": 1.3356921716054844, "grad_norm": 2.806190534139773, "learning_rate": 5.091096835096754e-06, "loss": 1.2745, "step": 45300 }, { "epoch": 1.3358395989974938, "grad_norm": 2.8685979178930125, "learning_rate": 5.090496081644474e-06, "loss": 1.2684, "step": 45305 }, { "epoch": 1.3359870263895033, "grad_norm": 2.8377378828092605, "learning_rate": 5.0898952887465655e-06, "loss": 1.3232, "step": 45310 }, { "epoch": 1.3361344537815127, "grad_norm": 2.8619623586310174, "learning_rate": 5.0892944564207085e-06, "loss": 1.3246, "step": 45315 }, { "epoch": 1.336281881173522, "grad_norm": 2.834673626002683, "learning_rate": 5.088693584684583e-06, "loss": 1.3397, "step": 45320 }, { "epoch": 1.3364293085655314, "grad_norm": 2.694870078933462, "learning_rate": 5.088092673555869e-06, "loss": 1.2917, "step": 45325 }, { "epoch": 1.336576735957541, "grad_norm": 3.039864013171282, "learning_rate": 5.087491723052251e-06, "loss": 1.3017, "step": 45330 }, { "epoch": 1.3367241633495504, "grad_norm": 3.035001703494843, "learning_rate": 5.086890733191411e-06, "loss": 1.3059, "step": 45335 }, { "epoch": 1.3368715907415598, "grad_norm": 2.8604494748373175, "learning_rate": 5.0862897039910365e-06, "loss": 1.3153, "step": 45340 }, { "epoch": 1.3370190181335693, "grad_norm": 2.883688986757106, "learning_rate": 5.08568863546881e-06, "loss": 1.2702, "step": 45345 }, { "epoch": 1.3371664455255787, "grad_norm": 2.909809211129214, "learning_rate": 5.085087527642421e-06, "loss": 1.2914, "step": 45350 }, { "epoch": 1.337313872917588, "grad_norm": 3.049333984337725, "learning_rate": 5.084486380529557e-06, "loss": 1.3509, "step": 45355 }, { "epoch": 1.3374613003095974, "grad_norm": 2.7915933341795167, "learning_rate": 5.083885194147906e-06, "loss": 1.281, "step": 45360 }, { "epoch": 1.3376087277016069, "grad_norm": 2.8363779675090193, "learning_rate": 5.083283968515162e-06, "loss": 1.2994, "step": 45365 }, { "epoch": 1.3377561550936163, "grad_norm": 2.826202724169527, "learning_rate": 5.082682703649013e-06, "loss": 1.3036, "step": 45370 }, { "epoch": 1.3379035824856258, "grad_norm": 2.9235936307615957, "learning_rate": 5.082081399567155e-06, "loss": 1.3455, "step": 45375 }, { "epoch": 1.3380510098776353, "grad_norm": 3.1826960439791794, "learning_rate": 5.08148005628728e-06, "loss": 1.2982, "step": 45380 }, { "epoch": 1.3381984372696447, "grad_norm": 2.89775149009214, "learning_rate": 5.080878673827084e-06, "loss": 1.3223, "step": 45385 }, { "epoch": 1.3383458646616542, "grad_norm": 2.863797089118666, "learning_rate": 5.080277252204263e-06, "loss": 1.3304, "step": 45390 }, { "epoch": 1.3384932920536636, "grad_norm": 2.922411301990715, "learning_rate": 5.079675791436516e-06, "loss": 1.3846, "step": 45395 }, { "epoch": 1.338640719445673, "grad_norm": 2.919547978002894, "learning_rate": 5.07907429154154e-06, "loss": 1.3369, "step": 45400 }, { "epoch": 1.3387881468376825, "grad_norm": 2.908858648098953, "learning_rate": 5.078472752537034e-06, "loss": 1.3441, "step": 45405 }, { "epoch": 1.338935574229692, "grad_norm": 2.8902753776915633, "learning_rate": 5.077871174440702e-06, "loss": 1.3273, "step": 45410 }, { "epoch": 1.3390830016217012, "grad_norm": 2.824462948278476, "learning_rate": 5.077269557270243e-06, "loss": 1.3218, "step": 45415 }, { "epoch": 1.3392304290137107, "grad_norm": 3.0835480298919, "learning_rate": 5.076667901043364e-06, "loss": 1.3634, "step": 45420 }, { "epoch": 1.3393778564057202, "grad_norm": 2.8838815000711646, "learning_rate": 5.076066205777766e-06, "loss": 1.3222, "step": 45425 }, { "epoch": 1.3395252837977296, "grad_norm": 2.891136737878884, "learning_rate": 5.075464471491155e-06, "loss": 1.3386, "step": 45430 }, { "epoch": 1.339672711189739, "grad_norm": 3.009036429502354, "learning_rate": 5.074862698201238e-06, "loss": 1.2956, "step": 45435 }, { "epoch": 1.3398201385817485, "grad_norm": 2.7971976122286604, "learning_rate": 5.074260885925724e-06, "loss": 1.3326, "step": 45440 }, { "epoch": 1.339967565973758, "grad_norm": 2.7036601650214873, "learning_rate": 5.073659034682323e-06, "loss": 1.2661, "step": 45445 }, { "epoch": 1.3401149933657672, "grad_norm": 2.753733064303237, "learning_rate": 5.073057144488741e-06, "loss": 1.2862, "step": 45450 }, { "epoch": 1.3402624207577767, "grad_norm": 2.897038054815405, "learning_rate": 5.072455215362693e-06, "loss": 1.3305, "step": 45455 }, { "epoch": 1.3404098481497861, "grad_norm": 2.879525348478393, "learning_rate": 5.071853247321889e-06, "loss": 1.2762, "step": 45460 }, { "epoch": 1.3405572755417956, "grad_norm": 2.8885205986865654, "learning_rate": 5.071251240384044e-06, "loss": 1.2812, "step": 45465 }, { "epoch": 1.340704702933805, "grad_norm": 2.8463041449292468, "learning_rate": 5.070649194566874e-06, "loss": 1.2974, "step": 45470 }, { "epoch": 1.3408521303258145, "grad_norm": 2.8263915736639875, "learning_rate": 5.0700471098880914e-06, "loss": 1.2583, "step": 45475 }, { "epoch": 1.340999557717824, "grad_norm": 2.944532197335078, "learning_rate": 5.069444986365418e-06, "loss": 1.3326, "step": 45480 }, { "epoch": 1.3411469851098334, "grad_norm": 2.8590660549271787, "learning_rate": 5.068842824016567e-06, "loss": 1.2877, "step": 45485 }, { "epoch": 1.341294412501843, "grad_norm": 3.123320415930491, "learning_rate": 5.06824062285926e-06, "loss": 1.3632, "step": 45490 }, { "epoch": 1.3414418398938524, "grad_norm": 2.8080006606414396, "learning_rate": 5.067638382911219e-06, "loss": 1.3187, "step": 45495 }, { "epoch": 1.3415892672858618, "grad_norm": 2.9537944349331755, "learning_rate": 5.067036104190163e-06, "loss": 1.271, "step": 45500 }, { "epoch": 1.3415892672858618, "eval_loss": 1.1086945533752441, "eval_runtime": 4.1663, "eval_samples_per_second": 95.049, "eval_steps_per_second": 3.12, "step": 45500 }, { "epoch": 1.3417366946778713, "grad_norm": 2.8126344745728704, "learning_rate": 5.066433786713815e-06, "loss": 1.2452, "step": 45505 }, { "epoch": 1.3418841220698805, "grad_norm": 2.902339431814695, "learning_rate": 5.0658314304999e-06, "loss": 1.3329, "step": 45510 }, { "epoch": 1.34203154946189, "grad_norm": 2.814826759568061, "learning_rate": 5.065229035566143e-06, "loss": 1.2536, "step": 45515 }, { "epoch": 1.3421789768538994, "grad_norm": 2.7921202787563484, "learning_rate": 5.064626601930269e-06, "loss": 1.2832, "step": 45520 }, { "epoch": 1.3423264042459089, "grad_norm": 2.8476042750677366, "learning_rate": 5.064024129610008e-06, "loss": 1.3576, "step": 45525 }, { "epoch": 1.3424738316379183, "grad_norm": 2.896424012429017, "learning_rate": 5.063421618623085e-06, "loss": 1.3108, "step": 45530 }, { "epoch": 1.3426212590299278, "grad_norm": 3.316303490516692, "learning_rate": 5.062819068987231e-06, "loss": 1.3101, "step": 45535 }, { "epoch": 1.3427686864219373, "grad_norm": 2.8600923659581436, "learning_rate": 5.062216480720177e-06, "loss": 1.3233, "step": 45540 }, { "epoch": 1.3429161138139467, "grad_norm": 2.93012495625339, "learning_rate": 5.061613853839656e-06, "loss": 1.3501, "step": 45545 }, { "epoch": 1.343063541205956, "grad_norm": 2.903861655663467, "learning_rate": 5.061011188363399e-06, "loss": 1.3137, "step": 45550 }, { "epoch": 1.3432109685979654, "grad_norm": 2.8403881050606934, "learning_rate": 5.0604084843091405e-06, "loss": 1.3165, "step": 45555 }, { "epoch": 1.3433583959899749, "grad_norm": 2.7563029007937923, "learning_rate": 5.059805741694615e-06, "loss": 1.2631, "step": 45560 }, { "epoch": 1.3435058233819843, "grad_norm": 2.752572743284422, "learning_rate": 5.0592029605375605e-06, "loss": 1.3015, "step": 45565 }, { "epoch": 1.3436532507739938, "grad_norm": 2.8030127945727994, "learning_rate": 5.058600140855714e-06, "loss": 1.3094, "step": 45570 }, { "epoch": 1.3438006781660032, "grad_norm": 2.815647480741906, "learning_rate": 5.057997282666815e-06, "loss": 1.3475, "step": 45575 }, { "epoch": 1.3439481055580127, "grad_norm": 2.836071470920795, "learning_rate": 5.057394385988601e-06, "loss": 1.3341, "step": 45580 }, { "epoch": 1.3440955329500222, "grad_norm": 2.9811564987869485, "learning_rate": 5.056791450838815e-06, "loss": 1.3208, "step": 45585 }, { "epoch": 1.3442429603420316, "grad_norm": 2.81492488246591, "learning_rate": 5.056188477235199e-06, "loss": 1.2735, "step": 45590 }, { "epoch": 1.344390387734041, "grad_norm": 2.872229360041154, "learning_rate": 5.055585465195494e-06, "loss": 1.3008, "step": 45595 }, { "epoch": 1.3445378151260505, "grad_norm": 3.3058451965360405, "learning_rate": 5.054982414737447e-06, "loss": 1.3346, "step": 45600 }, { "epoch": 1.34468524251806, "grad_norm": 2.873369296365682, "learning_rate": 5.054379325878802e-06, "loss": 1.3387, "step": 45605 }, { "epoch": 1.3448326699100692, "grad_norm": 3.005345413208683, "learning_rate": 5.053776198637306e-06, "loss": 1.309, "step": 45610 }, { "epoch": 1.3449800973020787, "grad_norm": 2.9206629796871133, "learning_rate": 5.053173033030707e-06, "loss": 1.3418, "step": 45615 }, { "epoch": 1.3451275246940881, "grad_norm": 2.8785853466188516, "learning_rate": 5.0525698290767525e-06, "loss": 1.3244, "step": 45620 }, { "epoch": 1.3452749520860976, "grad_norm": 2.8749112421937926, "learning_rate": 5.051966586793194e-06, "loss": 1.3182, "step": 45625 }, { "epoch": 1.345422379478107, "grad_norm": 2.829828057023533, "learning_rate": 5.051363306197782e-06, "loss": 1.341, "step": 45630 }, { "epoch": 1.3455698068701165, "grad_norm": 2.895574981447669, "learning_rate": 5.050759987308269e-06, "loss": 1.3479, "step": 45635 }, { "epoch": 1.345717234262126, "grad_norm": 2.808704392425278, "learning_rate": 5.050156630142407e-06, "loss": 1.2798, "step": 45640 }, { "epoch": 1.3458646616541352, "grad_norm": 2.899042225091617, "learning_rate": 5.049553234717952e-06, "loss": 1.3409, "step": 45645 }, { "epoch": 1.3460120890461447, "grad_norm": 2.8592618061395547, "learning_rate": 5.048949801052659e-06, "loss": 1.315, "step": 45650 }, { "epoch": 1.3461595164381541, "grad_norm": 2.8741825327488333, "learning_rate": 5.048346329164286e-06, "loss": 1.3265, "step": 45655 }, { "epoch": 1.3463069438301636, "grad_norm": 2.8168442754060066, "learning_rate": 5.047742819070589e-06, "loss": 1.2684, "step": 45660 }, { "epoch": 1.346454371222173, "grad_norm": 2.9431978352267723, "learning_rate": 5.047139270789327e-06, "loss": 1.3035, "step": 45665 }, { "epoch": 1.3466017986141825, "grad_norm": 2.9879342662655466, "learning_rate": 5.046535684338261e-06, "loss": 1.2973, "step": 45670 }, { "epoch": 1.346749226006192, "grad_norm": 3.050454975033436, "learning_rate": 5.045932059735153e-06, "loss": 1.3447, "step": 45675 }, { "epoch": 1.3468966533982014, "grad_norm": 2.957936335528445, "learning_rate": 5.045328396997764e-06, "loss": 1.3564, "step": 45680 }, { "epoch": 1.3470440807902109, "grad_norm": 2.907502010939188, "learning_rate": 5.044724696143857e-06, "loss": 1.2763, "step": 45685 }, { "epoch": 1.3471915081822203, "grad_norm": 3.0886253856878176, "learning_rate": 5.0441209571911984e-06, "loss": 1.3258, "step": 45690 }, { "epoch": 1.3473389355742298, "grad_norm": 2.854454068133743, "learning_rate": 5.043517180157552e-06, "loss": 1.3118, "step": 45695 }, { "epoch": 1.3474863629662392, "grad_norm": 2.9971871392103577, "learning_rate": 5.042913365060685e-06, "loss": 1.3298, "step": 45700 }, { "epoch": 1.3476337903582485, "grad_norm": 3.0129059249319456, "learning_rate": 5.042309511918368e-06, "loss": 1.3335, "step": 45705 }, { "epoch": 1.347781217750258, "grad_norm": 2.8088203812816954, "learning_rate": 5.041705620748366e-06, "loss": 1.3011, "step": 45710 }, { "epoch": 1.3479286451422674, "grad_norm": 2.9790294239177704, "learning_rate": 5.0411016915684525e-06, "loss": 1.3233, "step": 45715 }, { "epoch": 1.3480760725342769, "grad_norm": 2.757079160178326, "learning_rate": 5.040497724396396e-06, "loss": 1.3077, "step": 45720 }, { "epoch": 1.3482234999262863, "grad_norm": 2.860549045433948, "learning_rate": 5.039893719249972e-06, "loss": 1.285, "step": 45725 }, { "epoch": 1.3483709273182958, "grad_norm": 2.9478451658178733, "learning_rate": 5.039289676146951e-06, "loss": 1.3267, "step": 45730 }, { "epoch": 1.3485183547103052, "grad_norm": 2.8370951626447867, "learning_rate": 5.03868559510511e-06, "loss": 1.3025, "step": 45735 }, { "epoch": 1.3486657821023147, "grad_norm": 2.930545940041504, "learning_rate": 5.038081476142225e-06, "loss": 1.3483, "step": 45740 }, { "epoch": 1.348813209494324, "grad_norm": 2.925949185828722, "learning_rate": 5.037477319276069e-06, "loss": 1.2853, "step": 45745 }, { "epoch": 1.3489606368863334, "grad_norm": 2.7524607495735265, "learning_rate": 5.036873124524425e-06, "loss": 1.2857, "step": 45750 }, { "epoch": 1.3491080642783428, "grad_norm": 2.996614626806384, "learning_rate": 5.036268891905067e-06, "loss": 1.3149, "step": 45755 }, { "epoch": 1.3492554916703523, "grad_norm": 2.6917345297750677, "learning_rate": 5.035664621435781e-06, "loss": 1.2658, "step": 45760 }, { "epoch": 1.3494029190623618, "grad_norm": 2.7667623365499514, "learning_rate": 5.0350603131343435e-06, "loss": 1.316, "step": 45765 }, { "epoch": 1.3495503464543712, "grad_norm": 2.7588185661521187, "learning_rate": 5.03445596701854e-06, "loss": 1.2831, "step": 45770 }, { "epoch": 1.3496977738463807, "grad_norm": 2.818502951844824, "learning_rate": 5.033851583106152e-06, "loss": 1.3145, "step": 45775 }, { "epoch": 1.3498452012383901, "grad_norm": 2.983843757747358, "learning_rate": 5.033247161414966e-06, "loss": 1.3099, "step": 45780 }, { "epoch": 1.3499926286303996, "grad_norm": 2.9538348748107905, "learning_rate": 5.032642701962768e-06, "loss": 1.3109, "step": 45785 }, { "epoch": 1.350140056022409, "grad_norm": 2.8232048283799163, "learning_rate": 5.032038204767342e-06, "loss": 1.3376, "step": 45790 }, { "epoch": 1.3502874834144185, "grad_norm": 2.9152255295059457, "learning_rate": 5.031433669846479e-06, "loss": 1.3425, "step": 45795 }, { "epoch": 1.350434910806428, "grad_norm": 2.9490110672169694, "learning_rate": 5.030829097217966e-06, "loss": 1.3161, "step": 45800 }, { "epoch": 1.3505823381984372, "grad_norm": 2.7523501920638553, "learning_rate": 5.030224486899596e-06, "loss": 1.2823, "step": 45805 }, { "epoch": 1.3507297655904467, "grad_norm": 2.82390268808695, "learning_rate": 5.029619838909158e-06, "loss": 1.3451, "step": 45810 }, { "epoch": 1.3508771929824561, "grad_norm": 2.921294924473959, "learning_rate": 5.029015153264444e-06, "loss": 1.3533, "step": 45815 }, { "epoch": 1.3510246203744656, "grad_norm": 2.8166401887939303, "learning_rate": 5.028410429983249e-06, "loss": 1.3415, "step": 45820 }, { "epoch": 1.351172047766475, "grad_norm": 2.954901341203725, "learning_rate": 5.027805669083367e-06, "loss": 1.3062, "step": 45825 }, { "epoch": 1.3513194751584845, "grad_norm": 2.854972662368119, "learning_rate": 5.027200870582595e-06, "loss": 1.3457, "step": 45830 }, { "epoch": 1.351466902550494, "grad_norm": 2.873838405428172, "learning_rate": 5.026596034498729e-06, "loss": 1.2935, "step": 45835 }, { "epoch": 1.3516143299425032, "grad_norm": 2.666960779446997, "learning_rate": 5.025991160849566e-06, "loss": 1.2999, "step": 45840 }, { "epoch": 1.3517617573345126, "grad_norm": 2.859650336733124, "learning_rate": 5.025386249652906e-06, "loss": 1.3938, "step": 45845 }, { "epoch": 1.351909184726522, "grad_norm": 2.940215793118805, "learning_rate": 5.02478130092655e-06, "loss": 1.2749, "step": 45850 }, { "epoch": 1.3520566121185316, "grad_norm": 2.8843327292576895, "learning_rate": 5.0241763146882985e-06, "loss": 1.3155, "step": 45855 }, { "epoch": 1.352204039510541, "grad_norm": 2.8773996411312406, "learning_rate": 5.023571290955954e-06, "loss": 1.3444, "step": 45860 }, { "epoch": 1.3523514669025505, "grad_norm": 2.9865110327379627, "learning_rate": 5.0229662297473205e-06, "loss": 1.2918, "step": 45865 }, { "epoch": 1.35249889429456, "grad_norm": 2.8496676434016437, "learning_rate": 5.0223611310802014e-06, "loss": 1.3164, "step": 45870 }, { "epoch": 1.3526463216865694, "grad_norm": 2.8543207073297783, "learning_rate": 5.021755994972403e-06, "loss": 1.307, "step": 45875 }, { "epoch": 1.3527937490785789, "grad_norm": 2.9321695941822288, "learning_rate": 5.0211508214417324e-06, "loss": 1.2973, "step": 45880 }, { "epoch": 1.3529411764705883, "grad_norm": 2.851425071768335, "learning_rate": 5.020545610505998e-06, "loss": 1.3065, "step": 45885 }, { "epoch": 1.3530886038625978, "grad_norm": 2.795695782797966, "learning_rate": 5.019940362183008e-06, "loss": 1.3119, "step": 45890 }, { "epoch": 1.3532360312546072, "grad_norm": 2.994338351014764, "learning_rate": 5.019335076490573e-06, "loss": 1.3097, "step": 45895 }, { "epoch": 1.3533834586466165, "grad_norm": 2.859945164401408, "learning_rate": 5.018729753446502e-06, "loss": 1.3227, "step": 45900 }, { "epoch": 1.353530886038626, "grad_norm": 3.1136701249214074, "learning_rate": 5.018124393068611e-06, "loss": 1.3848, "step": 45905 }, { "epoch": 1.3536783134306354, "grad_norm": 2.9941601393065853, "learning_rate": 5.0175189953747115e-06, "loss": 1.3298, "step": 45910 }, { "epoch": 1.3538257408226448, "grad_norm": 2.8417474473391775, "learning_rate": 5.016913560382619e-06, "loss": 1.3503, "step": 45915 }, { "epoch": 1.3539731682146543, "grad_norm": 2.5784027821745443, "learning_rate": 5.016308088110146e-06, "loss": 1.2834, "step": 45920 }, { "epoch": 1.3541205956066638, "grad_norm": 3.008708927373648, "learning_rate": 5.015702578575113e-06, "loss": 1.2905, "step": 45925 }, { "epoch": 1.3542680229986732, "grad_norm": 2.961964626504604, "learning_rate": 5.0150970317953355e-06, "loss": 1.3089, "step": 45930 }, { "epoch": 1.3544154503906825, "grad_norm": 2.8983705762098912, "learning_rate": 5.014491447788635e-06, "loss": 1.2774, "step": 45935 }, { "epoch": 1.354562877782692, "grad_norm": 3.0018030230725916, "learning_rate": 5.013885826572829e-06, "loss": 1.3436, "step": 45940 }, { "epoch": 1.3547103051747014, "grad_norm": 2.9628507020928723, "learning_rate": 5.013280168165739e-06, "loss": 1.2909, "step": 45945 }, { "epoch": 1.3548577325667108, "grad_norm": 2.888407847948296, "learning_rate": 5.012674472585188e-06, "loss": 1.3472, "step": 45950 }, { "epoch": 1.3550051599587203, "grad_norm": 2.8714141160506457, "learning_rate": 5.012068739848997e-06, "loss": 1.3373, "step": 45955 }, { "epoch": 1.3551525873507297, "grad_norm": 2.848799569190459, "learning_rate": 5.011462969974994e-06, "loss": 1.2971, "step": 45960 }, { "epoch": 1.3553000147427392, "grad_norm": 2.9761165270567305, "learning_rate": 5.010857162981004e-06, "loss": 1.3388, "step": 45965 }, { "epoch": 1.3554474421347487, "grad_norm": 2.8942986583195838, "learning_rate": 5.0102513188848495e-06, "loss": 1.3138, "step": 45970 }, { "epoch": 1.3555948695267581, "grad_norm": 2.832612668278388, "learning_rate": 5.009645437704362e-06, "loss": 1.2975, "step": 45975 }, { "epoch": 1.3557422969187676, "grad_norm": 2.8330801377084844, "learning_rate": 5.009039519457369e-06, "loss": 1.3404, "step": 45980 }, { "epoch": 1.355889724310777, "grad_norm": 2.7801512674535385, "learning_rate": 5.0084335641617e-06, "loss": 1.3095, "step": 45985 }, { "epoch": 1.3560371517027865, "grad_norm": 2.8670132121044194, "learning_rate": 5.007827571835187e-06, "loss": 1.2784, "step": 45990 }, { "epoch": 1.356184579094796, "grad_norm": 3.23505183317418, "learning_rate": 5.007221542495662e-06, "loss": 1.3015, "step": 45995 }, { "epoch": 1.3563320064868052, "grad_norm": 2.9451209954943565, "learning_rate": 5.006615476160957e-06, "loss": 1.2566, "step": 46000 }, { "epoch": 1.3563320064868052, "eval_loss": 1.107494592666626, "eval_runtime": 4.2977, "eval_samples_per_second": 92.142, "eval_steps_per_second": 3.025, "step": 46000 }, { "epoch": 1.3564794338788146, "grad_norm": 2.845040011399067, "learning_rate": 5.006009372848906e-06, "loss": 1.2791, "step": 46005 }, { "epoch": 1.356626861270824, "grad_norm": 3.0033687138662266, "learning_rate": 5.0054032325773455e-06, "loss": 1.3389, "step": 46010 }, { "epoch": 1.3567742886628336, "grad_norm": 2.724847261407768, "learning_rate": 5.0047970553641114e-06, "loss": 1.317, "step": 46015 }, { "epoch": 1.356921716054843, "grad_norm": 3.010347838922852, "learning_rate": 5.004190841227042e-06, "loss": 1.3632, "step": 46020 }, { "epoch": 1.3570691434468525, "grad_norm": 2.885748639904082, "learning_rate": 5.003584590183974e-06, "loss": 1.2926, "step": 46025 }, { "epoch": 1.357216570838862, "grad_norm": 2.9566621649332756, "learning_rate": 5.0029783022527475e-06, "loss": 1.3057, "step": 46030 }, { "epoch": 1.3573639982308712, "grad_norm": 2.7807947649384603, "learning_rate": 5.002371977451204e-06, "loss": 1.3043, "step": 46035 }, { "epoch": 1.3575114256228806, "grad_norm": 2.8855278361005543, "learning_rate": 5.001765615797185e-06, "loss": 1.3271, "step": 46040 }, { "epoch": 1.35765885301489, "grad_norm": 2.9518557830857497, "learning_rate": 5.0011592173085346e-06, "loss": 1.3193, "step": 46045 }, { "epoch": 1.3578062804068995, "grad_norm": 2.798051869841356, "learning_rate": 5.000552782003094e-06, "loss": 1.3756, "step": 46050 }, { "epoch": 1.357953707798909, "grad_norm": 2.824684737820835, "learning_rate": 4.99994630989871e-06, "loss": 1.3124, "step": 46055 }, { "epoch": 1.3581011351909185, "grad_norm": 2.879359540716515, "learning_rate": 4.9993398010132285e-06, "loss": 1.2819, "step": 46060 }, { "epoch": 1.358248562582928, "grad_norm": 2.7746974111915605, "learning_rate": 4.998733255364499e-06, "loss": 1.3114, "step": 46065 }, { "epoch": 1.3583959899749374, "grad_norm": 2.832111308752927, "learning_rate": 4.998126672970366e-06, "loss": 1.278, "step": 46070 }, { "epoch": 1.3585434173669468, "grad_norm": 3.04611397711172, "learning_rate": 4.997520053848679e-06, "loss": 1.3494, "step": 46075 }, { "epoch": 1.3586908447589563, "grad_norm": 2.7881386315665417, "learning_rate": 4.996913398017291e-06, "loss": 1.2829, "step": 46080 }, { "epoch": 1.3588382721509658, "grad_norm": 2.920700179069054, "learning_rate": 4.996306705494053e-06, "loss": 1.3077, "step": 46085 }, { "epoch": 1.3589856995429752, "grad_norm": 2.902989698790448, "learning_rate": 4.995699976296817e-06, "loss": 1.3055, "step": 46090 }, { "epoch": 1.3591331269349844, "grad_norm": 2.917275205472087, "learning_rate": 4.9950932104434365e-06, "loss": 1.3198, "step": 46095 }, { "epoch": 1.359280554326994, "grad_norm": 2.8428573314018037, "learning_rate": 4.994486407951766e-06, "loss": 1.2614, "step": 46100 }, { "epoch": 1.3594279817190034, "grad_norm": 3.004162711251761, "learning_rate": 4.993879568839662e-06, "loss": 1.3725, "step": 46105 }, { "epoch": 1.3595754091110128, "grad_norm": 2.9334400283921935, "learning_rate": 4.9932726931249815e-06, "loss": 1.2912, "step": 46110 }, { "epoch": 1.3597228365030223, "grad_norm": 2.7458906096526268, "learning_rate": 4.992665780825582e-06, "loss": 1.3324, "step": 46115 }, { "epoch": 1.3598702638950317, "grad_norm": 2.8611174447152803, "learning_rate": 4.992058831959324e-06, "loss": 1.3222, "step": 46120 }, { "epoch": 1.3600176912870412, "grad_norm": 2.7383674977659838, "learning_rate": 4.991451846544067e-06, "loss": 1.3023, "step": 46125 }, { "epoch": 1.3601651186790504, "grad_norm": 2.999970883691177, "learning_rate": 4.9908448245976695e-06, "loss": 1.3399, "step": 46130 }, { "epoch": 1.36031254607106, "grad_norm": 2.873908278583792, "learning_rate": 4.990237766137999e-06, "loss": 1.3007, "step": 46135 }, { "epoch": 1.3604599734630693, "grad_norm": 2.789758239940424, "learning_rate": 4.989630671182914e-06, "loss": 1.3489, "step": 46140 }, { "epoch": 1.3606074008550788, "grad_norm": 2.8931331434780496, "learning_rate": 4.9890235397502806e-06, "loss": 1.3303, "step": 46145 }, { "epoch": 1.3607548282470883, "grad_norm": 2.870550911296668, "learning_rate": 4.988416371857966e-06, "loss": 1.2801, "step": 46150 }, { "epoch": 1.3609022556390977, "grad_norm": 2.860586265204895, "learning_rate": 4.9878091675238355e-06, "loss": 1.2883, "step": 46155 }, { "epoch": 1.3610496830311072, "grad_norm": 2.9389343630968003, "learning_rate": 4.987201926765757e-06, "loss": 1.3389, "step": 46160 }, { "epoch": 1.3611971104231166, "grad_norm": 2.8696477144383006, "learning_rate": 4.986594649601598e-06, "loss": 1.3435, "step": 46165 }, { "epoch": 1.361344537815126, "grad_norm": 2.902112244990029, "learning_rate": 4.9859873360492296e-06, "loss": 1.321, "step": 46170 }, { "epoch": 1.3614919652071356, "grad_norm": 2.9526773818475904, "learning_rate": 4.985379986126523e-06, "loss": 1.3065, "step": 46175 }, { "epoch": 1.361639392599145, "grad_norm": 2.8338151575205863, "learning_rate": 4.98477259985135e-06, "loss": 1.3139, "step": 46180 }, { "epoch": 1.3617868199911545, "grad_norm": 2.8269505244542694, "learning_rate": 4.984165177241582e-06, "loss": 1.2925, "step": 46185 }, { "epoch": 1.3619342473831637, "grad_norm": 2.860933927752816, "learning_rate": 4.983557718315095e-06, "loss": 1.3181, "step": 46190 }, { "epoch": 1.3620816747751732, "grad_norm": 2.873994569380294, "learning_rate": 4.982950223089764e-06, "loss": 1.3175, "step": 46195 }, { "epoch": 1.3622291021671826, "grad_norm": 2.8911325075736447, "learning_rate": 4.982342691583465e-06, "loss": 1.3197, "step": 46200 }, { "epoch": 1.362376529559192, "grad_norm": 2.8721618516844583, "learning_rate": 4.981735123814075e-06, "loss": 1.3398, "step": 46205 }, { "epoch": 1.3625239569512015, "grad_norm": 2.774218402037716, "learning_rate": 4.981127519799471e-06, "loss": 1.2739, "step": 46210 }, { "epoch": 1.362671384343211, "grad_norm": 2.900732358747824, "learning_rate": 4.980519879557535e-06, "loss": 1.278, "step": 46215 }, { "epoch": 1.3628188117352205, "grad_norm": 2.9954553461350644, "learning_rate": 4.979912203106146e-06, "loss": 1.3559, "step": 46220 }, { "epoch": 1.36296623912723, "grad_norm": 3.0047386491091994, "learning_rate": 4.979304490463187e-06, "loss": 1.3271, "step": 46225 }, { "epoch": 1.3631136665192392, "grad_norm": 2.795872601626595, "learning_rate": 4.978696741646539e-06, "loss": 1.316, "step": 46230 }, { "epoch": 1.3632610939112486, "grad_norm": 2.8699303696745986, "learning_rate": 4.978088956674087e-06, "loss": 1.3394, "step": 46235 }, { "epoch": 1.363408521303258, "grad_norm": 2.8652993089558683, "learning_rate": 4.977481135563713e-06, "loss": 1.3495, "step": 46240 }, { "epoch": 1.3635559486952675, "grad_norm": 2.9910608774692062, "learning_rate": 4.976873278333307e-06, "loss": 1.2906, "step": 46245 }, { "epoch": 1.363703376087277, "grad_norm": 2.9342173112729317, "learning_rate": 4.976265385000754e-06, "loss": 1.3488, "step": 46250 }, { "epoch": 1.3638508034792864, "grad_norm": 2.781684736283856, "learning_rate": 4.975657455583941e-06, "loss": 1.3061, "step": 46255 }, { "epoch": 1.363998230871296, "grad_norm": 2.876929842761453, "learning_rate": 4.9750494901007575e-06, "loss": 1.347, "step": 46260 }, { "epoch": 1.3641456582633054, "grad_norm": 2.9797344129518493, "learning_rate": 4.974441488569096e-06, "loss": 1.2929, "step": 46265 }, { "epoch": 1.3642930856553148, "grad_norm": 2.8173358988038424, "learning_rate": 4.973833451006843e-06, "loss": 1.3421, "step": 46270 }, { "epoch": 1.3644405130473243, "grad_norm": 2.834871944186562, "learning_rate": 4.973225377431895e-06, "loss": 1.3061, "step": 46275 }, { "epoch": 1.3645879404393337, "grad_norm": 2.8752932069695967, "learning_rate": 4.972617267862142e-06, "loss": 1.2725, "step": 46280 }, { "epoch": 1.3647353678313432, "grad_norm": 2.9309196440548426, "learning_rate": 4.97200912231548e-06, "loss": 1.335, "step": 46285 }, { "epoch": 1.3648827952233524, "grad_norm": 2.887856523087325, "learning_rate": 4.971400940809805e-06, "loss": 1.2932, "step": 46290 }, { "epoch": 1.3650302226153619, "grad_norm": 2.7704412713022575, "learning_rate": 4.970792723363012e-06, "loss": 1.3475, "step": 46295 }, { "epoch": 1.3651776500073713, "grad_norm": 3.0347172253700525, "learning_rate": 4.970184469992999e-06, "loss": 1.3071, "step": 46300 }, { "epoch": 1.3653250773993808, "grad_norm": 3.006202172938002, "learning_rate": 4.969576180717666e-06, "loss": 1.3044, "step": 46305 }, { "epoch": 1.3654725047913903, "grad_norm": 2.783280412327037, "learning_rate": 4.96896785555491e-06, "loss": 1.2942, "step": 46310 }, { "epoch": 1.3656199321833997, "grad_norm": 2.939217793682304, "learning_rate": 4.968359494522633e-06, "loss": 1.3599, "step": 46315 }, { "epoch": 1.3657673595754092, "grad_norm": 2.845754259527644, "learning_rate": 4.967751097638737e-06, "loss": 1.3263, "step": 46320 }, { "epoch": 1.3659147869674184, "grad_norm": 2.759103448581858, "learning_rate": 4.967142664921125e-06, "loss": 1.2944, "step": 46325 }, { "epoch": 1.3660622143594279, "grad_norm": 2.910978670769277, "learning_rate": 4.9665341963877e-06, "loss": 1.3418, "step": 46330 }, { "epoch": 1.3662096417514373, "grad_norm": 2.980825936204096, "learning_rate": 4.9659256920563664e-06, "loss": 1.3693, "step": 46335 }, { "epoch": 1.3663570691434468, "grad_norm": 2.873864334907011, "learning_rate": 4.965317151945031e-06, "loss": 1.3343, "step": 46340 }, { "epoch": 1.3665044965354562, "grad_norm": 2.9557844462055587, "learning_rate": 4.9647085760716e-06, "loss": 1.2984, "step": 46345 }, { "epoch": 1.3666519239274657, "grad_norm": 2.798596634256747, "learning_rate": 4.964099964453983e-06, "loss": 1.2771, "step": 46350 }, { "epoch": 1.3667993513194752, "grad_norm": 2.9662450205358226, "learning_rate": 4.9634913171100895e-06, "loss": 1.2696, "step": 46355 }, { "epoch": 1.3669467787114846, "grad_norm": 2.8164567987651186, "learning_rate": 4.962882634057827e-06, "loss": 1.2826, "step": 46360 }, { "epoch": 1.367094206103494, "grad_norm": 2.8078145264865104, "learning_rate": 4.9622739153151075e-06, "loss": 1.36, "step": 46365 }, { "epoch": 1.3672416334955035, "grad_norm": 2.819925628661547, "learning_rate": 4.961665160899844e-06, "loss": 1.3263, "step": 46370 }, { "epoch": 1.367389060887513, "grad_norm": 2.842003471218673, "learning_rate": 4.961056370829949e-06, "loss": 1.3292, "step": 46375 }, { "epoch": 1.3675364882795225, "grad_norm": 2.8312275823881956, "learning_rate": 4.96044754512334e-06, "loss": 1.3427, "step": 46380 }, { "epoch": 1.3676839156715317, "grad_norm": 2.8588290751234005, "learning_rate": 4.959838683797927e-06, "loss": 1.343, "step": 46385 }, { "epoch": 1.3678313430635411, "grad_norm": 2.9412167809719216, "learning_rate": 4.95922978687163e-06, "loss": 1.3308, "step": 46390 }, { "epoch": 1.3679787704555506, "grad_norm": 2.9376971044978166, "learning_rate": 4.9586208543623665e-06, "loss": 1.3088, "step": 46395 }, { "epoch": 1.36812619784756, "grad_norm": 2.820220801227646, "learning_rate": 4.958011886288054e-06, "loss": 1.3335, "step": 46400 }, { "epoch": 1.3682736252395695, "grad_norm": 2.873095304138027, "learning_rate": 4.957402882666612e-06, "loss": 1.3444, "step": 46405 }, { "epoch": 1.368421052631579, "grad_norm": 2.952323305705568, "learning_rate": 4.956793843515961e-06, "loss": 1.3399, "step": 46410 }, { "epoch": 1.3685684800235884, "grad_norm": 2.840320608989097, "learning_rate": 4.956184768854024e-06, "loss": 1.3466, "step": 46415 }, { "epoch": 1.368715907415598, "grad_norm": 3.0180535997059263, "learning_rate": 4.955575658698723e-06, "loss": 1.3466, "step": 46420 }, { "epoch": 1.3688633348076071, "grad_norm": 2.8640714050059297, "learning_rate": 4.954966513067981e-06, "loss": 1.3697, "step": 46425 }, { "epoch": 1.3690107621996166, "grad_norm": 2.906055288452432, "learning_rate": 4.954357331979723e-06, "loss": 1.307, "step": 46430 }, { "epoch": 1.369158189591626, "grad_norm": 2.865862943872635, "learning_rate": 4.953748115451877e-06, "loss": 1.2889, "step": 46435 }, { "epoch": 1.3693056169836355, "grad_norm": 2.8905056738751607, "learning_rate": 4.953138863502366e-06, "loss": 1.2912, "step": 46440 }, { "epoch": 1.369453044375645, "grad_norm": 2.835993265228536, "learning_rate": 4.952529576149121e-06, "loss": 1.347, "step": 46445 }, { "epoch": 1.3696004717676544, "grad_norm": 2.923919295037303, "learning_rate": 4.95192025341007e-06, "loss": 1.2994, "step": 46450 }, { "epoch": 1.3697478991596639, "grad_norm": 2.8839934512133762, "learning_rate": 4.951310895303144e-06, "loss": 1.3039, "step": 46455 }, { "epoch": 1.3698953265516733, "grad_norm": 2.955477622052868, "learning_rate": 4.950701501846273e-06, "loss": 1.3563, "step": 46460 }, { "epoch": 1.3700427539436828, "grad_norm": 2.9086863595889594, "learning_rate": 4.950092073057389e-06, "loss": 1.3424, "step": 46465 }, { "epoch": 1.3701901813356923, "grad_norm": 2.752630123912814, "learning_rate": 4.949482608954425e-06, "loss": 1.2498, "step": 46470 }, { "epoch": 1.3703376087277017, "grad_norm": 2.7642237132596192, "learning_rate": 4.9488731095553156e-06, "loss": 1.307, "step": 46475 }, { "epoch": 1.3704850361197112, "grad_norm": 2.96532528033441, "learning_rate": 4.948263574877995e-06, "loss": 1.3344, "step": 46480 }, { "epoch": 1.3706324635117204, "grad_norm": 2.724962675929582, "learning_rate": 4.947654004940403e-06, "loss": 1.2779, "step": 46485 }, { "epoch": 1.3707798909037299, "grad_norm": 2.9864993144773138, "learning_rate": 4.947044399760472e-06, "loss": 1.3646, "step": 46490 }, { "epoch": 1.3709273182957393, "grad_norm": 2.8430514765777173, "learning_rate": 4.946434759356144e-06, "loss": 1.2764, "step": 46495 }, { "epoch": 1.3710747456877488, "grad_norm": 3.0207545479601836, "learning_rate": 4.945825083745356e-06, "loss": 1.3671, "step": 46500 }, { "epoch": 1.3710747456877488, "eval_loss": 1.1070607900619507, "eval_runtime": 4.158, "eval_samples_per_second": 95.238, "eval_steps_per_second": 3.126, "step": 46500 }, { "epoch": 1.3712221730797582, "grad_norm": 2.874221420174769, "learning_rate": 4.945215372946049e-06, "loss": 1.3116, "step": 46505 }, { "epoch": 1.3713696004717677, "grad_norm": 2.8717307398048306, "learning_rate": 4.944605626976167e-06, "loss": 1.3836, "step": 46510 }, { "epoch": 1.3715170278637772, "grad_norm": 2.9241847773863285, "learning_rate": 4.943995845853649e-06, "loss": 1.3712, "step": 46515 }, { "epoch": 1.3716644552557864, "grad_norm": 2.824951372357124, "learning_rate": 4.94338602959644e-06, "loss": 1.2996, "step": 46520 }, { "epoch": 1.3718118826477959, "grad_norm": 2.934281686042076, "learning_rate": 4.942776178222485e-06, "loss": 1.3426, "step": 46525 }, { "epoch": 1.3719593100398053, "grad_norm": 2.962116876606557, "learning_rate": 4.942166291749727e-06, "loss": 1.3401, "step": 46530 }, { "epoch": 1.3721067374318148, "grad_norm": 2.8259931838805095, "learning_rate": 4.941556370196116e-06, "loss": 1.344, "step": 46535 }, { "epoch": 1.3722541648238242, "grad_norm": 2.6780334352360424, "learning_rate": 4.9409464135795985e-06, "loss": 1.3427, "step": 46540 }, { "epoch": 1.3724015922158337, "grad_norm": 2.937144050345824, "learning_rate": 4.9403364219181225e-06, "loss": 1.3146, "step": 46545 }, { "epoch": 1.3725490196078431, "grad_norm": 3.0460083022759736, "learning_rate": 4.939726395229638e-06, "loss": 1.3497, "step": 46550 }, { "epoch": 1.3726964469998526, "grad_norm": 2.890350322425675, "learning_rate": 4.939116333532095e-06, "loss": 1.3399, "step": 46555 }, { "epoch": 1.372843874391862, "grad_norm": 2.932338541499596, "learning_rate": 4.938506236843448e-06, "loss": 1.3182, "step": 46560 }, { "epoch": 1.3729913017838715, "grad_norm": 3.04183331501549, "learning_rate": 4.9378961051816466e-06, "loss": 1.3762, "step": 46565 }, { "epoch": 1.373138729175881, "grad_norm": 2.891229539452952, "learning_rate": 4.9372859385646455e-06, "loss": 1.3634, "step": 46570 }, { "epoch": 1.3732861565678904, "grad_norm": 2.8265019387311114, "learning_rate": 4.9366757370104e-06, "loss": 1.3114, "step": 46575 }, { "epoch": 1.3734335839598997, "grad_norm": 2.927609050881612, "learning_rate": 4.936065500536866e-06, "loss": 1.3051, "step": 46580 }, { "epoch": 1.3735810113519091, "grad_norm": 2.7802739735269495, "learning_rate": 4.935455229162001e-06, "loss": 1.2877, "step": 46585 }, { "epoch": 1.3737284387439186, "grad_norm": 3.1860621904800657, "learning_rate": 4.934844922903763e-06, "loss": 1.3326, "step": 46590 }, { "epoch": 1.373875866135928, "grad_norm": 2.815733780966785, "learning_rate": 4.934234581780109e-06, "loss": 1.2669, "step": 46595 }, { "epoch": 1.3740232935279375, "grad_norm": 2.8692289382100444, "learning_rate": 4.933624205809e-06, "loss": 1.3071, "step": 46600 }, { "epoch": 1.374170720919947, "grad_norm": 2.926905508522813, "learning_rate": 4.933013795008396e-06, "loss": 1.3265, "step": 46605 }, { "epoch": 1.3743181483119564, "grad_norm": 3.031302816839284, "learning_rate": 4.932403349396262e-06, "loss": 1.2992, "step": 46610 }, { "epoch": 1.3744655757039657, "grad_norm": 2.8044484273181323, "learning_rate": 4.931792868990559e-06, "loss": 1.359, "step": 46615 }, { "epoch": 1.3746130030959751, "grad_norm": 2.918935423878895, "learning_rate": 4.931182353809251e-06, "loss": 1.2841, "step": 46620 }, { "epoch": 1.3747604304879846, "grad_norm": 2.8194035821889645, "learning_rate": 4.930571803870304e-06, "loss": 1.3135, "step": 46625 }, { "epoch": 1.374907857879994, "grad_norm": 2.9484536785943622, "learning_rate": 4.929961219191683e-06, "loss": 1.3032, "step": 46630 }, { "epoch": 1.3750552852720035, "grad_norm": 2.8615158602849156, "learning_rate": 4.929350599791356e-06, "loss": 1.3163, "step": 46635 }, { "epoch": 1.375202712664013, "grad_norm": 2.826648659637051, "learning_rate": 4.928739945687291e-06, "loss": 1.2964, "step": 46640 }, { "epoch": 1.3753501400560224, "grad_norm": 2.7729762135221963, "learning_rate": 4.928129256897457e-06, "loss": 1.3538, "step": 46645 }, { "epoch": 1.3754975674480319, "grad_norm": 2.860489069912048, "learning_rate": 4.927518533439824e-06, "loss": 1.3131, "step": 46650 }, { "epoch": 1.3756449948400413, "grad_norm": 2.8869912787718754, "learning_rate": 4.926907775332364e-06, "loss": 1.2996, "step": 46655 }, { "epoch": 1.3757924222320508, "grad_norm": 3.076629991257726, "learning_rate": 4.926296982593049e-06, "loss": 1.3488, "step": 46660 }, { "epoch": 1.3759398496240602, "grad_norm": 2.9836361134780938, "learning_rate": 4.925686155239852e-06, "loss": 1.3457, "step": 46665 }, { "epoch": 1.3760872770160697, "grad_norm": 2.8126810159443987, "learning_rate": 4.925075293290748e-06, "loss": 1.3206, "step": 46670 }, { "epoch": 1.3762347044080792, "grad_norm": 2.8820209863323303, "learning_rate": 4.92446439676371e-06, "loss": 1.2855, "step": 46675 }, { "epoch": 1.3763821318000884, "grad_norm": 2.819386686992266, "learning_rate": 4.923853465676717e-06, "loss": 1.3243, "step": 46680 }, { "epoch": 1.3765295591920979, "grad_norm": 2.9245791579602023, "learning_rate": 4.923242500047746e-06, "loss": 1.3241, "step": 46685 }, { "epoch": 1.3766769865841073, "grad_norm": 2.8213750645239934, "learning_rate": 4.9226314998947735e-06, "loss": 1.3034, "step": 46690 }, { "epoch": 1.3768244139761168, "grad_norm": 2.766925242011601, "learning_rate": 4.9220204652357814e-06, "loss": 1.332, "step": 46695 }, { "epoch": 1.3769718413681262, "grad_norm": 2.8560967659419, "learning_rate": 4.921409396088748e-06, "loss": 1.2973, "step": 46700 }, { "epoch": 1.3771192687601357, "grad_norm": 2.737957485899129, "learning_rate": 4.920798292471656e-06, "loss": 1.3025, "step": 46705 }, { "epoch": 1.3772666961521451, "grad_norm": 2.6705561970676714, "learning_rate": 4.920187154402486e-06, "loss": 1.3597, "step": 46710 }, { "epoch": 1.3774141235441544, "grad_norm": 2.884486815712554, "learning_rate": 4.919575981899225e-06, "loss": 1.3053, "step": 46715 }, { "epoch": 1.3775615509361638, "grad_norm": 2.8816281013229954, "learning_rate": 4.918964774979854e-06, "loss": 1.3548, "step": 46720 }, { "epoch": 1.3777089783281733, "grad_norm": 3.072746213984869, "learning_rate": 4.918353533662359e-06, "loss": 1.3217, "step": 46725 }, { "epoch": 1.3778564057201828, "grad_norm": 2.8538794570781807, "learning_rate": 4.917742257964728e-06, "loss": 1.3208, "step": 46730 }, { "epoch": 1.3780038331121922, "grad_norm": 2.8827165769645133, "learning_rate": 4.917130947904947e-06, "loss": 1.3317, "step": 46735 }, { "epoch": 1.3781512605042017, "grad_norm": 2.8517965246518684, "learning_rate": 4.916519603501006e-06, "loss": 1.3418, "step": 46740 }, { "epoch": 1.3782986878962111, "grad_norm": 2.8818603646751835, "learning_rate": 4.915908224770893e-06, "loss": 1.3056, "step": 46745 }, { "epoch": 1.3784461152882206, "grad_norm": 2.7191442878408965, "learning_rate": 4.915296811732599e-06, "loss": 1.2955, "step": 46750 }, { "epoch": 1.37859354268023, "grad_norm": 2.8439259331646523, "learning_rate": 4.9146853644041164e-06, "loss": 1.3851, "step": 46755 }, { "epoch": 1.3787409700722395, "grad_norm": 2.7837502548958866, "learning_rate": 4.914073882803436e-06, "loss": 1.3276, "step": 46760 }, { "epoch": 1.378888397464249, "grad_norm": 2.748202162322573, "learning_rate": 4.913462366948553e-06, "loss": 1.3363, "step": 46765 }, { "epoch": 1.3790358248562584, "grad_norm": 2.880556502411328, "learning_rate": 4.912850816857462e-06, "loss": 1.3196, "step": 46770 }, { "epoch": 1.3791832522482677, "grad_norm": 2.6336663374473273, "learning_rate": 4.912239232548157e-06, "loss": 1.2877, "step": 46775 }, { "epoch": 1.3793306796402771, "grad_norm": 2.7663516631258167, "learning_rate": 4.911627614038635e-06, "loss": 1.2935, "step": 46780 }, { "epoch": 1.3794781070322866, "grad_norm": 2.846937569630585, "learning_rate": 4.911015961346895e-06, "loss": 1.3447, "step": 46785 }, { "epoch": 1.379625534424296, "grad_norm": 2.899277781486231, "learning_rate": 4.910404274490934e-06, "loss": 1.2631, "step": 46790 }, { "epoch": 1.3797729618163055, "grad_norm": 2.876449982337686, "learning_rate": 4.909792553488754e-06, "loss": 1.3188, "step": 46795 }, { "epoch": 1.379920389208315, "grad_norm": 2.8506929081866583, "learning_rate": 4.909180798358352e-06, "loss": 1.3109, "step": 46800 }, { "epoch": 1.3800678166003244, "grad_norm": 2.9935319637695987, "learning_rate": 4.908569009117731e-06, "loss": 1.2541, "step": 46805 }, { "epoch": 1.3802152439923336, "grad_norm": 2.9203091535724526, "learning_rate": 4.907957185784894e-06, "loss": 1.3622, "step": 46810 }, { "epoch": 1.380362671384343, "grad_norm": 2.8634435591310288, "learning_rate": 4.907345328377846e-06, "loss": 1.2985, "step": 46815 }, { "epoch": 1.3805100987763526, "grad_norm": 2.9557295451732166, "learning_rate": 4.906733436914589e-06, "loss": 1.3263, "step": 46820 }, { "epoch": 1.380657526168362, "grad_norm": 2.8720455932982603, "learning_rate": 4.90612151141313e-06, "loss": 1.3156, "step": 46825 }, { "epoch": 1.3808049535603715, "grad_norm": 3.008998674105567, "learning_rate": 4.905509551891475e-06, "loss": 1.2976, "step": 46830 }, { "epoch": 1.380952380952381, "grad_norm": 2.922603332601697, "learning_rate": 4.904897558367633e-06, "loss": 1.3095, "step": 46835 }, { "epoch": 1.3810998083443904, "grad_norm": 2.86137980411567, "learning_rate": 4.904285530859611e-06, "loss": 1.3158, "step": 46840 }, { "epoch": 1.3812472357363998, "grad_norm": 2.727694402247787, "learning_rate": 4.903673469385419e-06, "loss": 1.3255, "step": 46845 }, { "epoch": 1.3813946631284093, "grad_norm": 2.8381466510167574, "learning_rate": 4.903061373963068e-06, "loss": 1.2915, "step": 46850 }, { "epoch": 1.3815420905204188, "grad_norm": 2.9344658923252416, "learning_rate": 4.90244924461057e-06, "loss": 1.3253, "step": 46855 }, { "epoch": 1.3816895179124282, "grad_norm": 2.9235641874932528, "learning_rate": 4.901837081345935e-06, "loss": 1.3363, "step": 46860 }, { "epoch": 1.3818369453044377, "grad_norm": 3.0248835068029445, "learning_rate": 4.9012248841871805e-06, "loss": 1.3409, "step": 46865 }, { "epoch": 1.381984372696447, "grad_norm": 3.038388867379896, "learning_rate": 4.900612653152319e-06, "loss": 1.2908, "step": 46870 }, { "epoch": 1.3821318000884564, "grad_norm": 3.064292467769737, "learning_rate": 4.900000388259366e-06, "loss": 1.3213, "step": 46875 }, { "epoch": 1.3822792274804658, "grad_norm": 3.007619751713766, "learning_rate": 4.899388089526339e-06, "loss": 1.361, "step": 46880 }, { "epoch": 1.3824266548724753, "grad_norm": 2.927060323246237, "learning_rate": 4.898775756971255e-06, "loss": 1.3022, "step": 46885 }, { "epoch": 1.3825740822644847, "grad_norm": 2.9596976448499994, "learning_rate": 4.898163390612132e-06, "loss": 1.3257, "step": 46890 }, { "epoch": 1.3827215096564942, "grad_norm": 2.7802472273602454, "learning_rate": 4.897550990466989e-06, "loss": 1.2939, "step": 46895 }, { "epoch": 1.3828689370485037, "grad_norm": 2.897161276871869, "learning_rate": 4.89693855655385e-06, "loss": 1.3012, "step": 46900 }, { "epoch": 1.3830163644405131, "grad_norm": 2.78853115589335, "learning_rate": 4.896326088890733e-06, "loss": 1.253, "step": 46905 }, { "epoch": 1.3831637918325224, "grad_norm": 2.912460426020029, "learning_rate": 4.895713587495662e-06, "loss": 1.3486, "step": 46910 }, { "epoch": 1.3833112192245318, "grad_norm": 2.8133639775677004, "learning_rate": 4.89510105238666e-06, "loss": 1.3085, "step": 46915 }, { "epoch": 1.3834586466165413, "grad_norm": 2.889924706410521, "learning_rate": 4.894488483581753e-06, "loss": 1.3153, "step": 46920 }, { "epoch": 1.3836060740085507, "grad_norm": 2.7526358726450506, "learning_rate": 4.8938758810989656e-06, "loss": 1.3115, "step": 46925 }, { "epoch": 1.3837535014005602, "grad_norm": 2.960130446387371, "learning_rate": 4.893263244956324e-06, "loss": 1.2726, "step": 46930 }, { "epoch": 1.3839009287925697, "grad_norm": 2.6617418800611787, "learning_rate": 4.892650575171855e-06, "loss": 1.2972, "step": 46935 }, { "epoch": 1.384048356184579, "grad_norm": 2.8619418301695574, "learning_rate": 4.892037871763589e-06, "loss": 1.3038, "step": 46940 }, { "epoch": 1.3841957835765886, "grad_norm": 2.8474141818970216, "learning_rate": 4.891425134749554e-06, "loss": 1.2947, "step": 46945 }, { "epoch": 1.384343210968598, "grad_norm": 2.912901465006617, "learning_rate": 4.89081236414778e-06, "loss": 1.3429, "step": 46950 }, { "epoch": 1.3844906383606075, "grad_norm": 2.8951589456294173, "learning_rate": 4.890199559976303e-06, "loss": 1.2782, "step": 46955 }, { "epoch": 1.384638065752617, "grad_norm": 2.924868018903416, "learning_rate": 4.889586722253149e-06, "loss": 1.3019, "step": 46960 }, { "epoch": 1.3847854931446264, "grad_norm": 2.7408277250924127, "learning_rate": 4.888973850996355e-06, "loss": 1.2988, "step": 46965 }, { "epoch": 1.3849329205366356, "grad_norm": 2.7459053246896286, "learning_rate": 4.888360946223954e-06, "loss": 1.3444, "step": 46970 }, { "epoch": 1.385080347928645, "grad_norm": 2.864892188519667, "learning_rate": 4.887748007953985e-06, "loss": 1.3231, "step": 46975 }, { "epoch": 1.3852277753206546, "grad_norm": 2.9053635395672073, "learning_rate": 4.8871350362044795e-06, "loss": 1.347, "step": 46980 }, { "epoch": 1.385375202712664, "grad_norm": 2.8460562562555487, "learning_rate": 4.8865220309934775e-06, "loss": 1.2925, "step": 46985 }, { "epoch": 1.3855226301046735, "grad_norm": 2.69396019137993, "learning_rate": 4.885908992339018e-06, "loss": 1.2568, "step": 46990 }, { "epoch": 1.385670057496683, "grad_norm": 3.242478270268019, "learning_rate": 4.885295920259138e-06, "loss": 1.3177, "step": 46995 }, { "epoch": 1.3858174848886924, "grad_norm": 2.870610383912792, "learning_rate": 4.884682814771879e-06, "loss": 1.2847, "step": 47000 }, { "epoch": 1.3858174848886924, "eval_loss": 1.104032278060913, "eval_runtime": 4.2636, "eval_samples_per_second": 92.879, "eval_steps_per_second": 3.049, "step": 47000 }, { "epoch": 1.3859649122807016, "grad_norm": 2.8475282537727917, "learning_rate": 4.884069675895284e-06, "loss": 1.2699, "step": 47005 }, { "epoch": 1.386112339672711, "grad_norm": 2.8836063902392612, "learning_rate": 4.8834565036473926e-06, "loss": 1.2695, "step": 47010 }, { "epoch": 1.3862597670647205, "grad_norm": 2.8521220847533675, "learning_rate": 4.882843298046249e-06, "loss": 1.3522, "step": 47015 }, { "epoch": 1.38640719445673, "grad_norm": 2.911168010441683, "learning_rate": 4.882230059109899e-06, "loss": 1.3053, "step": 47020 }, { "epoch": 1.3865546218487395, "grad_norm": 2.8934193227180245, "learning_rate": 4.8816167868563856e-06, "loss": 1.3204, "step": 47025 }, { "epoch": 1.386702049240749, "grad_norm": 2.9201177230444526, "learning_rate": 4.881003481303757e-06, "loss": 1.3357, "step": 47030 }, { "epoch": 1.3868494766327584, "grad_norm": 2.9678403482563254, "learning_rate": 4.880390142470058e-06, "loss": 1.3089, "step": 47035 }, { "epoch": 1.3869969040247678, "grad_norm": 2.8382898642986305, "learning_rate": 4.87977677037334e-06, "loss": 1.3359, "step": 47040 }, { "epoch": 1.3871443314167773, "grad_norm": 2.8100595874607412, "learning_rate": 4.87916336503165e-06, "loss": 1.2995, "step": 47045 }, { "epoch": 1.3872917588087867, "grad_norm": 2.9058650412950957, "learning_rate": 4.878549926463038e-06, "loss": 1.322, "step": 47050 }, { "epoch": 1.3874391862007962, "grad_norm": 2.962847684611645, "learning_rate": 4.877936454685556e-06, "loss": 1.3351, "step": 47055 }, { "epoch": 1.3875866135928057, "grad_norm": 2.9553332580953597, "learning_rate": 4.877322949717257e-06, "loss": 1.2976, "step": 47060 }, { "epoch": 1.387734040984815, "grad_norm": 2.7836425011077113, "learning_rate": 4.876709411576191e-06, "loss": 1.2983, "step": 47065 }, { "epoch": 1.3878814683768244, "grad_norm": 2.841497662760925, "learning_rate": 4.876095840280415e-06, "loss": 1.2528, "step": 47070 }, { "epoch": 1.3880288957688338, "grad_norm": 2.871802563166429, "learning_rate": 4.8754822358479835e-06, "loss": 1.3046, "step": 47075 }, { "epoch": 1.3881763231608433, "grad_norm": 2.9879383100997705, "learning_rate": 4.874868598296951e-06, "loss": 1.2971, "step": 47080 }, { "epoch": 1.3883237505528527, "grad_norm": 2.9047833925246342, "learning_rate": 4.874254927645376e-06, "loss": 1.3333, "step": 47085 }, { "epoch": 1.3884711779448622, "grad_norm": 3.0247079260307665, "learning_rate": 4.873641223911316e-06, "loss": 1.2962, "step": 47090 }, { "epoch": 1.3886186053368716, "grad_norm": 3.017356754015926, "learning_rate": 4.87302748711283e-06, "loss": 1.3571, "step": 47095 }, { "epoch": 1.388766032728881, "grad_norm": 2.6785138328273175, "learning_rate": 4.872413717267977e-06, "loss": 1.3177, "step": 47100 }, { "epoch": 1.3889134601208903, "grad_norm": 2.9339238520314708, "learning_rate": 4.871799914394821e-06, "loss": 1.271, "step": 47105 }, { "epoch": 1.3890608875128998, "grad_norm": 2.9045601331357, "learning_rate": 4.8711860785114204e-06, "loss": 1.3939, "step": 47110 }, { "epoch": 1.3892083149049093, "grad_norm": 2.938738454162061, "learning_rate": 4.870572209635838e-06, "loss": 1.2965, "step": 47115 }, { "epoch": 1.3893557422969187, "grad_norm": 2.974885728952278, "learning_rate": 4.869958307786139e-06, "loss": 1.3537, "step": 47120 }, { "epoch": 1.3895031696889282, "grad_norm": 2.8234201150879485, "learning_rate": 4.869344372980389e-06, "loss": 1.3681, "step": 47125 }, { "epoch": 1.3896505970809376, "grad_norm": 2.882901339789171, "learning_rate": 4.868730405236651e-06, "loss": 1.352, "step": 47130 }, { "epoch": 1.389798024472947, "grad_norm": 2.9513392696592105, "learning_rate": 4.868116404572996e-06, "loss": 1.3157, "step": 47135 }, { "epoch": 1.3899454518649565, "grad_norm": 2.8122667004959703, "learning_rate": 4.8675023710074876e-06, "loss": 1.3333, "step": 47140 }, { "epoch": 1.390092879256966, "grad_norm": 2.751715374096146, "learning_rate": 4.866888304558195e-06, "loss": 1.3147, "step": 47145 }, { "epoch": 1.3902403066489755, "grad_norm": 2.871736910029426, "learning_rate": 4.866274205243191e-06, "loss": 1.3144, "step": 47150 }, { "epoch": 1.390387734040985, "grad_norm": 2.831385727217346, "learning_rate": 4.865660073080541e-06, "loss": 1.3492, "step": 47155 }, { "epoch": 1.3905351614329944, "grad_norm": 2.7738166513352764, "learning_rate": 4.865045908088321e-06, "loss": 1.277, "step": 47160 }, { "epoch": 1.3906825888250036, "grad_norm": 2.961375851307891, "learning_rate": 4.864431710284602e-06, "loss": 1.3699, "step": 47165 }, { "epoch": 1.390830016217013, "grad_norm": 3.0595519050783335, "learning_rate": 4.8638174796874565e-06, "loss": 1.3599, "step": 47170 }, { "epoch": 1.3909774436090225, "grad_norm": 2.9407454059589164, "learning_rate": 4.863203216314961e-06, "loss": 1.3178, "step": 47175 }, { "epoch": 1.391124871001032, "grad_norm": 2.9666571193340783, "learning_rate": 4.862588920185189e-06, "loss": 1.3479, "step": 47180 }, { "epoch": 1.3912722983930415, "grad_norm": 2.8225429440960843, "learning_rate": 4.861974591316217e-06, "loss": 1.3108, "step": 47185 }, { "epoch": 1.391419725785051, "grad_norm": 2.823614966698713, "learning_rate": 4.861360229726124e-06, "loss": 1.2752, "step": 47190 }, { "epoch": 1.3915671531770604, "grad_norm": 3.021565542444658, "learning_rate": 4.860745835432987e-06, "loss": 1.3065, "step": 47195 }, { "epoch": 1.3917145805690696, "grad_norm": 2.837235661724485, "learning_rate": 4.860131408454885e-06, "loss": 1.3335, "step": 47200 }, { "epoch": 1.391862007961079, "grad_norm": 2.8789754959016287, "learning_rate": 4.859516948809899e-06, "loss": 1.3208, "step": 47205 }, { "epoch": 1.3920094353530885, "grad_norm": 2.9111721549767022, "learning_rate": 4.85890245651611e-06, "loss": 1.3089, "step": 47210 }, { "epoch": 1.392156862745098, "grad_norm": 2.8624198776049905, "learning_rate": 4.858287931591601e-06, "loss": 1.3195, "step": 47215 }, { "epoch": 1.3923042901371074, "grad_norm": 2.9405674252608875, "learning_rate": 4.857673374054452e-06, "loss": 1.3121, "step": 47220 }, { "epoch": 1.392451717529117, "grad_norm": 3.0011276268181373, "learning_rate": 4.857058783922749e-06, "loss": 1.3741, "step": 47225 }, { "epoch": 1.3925991449211264, "grad_norm": 2.9216135241486545, "learning_rate": 4.856444161214578e-06, "loss": 1.291, "step": 47230 }, { "epoch": 1.3927465723131358, "grad_norm": 2.810418707485387, "learning_rate": 4.8558295059480255e-06, "loss": 1.3535, "step": 47235 }, { "epoch": 1.3928939997051453, "grad_norm": 2.8509577701423794, "learning_rate": 4.855214818141176e-06, "loss": 1.3269, "step": 47240 }, { "epoch": 1.3930414270971547, "grad_norm": 2.875229734822099, "learning_rate": 4.854600097812118e-06, "loss": 1.244, "step": 47245 }, { "epoch": 1.3931888544891642, "grad_norm": 2.8477490863108454, "learning_rate": 4.853985344978942e-06, "loss": 1.3552, "step": 47250 }, { "epoch": 1.3933362818811736, "grad_norm": 2.818061373018432, "learning_rate": 4.853370559659735e-06, "loss": 1.3211, "step": 47255 }, { "epoch": 1.3934837092731829, "grad_norm": 2.8965465626142253, "learning_rate": 4.852755741872589e-06, "loss": 1.3218, "step": 47260 }, { "epoch": 1.3936311366651923, "grad_norm": 2.969690531107742, "learning_rate": 4.852140891635599e-06, "loss": 1.333, "step": 47265 }, { "epoch": 1.3937785640572018, "grad_norm": 2.712079562091674, "learning_rate": 4.851526008966852e-06, "loss": 1.3237, "step": 47270 }, { "epoch": 1.3939259914492113, "grad_norm": 2.8740777995136813, "learning_rate": 4.8509110938844455e-06, "loss": 1.3283, "step": 47275 }, { "epoch": 1.3940734188412207, "grad_norm": 2.889176296676245, "learning_rate": 4.850296146406473e-06, "loss": 1.2749, "step": 47280 }, { "epoch": 1.3942208462332302, "grad_norm": 2.882569372062728, "learning_rate": 4.849681166551028e-06, "loss": 1.3392, "step": 47285 }, { "epoch": 1.3943682736252396, "grad_norm": 2.7866143548904136, "learning_rate": 4.8490661543362125e-06, "loss": 1.3212, "step": 47290 }, { "epoch": 1.394515701017249, "grad_norm": 2.8612718694668544, "learning_rate": 4.848451109780118e-06, "loss": 1.3321, "step": 47295 }, { "epoch": 1.3946631284092583, "grad_norm": 2.9511730860185397, "learning_rate": 4.847836032900846e-06, "loss": 1.3283, "step": 47300 }, { "epoch": 1.3948105558012678, "grad_norm": 2.762452147955112, "learning_rate": 4.847220923716494e-06, "loss": 1.29, "step": 47305 }, { "epoch": 1.3949579831932772, "grad_norm": 2.8705710672718787, "learning_rate": 4.846605782245165e-06, "loss": 1.3324, "step": 47310 }, { "epoch": 1.3951054105852867, "grad_norm": 2.7802509188507996, "learning_rate": 4.845990608504958e-06, "loss": 1.3178, "step": 47315 }, { "epoch": 1.3952528379772962, "grad_norm": 3.018909811752282, "learning_rate": 4.845375402513977e-06, "loss": 1.3503, "step": 47320 }, { "epoch": 1.3954002653693056, "grad_norm": 2.8404936225314437, "learning_rate": 4.844760164290322e-06, "loss": 1.2912, "step": 47325 }, { "epoch": 1.395547692761315, "grad_norm": 2.868777761062509, "learning_rate": 4.8441448938521e-06, "loss": 1.3319, "step": 47330 }, { "epoch": 1.3956951201533245, "grad_norm": 2.895624219565747, "learning_rate": 4.843529591217415e-06, "loss": 1.3681, "step": 47335 }, { "epoch": 1.395842547545334, "grad_norm": 2.8493171100076027, "learning_rate": 4.842914256404373e-06, "loss": 1.3078, "step": 47340 }, { "epoch": 1.3959899749373434, "grad_norm": 2.94965786035599, "learning_rate": 4.842298889431082e-06, "loss": 1.3484, "step": 47345 }, { "epoch": 1.396137402329353, "grad_norm": 2.8501771596355527, "learning_rate": 4.841683490315648e-06, "loss": 1.3186, "step": 47350 }, { "epoch": 1.3962848297213624, "grad_norm": 2.8406881512248825, "learning_rate": 4.841068059076181e-06, "loss": 1.2822, "step": 47355 }, { "epoch": 1.3964322571133716, "grad_norm": 2.8875680112254587, "learning_rate": 4.84045259573079e-06, "loss": 1.3942, "step": 47360 }, { "epoch": 1.396579684505381, "grad_norm": 2.783666766948981, "learning_rate": 4.8398371002975865e-06, "loss": 1.3179, "step": 47365 }, { "epoch": 1.3967271118973905, "grad_norm": 2.8237510762350437, "learning_rate": 4.839221572794682e-06, "loss": 1.293, "step": 47370 }, { "epoch": 1.3968745392894, "grad_norm": 2.842131623661912, "learning_rate": 4.83860601324019e-06, "loss": 1.2652, "step": 47375 }, { "epoch": 1.3970219666814094, "grad_norm": 2.7107369913355304, "learning_rate": 4.837990421652222e-06, "loss": 1.3047, "step": 47380 }, { "epoch": 1.397169394073419, "grad_norm": 2.8184723665734945, "learning_rate": 4.837374798048893e-06, "loss": 1.3415, "step": 47385 }, { "epoch": 1.3973168214654283, "grad_norm": 2.8902180511099433, "learning_rate": 4.836759142448319e-06, "loss": 1.3158, "step": 47390 }, { "epoch": 1.3974642488574376, "grad_norm": 2.7690944962589747, "learning_rate": 4.836143454868617e-06, "loss": 1.2873, "step": 47395 }, { "epoch": 1.397611676249447, "grad_norm": 2.990911907636718, "learning_rate": 4.8355277353279034e-06, "loss": 1.2927, "step": 47400 }, { "epoch": 1.3977591036414565, "grad_norm": 2.7996901766508313, "learning_rate": 4.834911983844296e-06, "loss": 1.3534, "step": 47405 }, { "epoch": 1.397906531033466, "grad_norm": 2.870096895975167, "learning_rate": 4.834296200435915e-06, "loss": 1.2454, "step": 47410 }, { "epoch": 1.3980539584254754, "grad_norm": 2.863438210728458, "learning_rate": 4.83368038512088e-06, "loss": 1.3652, "step": 47415 }, { "epoch": 1.3982013858174849, "grad_norm": 3.0888247540567377, "learning_rate": 4.833064537917313e-06, "loss": 1.3787, "step": 47420 }, { "epoch": 1.3983488132094943, "grad_norm": 2.9438509207514185, "learning_rate": 4.832448658843335e-06, "loss": 1.3069, "step": 47425 }, { "epoch": 1.3984962406015038, "grad_norm": 2.9737687014569594, "learning_rate": 4.8318327479170685e-06, "loss": 1.3921, "step": 47430 }, { "epoch": 1.3986436679935133, "grad_norm": 2.9442890480641886, "learning_rate": 4.831216805156638e-06, "loss": 1.3433, "step": 47435 }, { "epoch": 1.3987910953855227, "grad_norm": 2.9138744016770755, "learning_rate": 4.83060083058017e-06, "loss": 1.2942, "step": 47440 }, { "epoch": 1.3989385227775322, "grad_norm": 2.7206059786618284, "learning_rate": 4.829984824205786e-06, "loss": 1.2807, "step": 47445 }, { "epoch": 1.3990859501695416, "grad_norm": 2.979987241833953, "learning_rate": 4.829368786051618e-06, "loss": 1.3085, "step": 47450 }, { "epoch": 1.3992333775615509, "grad_norm": 2.905106475328522, "learning_rate": 4.82875271613579e-06, "loss": 1.2919, "step": 47455 }, { "epoch": 1.3993808049535603, "grad_norm": 2.8952365902795556, "learning_rate": 4.828136614476431e-06, "loss": 1.3796, "step": 47460 }, { "epoch": 1.3995282323455698, "grad_norm": 2.8746554900890455, "learning_rate": 4.827520481091671e-06, "loss": 1.3014, "step": 47465 }, { "epoch": 1.3996756597375792, "grad_norm": 2.751225075096106, "learning_rate": 4.826904315999641e-06, "loss": 1.2858, "step": 47470 }, { "epoch": 1.3998230871295887, "grad_norm": 2.899437073407164, "learning_rate": 4.8262881192184715e-06, "loss": 1.3109, "step": 47475 }, { "epoch": 1.3999705145215982, "grad_norm": 2.971530474003192, "learning_rate": 4.825671890766295e-06, "loss": 1.3376, "step": 47480 }, { "epoch": 1.4001179419136076, "grad_norm": 2.8132797120094595, "learning_rate": 4.825055630661245e-06, "loss": 1.3442, "step": 47485 }, { "epoch": 1.4002653693056168, "grad_norm": 2.8411114233634915, "learning_rate": 4.824439338921456e-06, "loss": 1.3131, "step": 47490 }, { "epoch": 1.4004127966976263, "grad_norm": 2.743340597746729, "learning_rate": 4.823823015565063e-06, "loss": 1.2735, "step": 47495 }, { "epoch": 1.4005602240896358, "grad_norm": 2.820490689063923, "learning_rate": 4.8232066606102e-06, "loss": 1.3066, "step": 47500 }, { "epoch": 1.4005602240896358, "eval_loss": 1.103610873222351, "eval_runtime": 4.1898, "eval_samples_per_second": 94.514, "eval_steps_per_second": 3.103, "step": 47500 }, { "epoch": 1.4007076514816452, "grad_norm": 2.780484332851287, "learning_rate": 4.822590274075007e-06, "loss": 1.3069, "step": 47505 }, { "epoch": 1.4008550788736547, "grad_norm": 2.857131277442936, "learning_rate": 4.821973855977619e-06, "loss": 1.3227, "step": 47510 }, { "epoch": 1.4010025062656641, "grad_norm": 2.8572068764344727, "learning_rate": 4.821357406336176e-06, "loss": 1.2855, "step": 47515 }, { "epoch": 1.4011499336576736, "grad_norm": 2.898747085075696, "learning_rate": 4.820740925168819e-06, "loss": 1.2943, "step": 47520 }, { "epoch": 1.401297361049683, "grad_norm": 2.8210520769187633, "learning_rate": 4.820124412493688e-06, "loss": 1.3502, "step": 47525 }, { "epoch": 1.4014447884416925, "grad_norm": 2.8608685396330937, "learning_rate": 4.819507868328923e-06, "loss": 1.2931, "step": 47530 }, { "epoch": 1.401592215833702, "grad_norm": 2.8926320068788165, "learning_rate": 4.818891292692668e-06, "loss": 1.3465, "step": 47535 }, { "epoch": 1.4017396432257114, "grad_norm": 2.875362149781153, "learning_rate": 4.818274685603068e-06, "loss": 1.3067, "step": 47540 }, { "epoch": 1.4018870706177209, "grad_norm": 2.8183500322985116, "learning_rate": 4.817658047078263e-06, "loss": 1.2969, "step": 47545 }, { "epoch": 1.4020344980097303, "grad_norm": 2.6923763702128207, "learning_rate": 4.817041377136402e-06, "loss": 1.2962, "step": 47550 }, { "epoch": 1.4021819254017396, "grad_norm": 2.6584016579736516, "learning_rate": 4.816424675795631e-06, "loss": 1.2595, "step": 47555 }, { "epoch": 1.402329352793749, "grad_norm": 2.845346787739108, "learning_rate": 4.815807943074094e-06, "loss": 1.3214, "step": 47560 }, { "epoch": 1.4024767801857585, "grad_norm": 2.911706238247813, "learning_rate": 4.815191178989942e-06, "loss": 1.3002, "step": 47565 }, { "epoch": 1.402624207577768, "grad_norm": 2.893604528760042, "learning_rate": 4.814574383561324e-06, "loss": 1.3401, "step": 47570 }, { "epoch": 1.4027716349697774, "grad_norm": 2.8647939181266735, "learning_rate": 4.813957556806389e-06, "loss": 1.3438, "step": 47575 }, { "epoch": 1.4029190623617869, "grad_norm": 2.8637418001884676, "learning_rate": 4.813340698743288e-06, "loss": 1.3107, "step": 47580 }, { "epoch": 1.4030664897537963, "grad_norm": 2.7355677078443366, "learning_rate": 4.812723809390172e-06, "loss": 1.2967, "step": 47585 }, { "epoch": 1.4032139171458056, "grad_norm": 2.9666861596721352, "learning_rate": 4.812106888765196e-06, "loss": 1.3246, "step": 47590 }, { "epoch": 1.403361344537815, "grad_norm": 2.9866754482951334, "learning_rate": 4.81148993688651e-06, "loss": 1.3225, "step": 47595 }, { "epoch": 1.4035087719298245, "grad_norm": 2.8360636542815674, "learning_rate": 4.810872953772271e-06, "loss": 1.3009, "step": 47600 }, { "epoch": 1.403656199321834, "grad_norm": 2.996551164829152, "learning_rate": 4.810255939440634e-06, "loss": 1.3199, "step": 47605 }, { "epoch": 1.4038036267138434, "grad_norm": 2.891285371235697, "learning_rate": 4.809638893909756e-06, "loss": 1.3175, "step": 47610 }, { "epoch": 1.4039510541058529, "grad_norm": 2.8558041446333093, "learning_rate": 4.809021817197793e-06, "loss": 1.3187, "step": 47615 }, { "epoch": 1.4040984814978623, "grad_norm": 2.9578461761169237, "learning_rate": 4.808404709322904e-06, "loss": 1.3061, "step": 47620 }, { "epoch": 1.4042459088898718, "grad_norm": 2.879002465131119, "learning_rate": 4.807787570303247e-06, "loss": 1.2979, "step": 47625 }, { "epoch": 1.4043933362818812, "grad_norm": 2.8923470249007965, "learning_rate": 4.8071704001569845e-06, "loss": 1.3364, "step": 47630 }, { "epoch": 1.4045407636738907, "grad_norm": 2.896103751343086, "learning_rate": 4.806553198902275e-06, "loss": 1.2852, "step": 47635 }, { "epoch": 1.4046881910659001, "grad_norm": 2.9046685708211317, "learning_rate": 4.805935966557281e-06, "loss": 1.2981, "step": 47640 }, { "epoch": 1.4048356184579096, "grad_norm": 3.0167653899442057, "learning_rate": 4.8053187031401655e-06, "loss": 1.3401, "step": 47645 }, { "epoch": 1.4049830458499188, "grad_norm": 2.948469461932225, "learning_rate": 4.8047014086690925e-06, "loss": 1.3267, "step": 47650 }, { "epoch": 1.4051304732419283, "grad_norm": 3.059863548207364, "learning_rate": 4.8040840831622266e-06, "loss": 1.3487, "step": 47655 }, { "epoch": 1.4052779006339378, "grad_norm": 2.9935073757067303, "learning_rate": 4.8034667266377316e-06, "loss": 1.3125, "step": 47660 }, { "epoch": 1.4054253280259472, "grad_norm": 2.911991385183222, "learning_rate": 4.8028493391137755e-06, "loss": 1.2901, "step": 47665 }, { "epoch": 1.4055727554179567, "grad_norm": 2.8993260459960055, "learning_rate": 4.802231920608526e-06, "loss": 1.2707, "step": 47670 }, { "epoch": 1.4057201828099661, "grad_norm": 2.8567838386953794, "learning_rate": 4.8016144711401505e-06, "loss": 1.2784, "step": 47675 }, { "epoch": 1.4058676102019756, "grad_norm": 2.9915086298457143, "learning_rate": 4.800996990726818e-06, "loss": 1.3132, "step": 47680 }, { "epoch": 1.4060150375939848, "grad_norm": 2.747248691844712, "learning_rate": 4.8003794793867e-06, "loss": 1.3033, "step": 47685 }, { "epoch": 1.4061624649859943, "grad_norm": 2.9979409742296204, "learning_rate": 4.799761937137965e-06, "loss": 1.3481, "step": 47690 }, { "epoch": 1.4063098923780037, "grad_norm": 2.9054459499731675, "learning_rate": 4.799144363998786e-06, "loss": 1.3053, "step": 47695 }, { "epoch": 1.4064573197700132, "grad_norm": 2.728964510385328, "learning_rate": 4.798526759987338e-06, "loss": 1.3197, "step": 47700 }, { "epoch": 1.4066047471620227, "grad_norm": 2.8923853470830116, "learning_rate": 4.7979091251217925e-06, "loss": 1.3743, "step": 47705 }, { "epoch": 1.4067521745540321, "grad_norm": 2.797421534476753, "learning_rate": 4.797291459420324e-06, "loss": 1.3184, "step": 47710 }, { "epoch": 1.4068996019460416, "grad_norm": 2.9065047006567535, "learning_rate": 4.796673762901107e-06, "loss": 1.3427, "step": 47715 }, { "epoch": 1.407047029338051, "grad_norm": 2.867400739617985, "learning_rate": 4.796056035582321e-06, "loss": 1.3033, "step": 47720 }, { "epoch": 1.4071944567300605, "grad_norm": 2.872459645811465, "learning_rate": 4.795438277482142e-06, "loss": 1.2814, "step": 47725 }, { "epoch": 1.40734188412207, "grad_norm": 2.8859247046788585, "learning_rate": 4.794820488618746e-06, "loss": 1.2617, "step": 47730 }, { "epoch": 1.4074893115140794, "grad_norm": 2.9755377651738137, "learning_rate": 4.794202669010316e-06, "loss": 1.3097, "step": 47735 }, { "epoch": 1.4076367389060889, "grad_norm": 2.6865858382059336, "learning_rate": 4.7935848186750284e-06, "loss": 1.2479, "step": 47740 }, { "epoch": 1.407784166298098, "grad_norm": 2.9186899084188154, "learning_rate": 4.792966937631067e-06, "loss": 1.3225, "step": 47745 }, { "epoch": 1.4079315936901076, "grad_norm": 2.832244077302123, "learning_rate": 4.7923490258966124e-06, "loss": 1.3027, "step": 47750 }, { "epoch": 1.408079021082117, "grad_norm": 2.8467374461585977, "learning_rate": 4.791731083489847e-06, "loss": 1.2295, "step": 47755 }, { "epoch": 1.4082264484741265, "grad_norm": 2.893577983105884, "learning_rate": 4.791113110428955e-06, "loss": 1.3134, "step": 47760 }, { "epoch": 1.408373875866136, "grad_norm": 2.776215589516912, "learning_rate": 4.790495106732123e-06, "loss": 1.2629, "step": 47765 }, { "epoch": 1.4085213032581454, "grad_norm": 2.7866553650501547, "learning_rate": 4.7898770724175325e-06, "loss": 1.3421, "step": 47770 }, { "epoch": 1.4086687306501549, "grad_norm": 2.821609715446014, "learning_rate": 4.789259007503371e-06, "loss": 1.3119, "step": 47775 }, { "epoch": 1.4088161580421643, "grad_norm": 2.975172191393296, "learning_rate": 4.788640912007827e-06, "loss": 1.326, "step": 47780 }, { "epoch": 1.4089635854341735, "grad_norm": 2.9114468812121355, "learning_rate": 4.788022785949089e-06, "loss": 1.3723, "step": 47785 }, { "epoch": 1.409111012826183, "grad_norm": 2.860473301302811, "learning_rate": 4.787404629345344e-06, "loss": 1.3094, "step": 47790 }, { "epoch": 1.4092584402181925, "grad_norm": 2.859898322317285, "learning_rate": 4.786786442214784e-06, "loss": 1.2755, "step": 47795 }, { "epoch": 1.409405867610202, "grad_norm": 2.9610358075921432, "learning_rate": 4.786168224575599e-06, "loss": 1.3488, "step": 47800 }, { "epoch": 1.4095532950022114, "grad_norm": 2.856203719327682, "learning_rate": 4.78554997644598e-06, "loss": 1.3075, "step": 47805 }, { "epoch": 1.4097007223942208, "grad_norm": 2.8326578378074188, "learning_rate": 4.7849316978441215e-06, "loss": 1.3114, "step": 47810 }, { "epoch": 1.4098481497862303, "grad_norm": 3.069619828921298, "learning_rate": 4.784313388788217e-06, "loss": 1.294, "step": 47815 }, { "epoch": 1.4099955771782398, "grad_norm": 2.7144762105461546, "learning_rate": 4.783695049296457e-06, "loss": 1.3072, "step": 47820 }, { "epoch": 1.4101430045702492, "grad_norm": 2.826202259388511, "learning_rate": 4.783076679387041e-06, "loss": 1.3065, "step": 47825 }, { "epoch": 1.4102904319622587, "grad_norm": 2.851344399400523, "learning_rate": 4.782458279078165e-06, "loss": 1.2537, "step": 47830 }, { "epoch": 1.4104378593542681, "grad_norm": 2.82744302029811, "learning_rate": 4.781839848388025e-06, "loss": 1.3263, "step": 47835 }, { "epoch": 1.4105852867462776, "grad_norm": 2.859096544516488, "learning_rate": 4.78122138733482e-06, "loss": 1.3242, "step": 47840 }, { "epoch": 1.4107327141382868, "grad_norm": 2.8924419204189196, "learning_rate": 4.780602895936747e-06, "loss": 1.3214, "step": 47845 }, { "epoch": 1.4108801415302963, "grad_norm": 2.856421311460713, "learning_rate": 4.779984374212007e-06, "loss": 1.2741, "step": 47850 }, { "epoch": 1.4110275689223057, "grad_norm": 2.8341014140332117, "learning_rate": 4.7793658221788004e-06, "loss": 1.3198, "step": 47855 }, { "epoch": 1.4111749963143152, "grad_norm": 2.8768051779991617, "learning_rate": 4.778747239855329e-06, "loss": 1.3733, "step": 47860 }, { "epoch": 1.4113224237063247, "grad_norm": 2.861810720208869, "learning_rate": 4.778128627259797e-06, "loss": 1.3763, "step": 47865 }, { "epoch": 1.4114698510983341, "grad_norm": 2.8747278100771774, "learning_rate": 4.777509984410404e-06, "loss": 1.3025, "step": 47870 }, { "epoch": 1.4116172784903436, "grad_norm": 2.8881007347874244, "learning_rate": 4.7768913113253575e-06, "loss": 1.315, "step": 47875 }, { "epoch": 1.4117647058823528, "grad_norm": 2.8325396139251833, "learning_rate": 4.776272608022861e-06, "loss": 1.3353, "step": 47880 }, { "epoch": 1.4119121332743623, "grad_norm": 3.2908512312829834, "learning_rate": 4.775653874521121e-06, "loss": 1.2982, "step": 47885 }, { "epoch": 1.4120595606663717, "grad_norm": 2.970705041000767, "learning_rate": 4.7750351108383455e-06, "loss": 1.2888, "step": 47890 }, { "epoch": 1.4122069880583812, "grad_norm": 2.8163176302889448, "learning_rate": 4.774416316992742e-06, "loss": 1.313, "step": 47895 }, { "epoch": 1.4123544154503906, "grad_norm": 3.097397180554342, "learning_rate": 4.7737974930025174e-06, "loss": 1.3011, "step": 47900 }, { "epoch": 1.4125018428424, "grad_norm": 2.8445714382081198, "learning_rate": 4.773178638885883e-06, "loss": 1.3096, "step": 47905 }, { "epoch": 1.4126492702344096, "grad_norm": 2.998946651337295, "learning_rate": 4.772559754661048e-06, "loss": 1.3031, "step": 47910 }, { "epoch": 1.412796697626419, "grad_norm": 2.8801415953545524, "learning_rate": 4.7719408403462255e-06, "loss": 1.319, "step": 47915 }, { "epoch": 1.4129441250184285, "grad_norm": 2.804639464610304, "learning_rate": 4.771321895959626e-06, "loss": 1.2923, "step": 47920 }, { "epoch": 1.413091552410438, "grad_norm": 2.8937970459418354, "learning_rate": 4.770702921519464e-06, "loss": 1.3485, "step": 47925 }, { "epoch": 1.4132389798024474, "grad_norm": 2.9782251406468623, "learning_rate": 4.770083917043953e-06, "loss": 1.3793, "step": 47930 }, { "epoch": 1.4133864071944569, "grad_norm": 3.0282725291375288, "learning_rate": 4.769464882551307e-06, "loss": 1.3296, "step": 47935 }, { "epoch": 1.413533834586466, "grad_norm": 2.9197012690976942, "learning_rate": 4.7688458180597446e-06, "loss": 1.3225, "step": 47940 }, { "epoch": 1.4136812619784755, "grad_norm": 2.850936355855207, "learning_rate": 4.76822672358748e-06, "loss": 1.3125, "step": 47945 }, { "epoch": 1.413828689370485, "grad_norm": 2.852828858541616, "learning_rate": 4.767607599152731e-06, "loss": 1.3343, "step": 47950 }, { "epoch": 1.4139761167624945, "grad_norm": 2.7807853191838237, "learning_rate": 4.766988444773716e-06, "loss": 1.2421, "step": 47955 }, { "epoch": 1.414123544154504, "grad_norm": 2.8328575713156736, "learning_rate": 4.766369260468655e-06, "loss": 1.2988, "step": 47960 }, { "epoch": 1.4142709715465134, "grad_norm": 2.8887201963373266, "learning_rate": 4.765750046255768e-06, "loss": 1.3161, "step": 47965 }, { "epoch": 1.4144183989385228, "grad_norm": 2.8244735207004577, "learning_rate": 4.765130802153277e-06, "loss": 1.3148, "step": 47970 }, { "epoch": 1.4145658263305323, "grad_norm": 3.06490826638244, "learning_rate": 4.764511528179401e-06, "loss": 1.3364, "step": 47975 }, { "epoch": 1.4147132537225415, "grad_norm": 2.907640643754187, "learning_rate": 4.763892224352366e-06, "loss": 1.3323, "step": 47980 }, { "epoch": 1.414860681114551, "grad_norm": 2.8676184862232588, "learning_rate": 4.763272890690394e-06, "loss": 1.2908, "step": 47985 }, { "epoch": 1.4150081085065604, "grad_norm": 2.9066171123054185, "learning_rate": 4.762653527211711e-06, "loss": 1.2698, "step": 47990 }, { "epoch": 1.41515553589857, "grad_norm": 2.712645628114823, "learning_rate": 4.762034133934542e-06, "loss": 1.3372, "step": 47995 }, { "epoch": 1.4153029632905794, "grad_norm": 2.7701461836910175, "learning_rate": 4.761414710877112e-06, "loss": 1.2868, "step": 48000 }, { "epoch": 1.4153029632905794, "eval_loss": 1.1024305820465088, "eval_runtime": 4.2634, "eval_samples_per_second": 92.884, "eval_steps_per_second": 3.049, "step": 48000 }, { "epoch": 1.4154503906825888, "grad_norm": 2.7763028653296433, "learning_rate": 4.760795258057649e-06, "loss": 1.3388, "step": 48005 }, { "epoch": 1.4155978180745983, "grad_norm": 2.8104088030929244, "learning_rate": 4.7601757754943814e-06, "loss": 1.2981, "step": 48010 }, { "epoch": 1.4157452454666077, "grad_norm": 2.9021913510886987, "learning_rate": 4.759556263205538e-06, "loss": 1.2907, "step": 48015 }, { "epoch": 1.4158926728586172, "grad_norm": 2.7465350721955732, "learning_rate": 4.75893672120935e-06, "loss": 1.3032, "step": 48020 }, { "epoch": 1.4160401002506267, "grad_norm": 3.598590328644695, "learning_rate": 4.758317149524047e-06, "loss": 1.3088, "step": 48025 }, { "epoch": 1.4161875276426361, "grad_norm": 2.983746536147912, "learning_rate": 4.75769754816786e-06, "loss": 1.3512, "step": 48030 }, { "epoch": 1.4163349550346456, "grad_norm": 2.892187695045086, "learning_rate": 4.75707791715902e-06, "loss": 1.3623, "step": 48035 }, { "epoch": 1.4164823824266548, "grad_norm": 2.7622761202937918, "learning_rate": 4.756458256515765e-06, "loss": 1.267, "step": 48040 }, { "epoch": 1.4166298098186643, "grad_norm": 3.036644513342121, "learning_rate": 4.7558385662563255e-06, "loss": 1.2925, "step": 48045 }, { "epoch": 1.4167772372106737, "grad_norm": 2.7971055820223, "learning_rate": 4.755218846398937e-06, "loss": 1.2675, "step": 48050 }, { "epoch": 1.4169246646026832, "grad_norm": 2.920172445182345, "learning_rate": 4.754599096961837e-06, "loss": 1.3511, "step": 48055 }, { "epoch": 1.4170720919946926, "grad_norm": 2.85350099793081, "learning_rate": 4.753979317963261e-06, "loss": 1.2873, "step": 48060 }, { "epoch": 1.417219519386702, "grad_norm": 2.874320546643346, "learning_rate": 4.753359509421447e-06, "loss": 1.3464, "step": 48065 }, { "epoch": 1.4173669467787116, "grad_norm": 2.951479855677058, "learning_rate": 4.752739671354635e-06, "loss": 1.3387, "step": 48070 }, { "epoch": 1.4175143741707208, "grad_norm": 2.913005021173878, "learning_rate": 4.752119803781062e-06, "loss": 1.3153, "step": 48075 }, { "epoch": 1.4176618015627303, "grad_norm": 3.0712731573281102, "learning_rate": 4.75149990671897e-06, "loss": 1.3189, "step": 48080 }, { "epoch": 1.4178092289547397, "grad_norm": 2.8482018577160364, "learning_rate": 4.750879980186598e-06, "loss": 1.3088, "step": 48085 }, { "epoch": 1.4179566563467492, "grad_norm": 2.7928754820440798, "learning_rate": 4.750260024202192e-06, "loss": 1.3209, "step": 48090 }, { "epoch": 1.4181040837387586, "grad_norm": 2.901149257190573, "learning_rate": 4.749640038783992e-06, "loss": 1.2948, "step": 48095 }, { "epoch": 1.418251511130768, "grad_norm": 2.8501638498133075, "learning_rate": 4.7490200239502415e-06, "loss": 1.2822, "step": 48100 }, { "epoch": 1.4183989385227775, "grad_norm": 3.738939245542385, "learning_rate": 4.748399979719186e-06, "loss": 1.3232, "step": 48105 }, { "epoch": 1.418546365914787, "grad_norm": 2.848716486467648, "learning_rate": 4.747779906109072e-06, "loss": 1.3364, "step": 48110 }, { "epoch": 1.4186937933067965, "grad_norm": 2.9635007295908267, "learning_rate": 4.747159803138144e-06, "loss": 1.3675, "step": 48115 }, { "epoch": 1.418841220698806, "grad_norm": 2.759780456585985, "learning_rate": 4.7465396708246504e-06, "loss": 1.278, "step": 48120 }, { "epoch": 1.4189886480908154, "grad_norm": 2.8358440865933097, "learning_rate": 4.74591950918684e-06, "loss": 1.3068, "step": 48125 }, { "epoch": 1.4191360754828248, "grad_norm": 2.793409748444548, "learning_rate": 4.74529931824296e-06, "loss": 1.2597, "step": 48130 }, { "epoch": 1.419283502874834, "grad_norm": 2.883023651641256, "learning_rate": 4.744679098011261e-06, "loss": 1.28, "step": 48135 }, { "epoch": 1.4194309302668435, "grad_norm": 2.78161963841085, "learning_rate": 4.744058848509994e-06, "loss": 1.3341, "step": 48140 }, { "epoch": 1.419578357658853, "grad_norm": 2.859088278520208, "learning_rate": 4.74343856975741e-06, "loss": 1.3223, "step": 48145 }, { "epoch": 1.4197257850508624, "grad_norm": 3.038575976306047, "learning_rate": 4.742818261771763e-06, "loss": 1.369, "step": 48150 }, { "epoch": 1.419873212442872, "grad_norm": 2.8828676692840185, "learning_rate": 4.742197924571304e-06, "loss": 1.3186, "step": 48155 }, { "epoch": 1.4200206398348814, "grad_norm": 2.7119341315788716, "learning_rate": 4.741577558174288e-06, "loss": 1.2522, "step": 48160 }, { "epoch": 1.4201680672268908, "grad_norm": 2.876837706572083, "learning_rate": 4.7409571625989694e-06, "loss": 1.4045, "step": 48165 }, { "epoch": 1.4203154946189, "grad_norm": 3.0234885055799126, "learning_rate": 4.740336737863605e-06, "loss": 1.3133, "step": 48170 }, { "epoch": 1.4204629220109095, "grad_norm": 2.868302824232021, "learning_rate": 4.7397162839864525e-06, "loss": 1.2622, "step": 48175 }, { "epoch": 1.420610349402919, "grad_norm": 2.807608167505161, "learning_rate": 4.739095800985767e-06, "loss": 1.3418, "step": 48180 }, { "epoch": 1.4207577767949284, "grad_norm": 2.812871040462798, "learning_rate": 4.738475288879809e-06, "loss": 1.3067, "step": 48185 }, { "epoch": 1.4209052041869379, "grad_norm": 2.835485066463372, "learning_rate": 4.7378547476868356e-06, "loss": 1.3102, "step": 48190 }, { "epoch": 1.4210526315789473, "grad_norm": 2.8648330683747516, "learning_rate": 4.737234177425109e-06, "loss": 1.3869, "step": 48195 }, { "epoch": 1.4212000589709568, "grad_norm": 2.8480154350050255, "learning_rate": 4.73661357811289e-06, "loss": 1.2771, "step": 48200 }, { "epoch": 1.4213474863629663, "grad_norm": 2.684731838654864, "learning_rate": 4.735992949768439e-06, "loss": 1.3148, "step": 48205 }, { "epoch": 1.4214949137549757, "grad_norm": 2.840813877413905, "learning_rate": 4.73537229241002e-06, "loss": 1.3062, "step": 48210 }, { "epoch": 1.4216423411469852, "grad_norm": 2.8406762542062567, "learning_rate": 4.734751606055896e-06, "loss": 1.2877, "step": 48215 }, { "epoch": 1.4217897685389946, "grad_norm": 2.8542436482451885, "learning_rate": 4.734130890724331e-06, "loss": 1.3096, "step": 48220 }, { "epoch": 1.421937195931004, "grad_norm": 2.817571099556099, "learning_rate": 4.733510146433592e-06, "loss": 1.2999, "step": 48225 }, { "epoch": 1.4220846233230136, "grad_norm": 2.8895988738055043, "learning_rate": 4.732889373201943e-06, "loss": 1.2801, "step": 48230 }, { "epoch": 1.4222320507150228, "grad_norm": 2.7491993613448424, "learning_rate": 4.732268571047652e-06, "loss": 1.3177, "step": 48235 }, { "epoch": 1.4223794781070322, "grad_norm": 2.956276264478058, "learning_rate": 4.7316477399889866e-06, "loss": 1.2739, "step": 48240 }, { "epoch": 1.4225269054990417, "grad_norm": 2.9636890641257367, "learning_rate": 4.731026880044216e-06, "loss": 1.3318, "step": 48245 }, { "epoch": 1.4226743328910512, "grad_norm": 2.85237896364395, "learning_rate": 4.730405991231608e-06, "loss": 1.3267, "step": 48250 }, { "epoch": 1.4228217602830606, "grad_norm": 2.798621766087718, "learning_rate": 4.729785073569436e-06, "loss": 1.2975, "step": 48255 }, { "epoch": 1.42296918767507, "grad_norm": 2.838309809242485, "learning_rate": 4.729164127075968e-06, "loss": 1.3459, "step": 48260 }, { "epoch": 1.4231166150670795, "grad_norm": 2.7937059022779027, "learning_rate": 4.728543151769479e-06, "loss": 1.2762, "step": 48265 }, { "epoch": 1.4232640424590888, "grad_norm": 2.938466646692348, "learning_rate": 4.727922147668238e-06, "loss": 1.2745, "step": 48270 }, { "epoch": 1.4234114698510982, "grad_norm": 2.866847791342597, "learning_rate": 4.727301114790522e-06, "loss": 1.3183, "step": 48275 }, { "epoch": 1.4235588972431077, "grad_norm": 2.9749549252392273, "learning_rate": 4.726680053154606e-06, "loss": 1.3198, "step": 48280 }, { "epoch": 1.4237063246351171, "grad_norm": 2.950254195544864, "learning_rate": 4.726058962778763e-06, "loss": 1.3217, "step": 48285 }, { "epoch": 1.4238537520271266, "grad_norm": 2.8359180288220784, "learning_rate": 4.725437843681273e-06, "loss": 1.3082, "step": 48290 }, { "epoch": 1.424001179419136, "grad_norm": 2.7783256939879446, "learning_rate": 4.724816695880407e-06, "loss": 1.3565, "step": 48295 }, { "epoch": 1.4241486068111455, "grad_norm": 2.730880241783413, "learning_rate": 4.724195519394447e-06, "loss": 1.3157, "step": 48300 }, { "epoch": 1.424296034203155, "grad_norm": 2.7819323072058357, "learning_rate": 4.723574314241674e-06, "loss": 1.2967, "step": 48305 }, { "epoch": 1.4244434615951644, "grad_norm": 2.9103982858884385, "learning_rate": 4.7229530804403635e-06, "loss": 1.3151, "step": 48310 }, { "epoch": 1.424590888987174, "grad_norm": 2.87345356705754, "learning_rate": 4.722331818008798e-06, "loss": 1.3538, "step": 48315 }, { "epoch": 1.4247383163791834, "grad_norm": 2.930524604696031, "learning_rate": 4.721710526965257e-06, "loss": 1.3409, "step": 48320 }, { "epoch": 1.4248857437711928, "grad_norm": 3.0351971223682597, "learning_rate": 4.721089207328026e-06, "loss": 1.2602, "step": 48325 }, { "epoch": 1.425033171163202, "grad_norm": 2.7723919437782434, "learning_rate": 4.720467859115386e-06, "loss": 1.2911, "step": 48330 }, { "epoch": 1.4251805985552115, "grad_norm": 2.929318511125918, "learning_rate": 4.71984648234562e-06, "loss": 1.3372, "step": 48335 }, { "epoch": 1.425328025947221, "grad_norm": 2.872645809652549, "learning_rate": 4.719225077037015e-06, "loss": 1.2684, "step": 48340 }, { "epoch": 1.4254754533392304, "grad_norm": 3.1122500159467803, "learning_rate": 4.718603643207854e-06, "loss": 1.2649, "step": 48345 }, { "epoch": 1.4256228807312399, "grad_norm": 2.6562164593473305, "learning_rate": 4.7179821808764255e-06, "loss": 1.2724, "step": 48350 }, { "epoch": 1.4257703081232493, "grad_norm": 2.7993838301196408, "learning_rate": 4.717360690061017e-06, "loss": 1.2968, "step": 48355 }, { "epoch": 1.4259177355152588, "grad_norm": 2.8395616818005616, "learning_rate": 4.716739170779915e-06, "loss": 1.3633, "step": 48360 }, { "epoch": 1.426065162907268, "grad_norm": 2.9467200951736165, "learning_rate": 4.716117623051409e-06, "loss": 1.3594, "step": 48365 }, { "epoch": 1.4262125902992775, "grad_norm": 2.9341529607373658, "learning_rate": 4.715496046893788e-06, "loss": 1.2827, "step": 48370 }, { "epoch": 1.426360017691287, "grad_norm": 2.818141438787148, "learning_rate": 4.7148744423253434e-06, "loss": 1.3241, "step": 48375 }, { "epoch": 1.4265074450832964, "grad_norm": 2.776988884931843, "learning_rate": 4.714252809364367e-06, "loss": 1.2814, "step": 48380 }, { "epoch": 1.4266548724753059, "grad_norm": 2.8802358562442247, "learning_rate": 4.713631148029151e-06, "loss": 1.3686, "step": 48385 }, { "epoch": 1.4268022998673153, "grad_norm": 2.876544970397357, "learning_rate": 4.713009458337988e-06, "loss": 1.2821, "step": 48390 }, { "epoch": 1.4269497272593248, "grad_norm": 2.8727401868434077, "learning_rate": 4.712387740309171e-06, "loss": 1.3531, "step": 48395 }, { "epoch": 1.4270971546513342, "grad_norm": 2.739725041520133, "learning_rate": 4.711765993960996e-06, "loss": 1.312, "step": 48400 }, { "epoch": 1.4272445820433437, "grad_norm": 2.9112873545059097, "learning_rate": 4.711144219311759e-06, "loss": 1.3405, "step": 48405 }, { "epoch": 1.4273920094353532, "grad_norm": 2.7772193553139384, "learning_rate": 4.710522416379756e-06, "loss": 1.3092, "step": 48410 }, { "epoch": 1.4275394368273626, "grad_norm": 2.961056038777661, "learning_rate": 4.709900585183284e-06, "loss": 1.2982, "step": 48415 }, { "epoch": 1.427686864219372, "grad_norm": 2.994533860472661, "learning_rate": 4.709278725740642e-06, "loss": 1.3006, "step": 48420 }, { "epoch": 1.4278342916113813, "grad_norm": 2.886764707282607, "learning_rate": 4.708656838070127e-06, "loss": 1.342, "step": 48425 }, { "epoch": 1.4279817190033908, "grad_norm": 2.9299914020479285, "learning_rate": 4.708034922190039e-06, "loss": 1.3575, "step": 48430 }, { "epoch": 1.4281291463954002, "grad_norm": 2.9176129382772205, "learning_rate": 4.707412978118682e-06, "loss": 1.3563, "step": 48435 }, { "epoch": 1.4282765737874097, "grad_norm": 2.8728893520918843, "learning_rate": 4.706791005874352e-06, "loss": 1.3173, "step": 48440 }, { "epoch": 1.4284240011794191, "grad_norm": 2.896664674350441, "learning_rate": 4.706169005475356e-06, "loss": 1.347, "step": 48445 }, { "epoch": 1.4285714285714286, "grad_norm": 2.975589426913697, "learning_rate": 4.705546976939994e-06, "loss": 1.299, "step": 48450 }, { "epoch": 1.428718855963438, "grad_norm": 2.7786492060650945, "learning_rate": 4.704924920286572e-06, "loss": 1.3128, "step": 48455 }, { "epoch": 1.4288662833554475, "grad_norm": 2.930061041932102, "learning_rate": 4.704302835533394e-06, "loss": 1.37, "step": 48460 }, { "epoch": 1.4290137107474568, "grad_norm": 2.8805643826061917, "learning_rate": 4.7036807226987655e-06, "loss": 1.3259, "step": 48465 }, { "epoch": 1.4291611381394662, "grad_norm": 2.9157802009018012, "learning_rate": 4.703058581800992e-06, "loss": 1.3818, "step": 48470 }, { "epoch": 1.4293085655314757, "grad_norm": 2.866654133439406, "learning_rate": 4.702436412858381e-06, "loss": 1.3102, "step": 48475 }, { "epoch": 1.4294559929234851, "grad_norm": 2.9570345310846964, "learning_rate": 4.7018142158892415e-06, "loss": 1.3141, "step": 48480 }, { "epoch": 1.4296034203154946, "grad_norm": 2.7978192820380943, "learning_rate": 4.7011919909118826e-06, "loss": 1.3348, "step": 48485 }, { "epoch": 1.429750847707504, "grad_norm": 2.735406041504898, "learning_rate": 4.700569737944613e-06, "loss": 1.3217, "step": 48490 }, { "epoch": 1.4298982750995135, "grad_norm": 2.874845398183555, "learning_rate": 4.699947457005743e-06, "loss": 1.3118, "step": 48495 }, { "epoch": 1.430045702491523, "grad_norm": 2.8390107095804913, "learning_rate": 4.699325148113584e-06, "loss": 1.326, "step": 48500 }, { "epoch": 1.430045702491523, "eval_loss": 1.1015852689743042, "eval_runtime": 4.147, "eval_samples_per_second": 95.491, "eval_steps_per_second": 3.135, "step": 48500 }, { "epoch": 1.4301931298835324, "grad_norm": 2.80830514826248, "learning_rate": 4.6987028112864485e-06, "loss": 1.3071, "step": 48505 }, { "epoch": 1.4303405572755419, "grad_norm": 2.929701324891998, "learning_rate": 4.6980804465426495e-06, "loss": 1.3073, "step": 48510 }, { "epoch": 1.4304879846675513, "grad_norm": 2.7974441509107137, "learning_rate": 4.697458053900502e-06, "loss": 1.3568, "step": 48515 }, { "epoch": 1.4306354120595608, "grad_norm": 2.730601467575781, "learning_rate": 4.6968356333783184e-06, "loss": 1.3234, "step": 48520 }, { "epoch": 1.43078283945157, "grad_norm": 2.9266875573035085, "learning_rate": 4.696213184994415e-06, "loss": 1.3092, "step": 48525 }, { "epoch": 1.4309302668435795, "grad_norm": 2.9699998751153696, "learning_rate": 4.695590708767108e-06, "loss": 1.3049, "step": 48530 }, { "epoch": 1.431077694235589, "grad_norm": 2.8740803241562527, "learning_rate": 4.694968204714715e-06, "loss": 1.2902, "step": 48535 }, { "epoch": 1.4312251216275984, "grad_norm": 2.991319423688159, "learning_rate": 4.694345672855554e-06, "loss": 1.3109, "step": 48540 }, { "epoch": 1.4313725490196079, "grad_norm": 2.950698575374716, "learning_rate": 4.693723113207942e-06, "loss": 1.3454, "step": 48545 }, { "epoch": 1.4315199764116173, "grad_norm": 2.9649715439782782, "learning_rate": 4.693100525790201e-06, "loss": 1.346, "step": 48550 }, { "epoch": 1.4316674038036268, "grad_norm": 2.7176210271105536, "learning_rate": 4.692477910620651e-06, "loss": 1.315, "step": 48555 }, { "epoch": 1.431814831195636, "grad_norm": 2.8956170130449843, "learning_rate": 4.69185526771761e-06, "loss": 1.3326, "step": 48560 }, { "epoch": 1.4319622585876455, "grad_norm": 2.800267211225566, "learning_rate": 4.691232597099404e-06, "loss": 1.2621, "step": 48565 }, { "epoch": 1.432109685979655, "grad_norm": 2.838682489497818, "learning_rate": 4.690609898784352e-06, "loss": 1.3216, "step": 48570 }, { "epoch": 1.4322571133716644, "grad_norm": 2.9509948743408922, "learning_rate": 4.689987172790782e-06, "loss": 1.3145, "step": 48575 }, { "epoch": 1.4324045407636739, "grad_norm": 2.7779750116290916, "learning_rate": 4.6893644191370145e-06, "loss": 1.32, "step": 48580 }, { "epoch": 1.4325519681556833, "grad_norm": 2.7538796622053585, "learning_rate": 4.688741637841376e-06, "loss": 1.2678, "step": 48585 }, { "epoch": 1.4326993955476928, "grad_norm": 2.919051397181746, "learning_rate": 4.688118828922195e-06, "loss": 1.3299, "step": 48590 }, { "epoch": 1.4328468229397022, "grad_norm": 2.8710768677869027, "learning_rate": 4.687495992397794e-06, "loss": 1.3154, "step": 48595 }, { "epoch": 1.4329942503317117, "grad_norm": 2.8162627070534367, "learning_rate": 4.6868731282865046e-06, "loss": 1.3201, "step": 48600 }, { "epoch": 1.4331416777237211, "grad_norm": 2.962315516101531, "learning_rate": 4.6862502366066524e-06, "loss": 1.389, "step": 48605 }, { "epoch": 1.4332891051157306, "grad_norm": 3.0116284567415477, "learning_rate": 4.685627317376568e-06, "loss": 1.2779, "step": 48610 }, { "epoch": 1.43343653250774, "grad_norm": 3.1105199501402288, "learning_rate": 4.685004370614582e-06, "loss": 1.2845, "step": 48615 }, { "epoch": 1.4335839598997493, "grad_norm": 2.8620553496262535, "learning_rate": 4.684381396339025e-06, "loss": 1.3548, "step": 48620 }, { "epoch": 1.4337313872917588, "grad_norm": 2.9069234937875965, "learning_rate": 4.683758394568228e-06, "loss": 1.3397, "step": 48625 }, { "epoch": 1.4338788146837682, "grad_norm": 2.921370523559538, "learning_rate": 4.6831353653205244e-06, "loss": 1.3218, "step": 48630 }, { "epoch": 1.4340262420757777, "grad_norm": 2.8991937524118967, "learning_rate": 4.682512308614246e-06, "loss": 1.2493, "step": 48635 }, { "epoch": 1.4341736694677871, "grad_norm": 2.8172572329230987, "learning_rate": 4.681889224467729e-06, "loss": 1.2867, "step": 48640 }, { "epoch": 1.4343210968597966, "grad_norm": 2.8891849618099013, "learning_rate": 4.681266112899308e-06, "loss": 1.351, "step": 48645 }, { "epoch": 1.434468524251806, "grad_norm": 2.7889993696077093, "learning_rate": 4.680642973927318e-06, "loss": 1.2774, "step": 48650 }, { "epoch": 1.4346159516438155, "grad_norm": 2.804819784714143, "learning_rate": 4.680019807570094e-06, "loss": 1.2898, "step": 48655 }, { "epoch": 1.4347633790358247, "grad_norm": 2.9956891267221555, "learning_rate": 4.679396613845977e-06, "loss": 1.3131, "step": 48660 }, { "epoch": 1.4349108064278342, "grad_norm": 2.8211119111714833, "learning_rate": 4.678773392773302e-06, "loss": 1.3284, "step": 48665 }, { "epoch": 1.4350582338198437, "grad_norm": 2.93349332545546, "learning_rate": 4.678150144370411e-06, "loss": 1.2965, "step": 48670 }, { "epoch": 1.4352056612118531, "grad_norm": 2.912621580380691, "learning_rate": 4.677526868655642e-06, "loss": 1.3779, "step": 48675 }, { "epoch": 1.4353530886038626, "grad_norm": 2.80283272331809, "learning_rate": 4.676903565647336e-06, "loss": 1.3201, "step": 48680 }, { "epoch": 1.435500515995872, "grad_norm": 2.8952658833413465, "learning_rate": 4.676280235363832e-06, "loss": 1.2743, "step": 48685 }, { "epoch": 1.4356479433878815, "grad_norm": 2.908388245831398, "learning_rate": 4.675656877823476e-06, "loss": 1.3555, "step": 48690 }, { "epoch": 1.435795370779891, "grad_norm": 2.8752126452865663, "learning_rate": 4.67503349304461e-06, "loss": 1.3186, "step": 48695 }, { "epoch": 1.4359427981719004, "grad_norm": 2.857272338581598, "learning_rate": 4.674410081045576e-06, "loss": 1.3059, "step": 48700 }, { "epoch": 1.4360902255639099, "grad_norm": 2.9130498275013745, "learning_rate": 4.67378664184472e-06, "loss": 1.3566, "step": 48705 }, { "epoch": 1.4362376529559193, "grad_norm": 2.635039996336706, "learning_rate": 4.673163175460387e-06, "loss": 1.2727, "step": 48710 }, { "epoch": 1.4363850803479288, "grad_norm": 3.022785934225011, "learning_rate": 4.672539681910924e-06, "loss": 1.3414, "step": 48715 }, { "epoch": 1.436532507739938, "grad_norm": 2.9034512502684096, "learning_rate": 4.671916161214678e-06, "loss": 1.2705, "step": 48720 }, { "epoch": 1.4366799351319475, "grad_norm": 2.8270978512530176, "learning_rate": 4.671292613389994e-06, "loss": 1.3059, "step": 48725 }, { "epoch": 1.436827362523957, "grad_norm": 2.7980122539756964, "learning_rate": 4.670669038455225e-06, "loss": 1.3261, "step": 48730 }, { "epoch": 1.4369747899159664, "grad_norm": 2.867276946412399, "learning_rate": 4.670045436428717e-06, "loss": 1.3396, "step": 48735 }, { "epoch": 1.4371222173079758, "grad_norm": 2.8797043996921774, "learning_rate": 4.669421807328821e-06, "loss": 1.314, "step": 48740 }, { "epoch": 1.4372696446999853, "grad_norm": 2.7864219250328985, "learning_rate": 4.668798151173891e-06, "loss": 1.3102, "step": 48745 }, { "epoch": 1.4374170720919948, "grad_norm": 2.905149544323927, "learning_rate": 4.668174467982276e-06, "loss": 1.3306, "step": 48750 }, { "epoch": 1.437564499484004, "grad_norm": 2.8356804769208344, "learning_rate": 4.6675507577723255e-06, "loss": 1.3236, "step": 48755 }, { "epoch": 1.4377119268760135, "grad_norm": 2.8301363323856927, "learning_rate": 4.666927020562399e-06, "loss": 1.2935, "step": 48760 }, { "epoch": 1.437859354268023, "grad_norm": 2.8169962361785, "learning_rate": 4.666303256370847e-06, "loss": 1.3069, "step": 48765 }, { "epoch": 1.4380067816600324, "grad_norm": 2.7656304748367844, "learning_rate": 4.665679465216025e-06, "loss": 1.3055, "step": 48770 }, { "epoch": 1.4381542090520418, "grad_norm": 2.7567525570490585, "learning_rate": 4.66505564711629e-06, "loss": 1.283, "step": 48775 }, { "epoch": 1.4383016364440513, "grad_norm": 2.7288097495694155, "learning_rate": 4.664431802089998e-06, "loss": 1.2684, "step": 48780 }, { "epoch": 1.4384490638360607, "grad_norm": 2.7722405553266545, "learning_rate": 4.663807930155504e-06, "loss": 1.2735, "step": 48785 }, { "epoch": 1.4385964912280702, "grad_norm": 2.7789890668059827, "learning_rate": 4.66318403133117e-06, "loss": 1.2727, "step": 48790 }, { "epoch": 1.4387439186200797, "grad_norm": 2.826223392572308, "learning_rate": 4.662560105635352e-06, "loss": 1.3238, "step": 48795 }, { "epoch": 1.4388913460120891, "grad_norm": 3.0278090383662057, "learning_rate": 4.661936153086413e-06, "loss": 1.3784, "step": 48800 }, { "epoch": 1.4390387734040986, "grad_norm": 2.875651064686378, "learning_rate": 4.661312173702709e-06, "loss": 1.3118, "step": 48805 }, { "epoch": 1.439186200796108, "grad_norm": 2.9752736742695127, "learning_rate": 4.660688167502606e-06, "loss": 1.3341, "step": 48810 }, { "epoch": 1.4393336281881173, "grad_norm": 2.887048345389724, "learning_rate": 4.660064134504462e-06, "loss": 1.3092, "step": 48815 }, { "epoch": 1.4394810555801267, "grad_norm": 2.932305409613743, "learning_rate": 4.659440074726642e-06, "loss": 1.286, "step": 48820 }, { "epoch": 1.4396284829721362, "grad_norm": 2.7352724056613393, "learning_rate": 4.6588159881875095e-06, "loss": 1.2838, "step": 48825 }, { "epoch": 1.4397759103641457, "grad_norm": 2.8426248093184285, "learning_rate": 4.658191874905429e-06, "loss": 1.3293, "step": 48830 }, { "epoch": 1.439923337756155, "grad_norm": 2.7613756793061843, "learning_rate": 4.657567734898765e-06, "loss": 1.2772, "step": 48835 }, { "epoch": 1.4400707651481646, "grad_norm": 2.7599704536708307, "learning_rate": 4.656943568185884e-06, "loss": 1.3272, "step": 48840 }, { "epoch": 1.440218192540174, "grad_norm": 2.7303281535736734, "learning_rate": 4.656319374785153e-06, "loss": 1.3028, "step": 48845 }, { "epoch": 1.4403656199321833, "grad_norm": 2.9183972683098665, "learning_rate": 4.65569515471494e-06, "loss": 1.338, "step": 48850 }, { "epoch": 1.4405130473241927, "grad_norm": 2.742350008282786, "learning_rate": 4.655070907993612e-06, "loss": 1.3369, "step": 48855 }, { "epoch": 1.4406604747162022, "grad_norm": 2.8358565378855016, "learning_rate": 4.65444663463954e-06, "loss": 1.3069, "step": 48860 }, { "epoch": 1.4408079021082116, "grad_norm": 2.7565455860907973, "learning_rate": 4.653822334671092e-06, "loss": 1.3493, "step": 48865 }, { "epoch": 1.440955329500221, "grad_norm": 2.8551449972774514, "learning_rate": 4.65319800810664e-06, "loss": 1.3043, "step": 48870 }, { "epoch": 1.4411027568922306, "grad_norm": 2.7177018299158666, "learning_rate": 4.652573654964557e-06, "loss": 1.345, "step": 48875 }, { "epoch": 1.44125018428424, "grad_norm": 2.836442623147188, "learning_rate": 4.6519492752632125e-06, "loss": 1.3659, "step": 48880 }, { "epoch": 1.4413976116762495, "grad_norm": 2.792151506589446, "learning_rate": 4.6513248690209795e-06, "loss": 1.3077, "step": 48885 }, { "epoch": 1.441545039068259, "grad_norm": 2.802334869280518, "learning_rate": 4.650700436256234e-06, "loss": 1.3661, "step": 48890 }, { "epoch": 1.4416924664602684, "grad_norm": 2.8885455849138055, "learning_rate": 4.650075976987349e-06, "loss": 1.3288, "step": 48895 }, { "epoch": 1.4418398938522778, "grad_norm": 2.923830777388097, "learning_rate": 4.6494514912327016e-06, "loss": 1.3566, "step": 48900 }, { "epoch": 1.4419873212442873, "grad_norm": 2.836409957563271, "learning_rate": 4.648826979010667e-06, "loss": 1.3394, "step": 48905 }, { "epoch": 1.4421347486362968, "grad_norm": 2.830140505541777, "learning_rate": 4.6482024403396214e-06, "loss": 1.2882, "step": 48910 }, { "epoch": 1.442282176028306, "grad_norm": 2.8368926110679245, "learning_rate": 4.647577875237943e-06, "loss": 1.3642, "step": 48915 }, { "epoch": 1.4424296034203155, "grad_norm": 2.8149062345143903, "learning_rate": 4.646953283724011e-06, "loss": 1.3125, "step": 48920 }, { "epoch": 1.442577030812325, "grad_norm": 2.918887989637676, "learning_rate": 4.646328665816205e-06, "loss": 1.3364, "step": 48925 }, { "epoch": 1.4427244582043344, "grad_norm": 2.781995836782417, "learning_rate": 4.645704021532904e-06, "loss": 1.3036, "step": 48930 }, { "epoch": 1.4428718855963438, "grad_norm": 2.884903031731525, "learning_rate": 4.64507935089249e-06, "loss": 1.3128, "step": 48935 }, { "epoch": 1.4430193129883533, "grad_norm": 2.8426465580941223, "learning_rate": 4.6444546539133445e-06, "loss": 1.2921, "step": 48940 }, { "epoch": 1.4431667403803627, "grad_norm": 2.822621361353054, "learning_rate": 4.6438299306138484e-06, "loss": 1.3388, "step": 48945 }, { "epoch": 1.443314167772372, "grad_norm": 2.9302534374323583, "learning_rate": 4.643205181012386e-06, "loss": 1.2854, "step": 48950 }, { "epoch": 1.4434615951643814, "grad_norm": 2.862551795158435, "learning_rate": 4.642580405127342e-06, "loss": 1.3288, "step": 48955 }, { "epoch": 1.443609022556391, "grad_norm": 2.755665370857261, "learning_rate": 4.641955602977099e-06, "loss": 1.2907, "step": 48960 }, { "epoch": 1.4437564499484004, "grad_norm": 2.8516414495909514, "learning_rate": 4.641330774580045e-06, "loss": 1.2915, "step": 48965 }, { "epoch": 1.4439038773404098, "grad_norm": 2.9587923959846782, "learning_rate": 4.640705919954565e-06, "loss": 1.3583, "step": 48970 }, { "epoch": 1.4440513047324193, "grad_norm": 2.8778367427656906, "learning_rate": 4.640081039119046e-06, "loss": 1.3532, "step": 48975 }, { "epoch": 1.4441987321244287, "grad_norm": 3.035607032580927, "learning_rate": 4.639456132091876e-06, "loss": 1.3355, "step": 48980 }, { "epoch": 1.4443461595164382, "grad_norm": 2.708045998600441, "learning_rate": 4.638831198891444e-06, "loss": 1.2937, "step": 48985 }, { "epoch": 1.4444935869084476, "grad_norm": 2.9657831205908223, "learning_rate": 4.638206239536138e-06, "loss": 1.3059, "step": 48990 }, { "epoch": 1.444641014300457, "grad_norm": 2.730703670687424, "learning_rate": 4.63758125404435e-06, "loss": 1.3287, "step": 48995 }, { "epoch": 1.4447884416924666, "grad_norm": 2.8813389960469005, "learning_rate": 4.6369562424344696e-06, "loss": 1.35, "step": 49000 }, { "epoch": 1.4447884416924666, "eval_loss": 1.1008578538894653, "eval_runtime": 4.2846, "eval_samples_per_second": 92.423, "eval_steps_per_second": 3.034, "step": 49000 }, { "epoch": 1.444935869084476, "grad_norm": 2.7516447676238887, "learning_rate": 4.63633120472489e-06, "loss": 1.3276, "step": 49005 }, { "epoch": 1.4450832964764853, "grad_norm": 2.98962566940014, "learning_rate": 4.635706140934002e-06, "loss": 1.2915, "step": 49010 }, { "epoch": 1.4452307238684947, "grad_norm": 2.8856321190034087, "learning_rate": 4.6350810510802e-06, "loss": 1.3292, "step": 49015 }, { "epoch": 1.4453781512605042, "grad_norm": 2.898573459583811, "learning_rate": 4.634455935181877e-06, "loss": 1.3044, "step": 49020 }, { "epoch": 1.4455255786525136, "grad_norm": 2.8869218029240544, "learning_rate": 4.6338307932574275e-06, "loss": 1.3104, "step": 49025 }, { "epoch": 1.445673006044523, "grad_norm": 2.816905226690664, "learning_rate": 4.633205625325248e-06, "loss": 1.2502, "step": 49030 }, { "epoch": 1.4458204334365325, "grad_norm": 2.956636671343898, "learning_rate": 4.632580431403735e-06, "loss": 1.3413, "step": 49035 }, { "epoch": 1.445967860828542, "grad_norm": 2.7686177001732863, "learning_rate": 4.631955211511285e-06, "loss": 1.2556, "step": 49040 }, { "epoch": 1.4461152882205512, "grad_norm": 2.854064146908017, "learning_rate": 4.631329965666295e-06, "loss": 1.2998, "step": 49045 }, { "epoch": 1.4462627156125607, "grad_norm": 2.8474812392851416, "learning_rate": 4.630704693887165e-06, "loss": 1.3164, "step": 49050 }, { "epoch": 1.4464101430045702, "grad_norm": 2.8810841392937006, "learning_rate": 4.630079396192293e-06, "loss": 1.2845, "step": 49055 }, { "epoch": 1.4465575703965796, "grad_norm": 2.8367975739438234, "learning_rate": 4.629454072600082e-06, "loss": 1.354, "step": 49060 }, { "epoch": 1.446704997788589, "grad_norm": 2.7961879743665476, "learning_rate": 4.628828723128928e-06, "loss": 1.3072, "step": 49065 }, { "epoch": 1.4468524251805985, "grad_norm": 2.837137495359492, "learning_rate": 4.628203347797237e-06, "loss": 1.3157, "step": 49070 }, { "epoch": 1.446999852572608, "grad_norm": 2.786482740934724, "learning_rate": 4.627577946623409e-06, "loss": 1.2924, "step": 49075 }, { "epoch": 1.4471472799646175, "grad_norm": 2.7649626258659947, "learning_rate": 4.626952519625847e-06, "loss": 1.2762, "step": 49080 }, { "epoch": 1.447294707356627, "grad_norm": 2.801941341712752, "learning_rate": 4.626327066822958e-06, "loss": 1.3021, "step": 49085 }, { "epoch": 1.4474421347486364, "grad_norm": 2.897307070056573, "learning_rate": 4.625701588233143e-06, "loss": 1.3159, "step": 49090 }, { "epoch": 1.4475895621406458, "grad_norm": 2.914268818578162, "learning_rate": 4.6250760838748094e-06, "loss": 1.2994, "step": 49095 }, { "epoch": 1.4477369895326553, "grad_norm": 2.8568177095335225, "learning_rate": 4.6244505537663616e-06, "loss": 1.3471, "step": 49100 }, { "epoch": 1.4478844169246647, "grad_norm": 2.8822760519563047, "learning_rate": 4.623824997926208e-06, "loss": 1.289, "step": 49105 }, { "epoch": 1.448031844316674, "grad_norm": 2.911774841876054, "learning_rate": 4.623199416372757e-06, "loss": 1.3087, "step": 49110 }, { "epoch": 1.4481792717086834, "grad_norm": 2.9020212059096404, "learning_rate": 4.622573809124415e-06, "loss": 1.2474, "step": 49115 }, { "epoch": 1.448326699100693, "grad_norm": 2.7923832040146017, "learning_rate": 4.621948176199593e-06, "loss": 1.3611, "step": 49120 }, { "epoch": 1.4484741264927024, "grad_norm": 3.0593114939433614, "learning_rate": 4.6213225176167e-06, "loss": 1.3626, "step": 49125 }, { "epoch": 1.4486215538847118, "grad_norm": 2.933874073177931, "learning_rate": 4.620696833394146e-06, "loss": 1.3337, "step": 49130 }, { "epoch": 1.4487689812767213, "grad_norm": 2.8822570946567945, "learning_rate": 4.620071123550346e-06, "loss": 1.2624, "step": 49135 }, { "epoch": 1.4489164086687307, "grad_norm": 2.733948188419434, "learning_rate": 4.619445388103708e-06, "loss": 1.2844, "step": 49140 }, { "epoch": 1.44906383606074, "grad_norm": 2.855210424752552, "learning_rate": 4.6188196270726465e-06, "loss": 1.367, "step": 49145 }, { "epoch": 1.4492112634527494, "grad_norm": 2.7759425107571167, "learning_rate": 4.618193840475576e-06, "loss": 1.3524, "step": 49150 }, { "epoch": 1.4493586908447589, "grad_norm": 2.9008108274165103, "learning_rate": 4.6175680283309105e-06, "loss": 1.3374, "step": 49155 }, { "epoch": 1.4495061182367683, "grad_norm": 2.8957860659949315, "learning_rate": 4.616942190657064e-06, "loss": 1.3775, "step": 49160 }, { "epoch": 1.4496535456287778, "grad_norm": 2.786995471670467, "learning_rate": 4.6163163274724544e-06, "loss": 1.2969, "step": 49165 }, { "epoch": 1.4498009730207873, "grad_norm": 2.969807079786854, "learning_rate": 4.615690438795498e-06, "loss": 1.3185, "step": 49170 }, { "epoch": 1.4499484004127967, "grad_norm": 2.872466604900823, "learning_rate": 4.615064524644611e-06, "loss": 1.3429, "step": 49175 }, { "epoch": 1.4500958278048062, "grad_norm": 2.917235483461362, "learning_rate": 4.614438585038213e-06, "loss": 1.3159, "step": 49180 }, { "epoch": 1.4502432551968156, "grad_norm": 2.7942727821906086, "learning_rate": 4.613812619994723e-06, "loss": 1.324, "step": 49185 }, { "epoch": 1.450390682588825, "grad_norm": 2.7718468044943614, "learning_rate": 4.61318662953256e-06, "loss": 1.1928, "step": 49190 }, { "epoch": 1.4505381099808345, "grad_norm": 2.959631942599432, "learning_rate": 4.612560613670144e-06, "loss": 1.3539, "step": 49195 }, { "epoch": 1.450685537372844, "grad_norm": 2.888176964895354, "learning_rate": 4.611934572425898e-06, "loss": 1.3346, "step": 49200 }, { "epoch": 1.4508329647648532, "grad_norm": 2.9762523599766686, "learning_rate": 4.611308505818242e-06, "loss": 1.3329, "step": 49205 }, { "epoch": 1.4509803921568627, "grad_norm": 2.8672833479348343, "learning_rate": 4.610682413865601e-06, "loss": 1.3165, "step": 49210 }, { "epoch": 1.4511278195488722, "grad_norm": 2.878129718715138, "learning_rate": 4.6100562965863975e-06, "loss": 1.3121, "step": 49215 }, { "epoch": 1.4512752469408816, "grad_norm": 2.857362080317014, "learning_rate": 4.609430153999054e-06, "loss": 1.2987, "step": 49220 }, { "epoch": 1.451422674332891, "grad_norm": 2.824228764382716, "learning_rate": 4.608803986121997e-06, "loss": 1.2923, "step": 49225 }, { "epoch": 1.4515701017249005, "grad_norm": 2.891973666542448, "learning_rate": 4.608177792973652e-06, "loss": 1.3147, "step": 49230 }, { "epoch": 1.45171752911691, "grad_norm": 2.8451164260788877, "learning_rate": 4.607551574572445e-06, "loss": 1.2873, "step": 49235 }, { "epoch": 1.4518649565089192, "grad_norm": 2.847310733312094, "learning_rate": 4.606925330936806e-06, "loss": 1.3777, "step": 49240 }, { "epoch": 1.4520123839009287, "grad_norm": 2.8866542189300923, "learning_rate": 4.606299062085159e-06, "loss": 1.28, "step": 49245 }, { "epoch": 1.4521598112929381, "grad_norm": 2.8385528057104326, "learning_rate": 4.605672768035934e-06, "loss": 1.2925, "step": 49250 }, { "epoch": 1.4523072386849476, "grad_norm": 2.816421578607567, "learning_rate": 4.60504644880756e-06, "loss": 1.268, "step": 49255 }, { "epoch": 1.452454666076957, "grad_norm": 2.839264017263066, "learning_rate": 4.604420104418469e-06, "loss": 1.3024, "step": 49260 }, { "epoch": 1.4526020934689665, "grad_norm": 3.0662038712792863, "learning_rate": 4.603793734887092e-06, "loss": 1.2501, "step": 49265 }, { "epoch": 1.452749520860976, "grad_norm": 3.056060451675719, "learning_rate": 4.603167340231858e-06, "loss": 1.3086, "step": 49270 }, { "epoch": 1.4528969482529854, "grad_norm": 2.7998149154954866, "learning_rate": 4.602540920471202e-06, "loss": 1.2973, "step": 49275 }, { "epoch": 1.453044375644995, "grad_norm": 2.8967445992001033, "learning_rate": 4.601914475623554e-06, "loss": 1.3316, "step": 49280 }, { "epoch": 1.4531918030370043, "grad_norm": 2.7761111980555007, "learning_rate": 4.601288005707351e-06, "loss": 1.3218, "step": 49285 }, { "epoch": 1.4533392304290138, "grad_norm": 2.843832559993474, "learning_rate": 4.600661510741026e-06, "loss": 1.2945, "step": 49290 }, { "epoch": 1.4534866578210233, "grad_norm": 2.844851402299494, "learning_rate": 4.600034990743016e-06, "loss": 1.2834, "step": 49295 }, { "epoch": 1.4536340852130325, "grad_norm": 2.9252412564784636, "learning_rate": 4.599408445731753e-06, "loss": 1.3213, "step": 49300 }, { "epoch": 1.453781512605042, "grad_norm": 3.0812879501038695, "learning_rate": 4.598781875725678e-06, "loss": 1.3096, "step": 49305 }, { "epoch": 1.4539289399970514, "grad_norm": 2.8254543028070866, "learning_rate": 4.598155280743227e-06, "loss": 1.2881, "step": 49310 }, { "epoch": 1.4540763673890609, "grad_norm": 2.9645620251232376, "learning_rate": 4.597528660802839e-06, "loss": 1.3125, "step": 49315 }, { "epoch": 1.4542237947810703, "grad_norm": 2.9440839315865874, "learning_rate": 4.596902015922952e-06, "loss": 1.2629, "step": 49320 }, { "epoch": 1.4543712221730798, "grad_norm": 2.998082711900771, "learning_rate": 4.596275346122006e-06, "loss": 1.3507, "step": 49325 }, { "epoch": 1.4545186495650892, "grad_norm": 2.9636328327591257, "learning_rate": 4.595648651418441e-06, "loss": 1.2875, "step": 49330 }, { "epoch": 1.4546660769570987, "grad_norm": 2.8424966084514214, "learning_rate": 4.5950219318307005e-06, "loss": 1.3103, "step": 49335 }, { "epoch": 1.454813504349108, "grad_norm": 2.9173947668389335, "learning_rate": 4.594395187377224e-06, "loss": 1.3218, "step": 49340 }, { "epoch": 1.4549609317411174, "grad_norm": 2.891074568216096, "learning_rate": 4.593768418076454e-06, "loss": 1.3684, "step": 49345 }, { "epoch": 1.4551083591331269, "grad_norm": 2.8729737553536787, "learning_rate": 4.593141623946836e-06, "loss": 1.3018, "step": 49350 }, { "epoch": 1.4552557865251363, "grad_norm": 2.9416200476462215, "learning_rate": 4.592514805006812e-06, "loss": 1.3027, "step": 49355 }, { "epoch": 1.4554032139171458, "grad_norm": 2.9252546867585205, "learning_rate": 4.591887961274828e-06, "loss": 1.3203, "step": 49360 }, { "epoch": 1.4555506413091552, "grad_norm": 2.9071118363576667, "learning_rate": 4.59126109276933e-06, "loss": 1.3692, "step": 49365 }, { "epoch": 1.4556980687011647, "grad_norm": 2.9189574949400066, "learning_rate": 4.590634199508764e-06, "loss": 1.3286, "step": 49370 }, { "epoch": 1.4558454960931742, "grad_norm": 2.900259290317756, "learning_rate": 4.5900072815115754e-06, "loss": 1.3088, "step": 49375 }, { "epoch": 1.4559929234851836, "grad_norm": 2.714374406073723, "learning_rate": 4.589380338796214e-06, "loss": 1.3166, "step": 49380 }, { "epoch": 1.456140350877193, "grad_norm": 2.6896319532649793, "learning_rate": 4.588753371381127e-06, "loss": 1.3342, "step": 49385 }, { "epoch": 1.4562877782692025, "grad_norm": 2.7983322160540123, "learning_rate": 4.588126379284765e-06, "loss": 1.3355, "step": 49390 }, { "epoch": 1.456435205661212, "grad_norm": 2.8918377620726936, "learning_rate": 4.587499362525578e-06, "loss": 1.3755, "step": 49395 }, { "epoch": 1.4565826330532212, "grad_norm": 2.9045788760010085, "learning_rate": 4.586872321122016e-06, "loss": 1.3276, "step": 49400 }, { "epoch": 1.4567300604452307, "grad_norm": 2.785722312795803, "learning_rate": 4.58624525509253e-06, "loss": 1.2925, "step": 49405 }, { "epoch": 1.4568774878372401, "grad_norm": 3.094053488226169, "learning_rate": 4.585618164455571e-06, "loss": 1.323, "step": 49410 }, { "epoch": 1.4570249152292496, "grad_norm": 2.8241306157096253, "learning_rate": 4.584991049229594e-06, "loss": 1.3007, "step": 49415 }, { "epoch": 1.457172342621259, "grad_norm": 2.675991735319993, "learning_rate": 4.584363909433052e-06, "loss": 1.3268, "step": 49420 }, { "epoch": 1.4573197700132685, "grad_norm": 2.8786518812352417, "learning_rate": 4.5837367450844005e-06, "loss": 1.3189, "step": 49425 }, { "epoch": 1.457467197405278, "grad_norm": 2.839983949148231, "learning_rate": 4.583109556202092e-06, "loss": 1.2776, "step": 49430 }, { "epoch": 1.4576146247972872, "grad_norm": 2.8696831256212287, "learning_rate": 4.582482342804583e-06, "loss": 1.2952, "step": 49435 }, { "epoch": 1.4577620521892967, "grad_norm": 2.99077863609398, "learning_rate": 4.581855104910331e-06, "loss": 1.3896, "step": 49440 }, { "epoch": 1.4579094795813061, "grad_norm": 2.9407458540450775, "learning_rate": 4.581227842537793e-06, "loss": 1.3131, "step": 49445 }, { "epoch": 1.4580569069733156, "grad_norm": 2.912135003238348, "learning_rate": 4.580600555705427e-06, "loss": 1.3164, "step": 49450 }, { "epoch": 1.458204334365325, "grad_norm": 2.9638692499975257, "learning_rate": 4.5799732444316905e-06, "loss": 1.2918, "step": 49455 }, { "epoch": 1.4583517617573345, "grad_norm": 2.741336857351245, "learning_rate": 4.579345908735044e-06, "loss": 1.2468, "step": 49460 }, { "epoch": 1.458499189149344, "grad_norm": 2.911528432639223, "learning_rate": 4.578718548633947e-06, "loss": 1.3345, "step": 49465 }, { "epoch": 1.4586466165413534, "grad_norm": 2.946730295999335, "learning_rate": 4.578091164146859e-06, "loss": 1.3053, "step": 49470 }, { "epoch": 1.4587940439333629, "grad_norm": 2.8753119672103558, "learning_rate": 4.5774637552922456e-06, "loss": 1.3234, "step": 49475 }, { "epoch": 1.4589414713253723, "grad_norm": 2.744356065361049, "learning_rate": 4.576836322088564e-06, "loss": 1.2463, "step": 49480 }, { "epoch": 1.4590888987173818, "grad_norm": 2.8016985139460897, "learning_rate": 4.576208864554281e-06, "loss": 1.2704, "step": 49485 }, { "epoch": 1.4592363261093912, "grad_norm": 2.959128680141988, "learning_rate": 4.575581382707858e-06, "loss": 1.3367, "step": 49490 }, { "epoch": 1.4593837535014005, "grad_norm": 2.753120269776671, "learning_rate": 4.57495387656776e-06, "loss": 1.3228, "step": 49495 }, { "epoch": 1.45953118089341, "grad_norm": 2.8112386934022795, "learning_rate": 4.5743263461524524e-06, "loss": 1.3054, "step": 49500 }, { "epoch": 1.45953118089341, "eval_loss": 1.0997750759124756, "eval_runtime": 4.1581, "eval_samples_per_second": 95.237, "eval_steps_per_second": 3.126, "step": 49500 }, { "epoch": 1.4596786082854194, "grad_norm": 2.9555118621483567, "learning_rate": 4.5736987914804e-06, "loss": 1.3511, "step": 49505 }, { "epoch": 1.4598260356774289, "grad_norm": 2.904567467206201, "learning_rate": 4.5730712125700715e-06, "loss": 1.2647, "step": 49510 }, { "epoch": 1.4599734630694383, "grad_norm": 2.943616728619081, "learning_rate": 4.572443609439932e-06, "loss": 1.3333, "step": 49515 }, { "epoch": 1.4601208904614478, "grad_norm": 2.7941877095385212, "learning_rate": 4.57181598210845e-06, "loss": 1.2923, "step": 49520 }, { "epoch": 1.4602683178534572, "grad_norm": 2.9478449324726816, "learning_rate": 4.571188330594095e-06, "loss": 1.3251, "step": 49525 }, { "epoch": 1.4604157452454667, "grad_norm": 2.7921909734487182, "learning_rate": 4.570560654915336e-06, "loss": 1.3052, "step": 49530 }, { "epoch": 1.460563172637476, "grad_norm": 2.894628656697957, "learning_rate": 4.569932955090641e-06, "loss": 1.3034, "step": 49535 }, { "epoch": 1.4607106000294854, "grad_norm": 2.9570787188784373, "learning_rate": 4.569305231138484e-06, "loss": 1.2774, "step": 49540 }, { "epoch": 1.4608580274214948, "grad_norm": 3.042873847007404, "learning_rate": 4.568677483077334e-06, "loss": 1.2822, "step": 49545 }, { "epoch": 1.4610054548135043, "grad_norm": 2.8517340567109555, "learning_rate": 4.568049710925664e-06, "loss": 1.3038, "step": 49550 }, { "epoch": 1.4611528822055138, "grad_norm": 2.770215850564039, "learning_rate": 4.567421914701949e-06, "loss": 1.3109, "step": 49555 }, { "epoch": 1.4613003095975232, "grad_norm": 2.6795027443259154, "learning_rate": 4.566794094424659e-06, "loss": 1.2637, "step": 49560 }, { "epoch": 1.4614477369895327, "grad_norm": 2.924742852403286, "learning_rate": 4.5661662501122714e-06, "loss": 1.3788, "step": 49565 }, { "epoch": 1.4615951643815421, "grad_norm": 2.7823271414698176, "learning_rate": 4.565538381783259e-06, "loss": 1.3319, "step": 49570 }, { "epoch": 1.4617425917735516, "grad_norm": 2.8212460037976816, "learning_rate": 4.564910489456098e-06, "loss": 1.3099, "step": 49575 }, { "epoch": 1.461890019165561, "grad_norm": 2.8652564742516633, "learning_rate": 4.5642825731492665e-06, "loss": 1.3425, "step": 49580 }, { "epoch": 1.4620374465575705, "grad_norm": 2.785599874108369, "learning_rate": 4.56365463288124e-06, "loss": 1.3021, "step": 49585 }, { "epoch": 1.46218487394958, "grad_norm": 2.8559518073612127, "learning_rate": 4.563026668670497e-06, "loss": 1.3785, "step": 49590 }, { "epoch": 1.4623323013415892, "grad_norm": 2.8188937408655272, "learning_rate": 4.562398680535515e-06, "loss": 1.2944, "step": 49595 }, { "epoch": 1.4624797287335987, "grad_norm": 2.984241628956383, "learning_rate": 4.561770668494776e-06, "loss": 1.3336, "step": 49600 }, { "epoch": 1.4626271561256081, "grad_norm": 2.8716071263236573, "learning_rate": 4.561142632566757e-06, "loss": 1.313, "step": 49605 }, { "epoch": 1.4627745835176176, "grad_norm": 2.892249361584178, "learning_rate": 4.56051457276994e-06, "loss": 1.3231, "step": 49610 }, { "epoch": 1.462922010909627, "grad_norm": 2.8227280731928164, "learning_rate": 4.559886489122806e-06, "loss": 1.3338, "step": 49615 }, { "epoch": 1.4630694383016365, "grad_norm": 2.814206988161512, "learning_rate": 4.559258381643837e-06, "loss": 1.2578, "step": 49620 }, { "epoch": 1.463216865693646, "grad_norm": 2.7851305726735727, "learning_rate": 4.558630250351516e-06, "loss": 1.298, "step": 49625 }, { "epoch": 1.4633642930856552, "grad_norm": 2.8394347261543604, "learning_rate": 4.5580020952643275e-06, "loss": 1.3403, "step": 49630 }, { "epoch": 1.4635117204776646, "grad_norm": 2.795747018154428, "learning_rate": 4.557373916400753e-06, "loss": 1.3206, "step": 49635 }, { "epoch": 1.463659147869674, "grad_norm": 2.836991499720449, "learning_rate": 4.5567457137792805e-06, "loss": 1.3297, "step": 49640 }, { "epoch": 1.4638065752616836, "grad_norm": 2.797878233127281, "learning_rate": 4.5561174874183935e-06, "loss": 1.287, "step": 49645 }, { "epoch": 1.463954002653693, "grad_norm": 2.6748289774140157, "learning_rate": 4.555489237336579e-06, "loss": 1.2323, "step": 49650 }, { "epoch": 1.4641014300457025, "grad_norm": 3.0066053075151262, "learning_rate": 4.554860963552325e-06, "loss": 1.344, "step": 49655 }, { "epoch": 1.464248857437712, "grad_norm": 2.899174218223935, "learning_rate": 4.554232666084118e-06, "loss": 1.2737, "step": 49660 }, { "epoch": 1.4643962848297214, "grad_norm": 2.900357182992855, "learning_rate": 4.553604344950446e-06, "loss": 1.3092, "step": 49665 }, { "epoch": 1.4645437122217309, "grad_norm": 2.79717280311994, "learning_rate": 4.5529760001697975e-06, "loss": 1.3795, "step": 49670 }, { "epoch": 1.4646911396137403, "grad_norm": 2.9565308803580055, "learning_rate": 4.5523476317606645e-06, "loss": 1.3915, "step": 49675 }, { "epoch": 1.4648385670057498, "grad_norm": 2.856802469738279, "learning_rate": 4.551719239741535e-06, "loss": 1.2639, "step": 49680 }, { "epoch": 1.4649859943977592, "grad_norm": 2.807957558222574, "learning_rate": 4.551090824130903e-06, "loss": 1.2681, "step": 49685 }, { "epoch": 1.4651334217897685, "grad_norm": 2.9513105056506093, "learning_rate": 4.550462384947258e-06, "loss": 1.3305, "step": 49690 }, { "epoch": 1.465280849181778, "grad_norm": 2.900584227928756, "learning_rate": 4.549833922209093e-06, "loss": 1.2696, "step": 49695 }, { "epoch": 1.4654282765737874, "grad_norm": 2.7749916822049587, "learning_rate": 4.5492054359349015e-06, "loss": 1.3069, "step": 49700 }, { "epoch": 1.4655757039657968, "grad_norm": 2.852327296047243, "learning_rate": 4.548576926143178e-06, "loss": 1.2857, "step": 49705 }, { "epoch": 1.4657231313578063, "grad_norm": 2.843577456302789, "learning_rate": 4.547948392852416e-06, "loss": 1.3037, "step": 49710 }, { "epoch": 1.4658705587498158, "grad_norm": 2.926402556784678, "learning_rate": 4.547319836081112e-06, "loss": 1.2958, "step": 49715 }, { "epoch": 1.4660179861418252, "grad_norm": 2.8340446202711185, "learning_rate": 4.546691255847761e-06, "loss": 1.3302, "step": 49720 }, { "epoch": 1.4661654135338344, "grad_norm": 2.839167981294006, "learning_rate": 4.54606265217086e-06, "loss": 1.2555, "step": 49725 }, { "epoch": 1.466312840925844, "grad_norm": 2.86883706412062, "learning_rate": 4.545434025068907e-06, "loss": 1.3252, "step": 49730 }, { "epoch": 1.4664602683178534, "grad_norm": 2.742694985546808, "learning_rate": 4.5448053745604e-06, "loss": 1.2852, "step": 49735 }, { "epoch": 1.4666076957098628, "grad_norm": 2.8075344771679336, "learning_rate": 4.544176700663835e-06, "loss": 1.2663, "step": 49740 }, { "epoch": 1.4667551231018723, "grad_norm": 2.7648134373203215, "learning_rate": 4.543548003397715e-06, "loss": 1.2917, "step": 49745 }, { "epoch": 1.4669025504938817, "grad_norm": 2.901239170621329, "learning_rate": 4.54291928278054e-06, "loss": 1.3182, "step": 49750 }, { "epoch": 1.4670499778858912, "grad_norm": 2.66365518530146, "learning_rate": 4.542290538830808e-06, "loss": 1.283, "step": 49755 }, { "epoch": 1.4671974052779007, "grad_norm": 2.771419384200911, "learning_rate": 4.541661771567024e-06, "loss": 1.2931, "step": 49760 }, { "epoch": 1.4673448326699101, "grad_norm": 2.8323990558005048, "learning_rate": 4.541032981007686e-06, "loss": 1.275, "step": 49765 }, { "epoch": 1.4674922600619196, "grad_norm": 2.851863948879374, "learning_rate": 4.540404167171301e-06, "loss": 1.3148, "step": 49770 }, { "epoch": 1.467639687453929, "grad_norm": 2.800539603106772, "learning_rate": 4.5397753300763696e-06, "loss": 1.2838, "step": 49775 }, { "epoch": 1.4677871148459385, "grad_norm": 2.8745190092878086, "learning_rate": 4.539146469741398e-06, "loss": 1.3426, "step": 49780 }, { "epoch": 1.467934542237948, "grad_norm": 2.820636767857283, "learning_rate": 4.538517586184891e-06, "loss": 1.3038, "step": 49785 }, { "epoch": 1.4680819696299572, "grad_norm": 2.9147233998761677, "learning_rate": 4.537888679425353e-06, "loss": 1.3241, "step": 49790 }, { "epoch": 1.4682293970219666, "grad_norm": 2.9892496811985647, "learning_rate": 4.537259749481291e-06, "loss": 1.3202, "step": 49795 }, { "epoch": 1.468376824413976, "grad_norm": 2.8214781683058265, "learning_rate": 4.536630796371212e-06, "loss": 1.2904, "step": 49800 }, { "epoch": 1.4685242518059856, "grad_norm": 2.991560052797833, "learning_rate": 4.536001820113623e-06, "loss": 1.3092, "step": 49805 }, { "epoch": 1.468671679197995, "grad_norm": 2.799738005620867, "learning_rate": 4.535372820727033e-06, "loss": 1.3378, "step": 49810 }, { "epoch": 1.4688191065900045, "grad_norm": 2.830215241396668, "learning_rate": 4.534743798229953e-06, "loss": 1.3383, "step": 49815 }, { "epoch": 1.468966533982014, "grad_norm": 2.830309568210431, "learning_rate": 4.5341147526408885e-06, "loss": 1.3043, "step": 49820 }, { "epoch": 1.4691139613740232, "grad_norm": 2.8087899646329415, "learning_rate": 4.533485683978352e-06, "loss": 1.2956, "step": 49825 }, { "epoch": 1.4692613887660326, "grad_norm": 2.79102908241337, "learning_rate": 4.532856592260855e-06, "loss": 1.3058, "step": 49830 }, { "epoch": 1.469408816158042, "grad_norm": 2.811348060667417, "learning_rate": 4.5322274775069094e-06, "loss": 1.3104, "step": 49835 }, { "epoch": 1.4695562435500515, "grad_norm": 2.8743112573576264, "learning_rate": 4.531598339735026e-06, "loss": 1.3069, "step": 49840 }, { "epoch": 1.469703670942061, "grad_norm": 2.892855567553791, "learning_rate": 4.53096917896372e-06, "loss": 1.3398, "step": 49845 }, { "epoch": 1.4698510983340705, "grad_norm": 2.8738255793455005, "learning_rate": 4.530339995211504e-06, "loss": 1.3257, "step": 49850 }, { "epoch": 1.46999852572608, "grad_norm": 2.819746015752854, "learning_rate": 4.529710788496893e-06, "loss": 1.2927, "step": 49855 }, { "epoch": 1.4701459531180894, "grad_norm": 2.7396461983453233, "learning_rate": 4.529081558838401e-06, "loss": 1.2796, "step": 49860 }, { "epoch": 1.4702933805100988, "grad_norm": 2.9420943864057416, "learning_rate": 4.528452306254545e-06, "loss": 1.3103, "step": 49865 }, { "epoch": 1.4704408079021083, "grad_norm": 2.9045181699910843, "learning_rate": 4.52782303076384e-06, "loss": 1.3117, "step": 49870 }, { "epoch": 1.4705882352941178, "grad_norm": 2.868054282814387, "learning_rate": 4.527193732384805e-06, "loss": 1.2888, "step": 49875 }, { "epoch": 1.4707356626861272, "grad_norm": 2.8379642648822454, "learning_rate": 4.526564411135956e-06, "loss": 1.2967, "step": 49880 }, { "epoch": 1.4708830900781364, "grad_norm": 2.7479760739084473, "learning_rate": 4.525935067035813e-06, "loss": 1.2814, "step": 49885 }, { "epoch": 1.471030517470146, "grad_norm": 2.920566694394728, "learning_rate": 4.5253057001028955e-06, "loss": 1.3548, "step": 49890 }, { "epoch": 1.4711779448621554, "grad_norm": 2.934783251178435, "learning_rate": 4.5246763103557215e-06, "loss": 1.3451, "step": 49895 }, { "epoch": 1.4713253722541648, "grad_norm": 2.844255580158951, "learning_rate": 4.524046897812812e-06, "loss": 1.2927, "step": 49900 }, { "epoch": 1.4714727996461743, "grad_norm": 2.902637448082236, "learning_rate": 4.523417462492689e-06, "loss": 1.2773, "step": 49905 }, { "epoch": 1.4716202270381837, "grad_norm": 2.9467914241424724, "learning_rate": 4.522788004413875e-06, "loss": 1.31, "step": 49910 }, { "epoch": 1.4717676544301932, "grad_norm": 2.8503482130129134, "learning_rate": 4.522158523594891e-06, "loss": 1.3006, "step": 49915 }, { "epoch": 1.4719150818222024, "grad_norm": 2.9597015434951985, "learning_rate": 4.521529020054261e-06, "loss": 1.3561, "step": 49920 }, { "epoch": 1.472062509214212, "grad_norm": 2.885114347790191, "learning_rate": 4.520899493810508e-06, "loss": 1.3052, "step": 49925 }, { "epoch": 1.4722099366062213, "grad_norm": 2.925381425758026, "learning_rate": 4.520269944882158e-06, "loss": 1.3274, "step": 49930 }, { "epoch": 1.4723573639982308, "grad_norm": 2.9109711645953475, "learning_rate": 4.519640373287734e-06, "loss": 1.3384, "step": 49935 }, { "epoch": 1.4725047913902403, "grad_norm": 2.7956275692122547, "learning_rate": 4.519010779045764e-06, "loss": 1.3191, "step": 49940 }, { "epoch": 1.4726522187822497, "grad_norm": 2.9233722728175255, "learning_rate": 4.518381162174774e-06, "loss": 1.3287, "step": 49945 }, { "epoch": 1.4727996461742592, "grad_norm": 2.7163125675556246, "learning_rate": 4.51775152269329e-06, "loss": 1.2968, "step": 49950 }, { "epoch": 1.4729470735662686, "grad_norm": 2.859276459756097, "learning_rate": 4.517121860619841e-06, "loss": 1.2992, "step": 49955 }, { "epoch": 1.473094500958278, "grad_norm": 2.926794491511425, "learning_rate": 4.516492175972956e-06, "loss": 1.3249, "step": 49960 }, { "epoch": 1.4732419283502876, "grad_norm": 2.930467740517202, "learning_rate": 4.5158624687711626e-06, "loss": 1.3477, "step": 49965 }, { "epoch": 1.473389355742297, "grad_norm": 2.8253872373065048, "learning_rate": 4.515232739032993e-06, "loss": 1.2846, "step": 49970 }, { "epoch": 1.4735367831343065, "grad_norm": 2.796081015663179, "learning_rate": 4.514602986776974e-06, "loss": 1.3072, "step": 49975 }, { "epoch": 1.4736842105263157, "grad_norm": 2.850053009645826, "learning_rate": 4.51397321202164e-06, "loss": 1.2953, "step": 49980 }, { "epoch": 1.4738316379183252, "grad_norm": 2.7556886468873456, "learning_rate": 4.513343414785523e-06, "loss": 1.3297, "step": 49985 }, { "epoch": 1.4739790653103346, "grad_norm": 2.7520232498472077, "learning_rate": 4.512713595087153e-06, "loss": 1.249, "step": 49990 }, { "epoch": 1.474126492702344, "grad_norm": 2.778120463309227, "learning_rate": 4.512083752945066e-06, "loss": 1.2859, "step": 49995 }, { "epoch": 1.4742739200943535, "grad_norm": 3.002056283251719, "learning_rate": 4.511453888377793e-06, "loss": 1.3156, "step": 50000 }, { "epoch": 1.4742739200943535, "eval_loss": 1.09761381149292, "eval_runtime": 4.2665, "eval_samples_per_second": 92.816, "eval_steps_per_second": 3.047, "step": 50000 }, { "epoch": 1.474421347486363, "grad_norm": 2.944965062539509, "learning_rate": 4.51082400140387e-06, "loss": 1.3353, "step": 50005 }, { "epoch": 1.4745687748783725, "grad_norm": 2.891768933236723, "learning_rate": 4.510194092041832e-06, "loss": 1.267, "step": 50010 }, { "epoch": 1.474716202270382, "grad_norm": 2.8491334343781283, "learning_rate": 4.509564160310215e-06, "loss": 1.3229, "step": 50015 }, { "epoch": 1.4748636296623912, "grad_norm": 2.8175121039876907, "learning_rate": 4.508934206227556e-06, "loss": 1.3073, "step": 50020 }, { "epoch": 1.4750110570544006, "grad_norm": 2.7903065680701484, "learning_rate": 4.508304229812391e-06, "loss": 1.2939, "step": 50025 }, { "epoch": 1.47515848444641, "grad_norm": 2.8631708836728134, "learning_rate": 4.507674231083257e-06, "loss": 1.2963, "step": 50030 }, { "epoch": 1.4753059118384195, "grad_norm": 2.944680837492001, "learning_rate": 4.507044210058694e-06, "loss": 1.3888, "step": 50035 }, { "epoch": 1.475453339230429, "grad_norm": 2.8999108161877185, "learning_rate": 4.506414166757241e-06, "loss": 1.3265, "step": 50040 }, { "epoch": 1.4756007666224384, "grad_norm": 2.8962758264410455, "learning_rate": 4.505784101197437e-06, "loss": 1.2876, "step": 50045 }, { "epoch": 1.475748194014448, "grad_norm": 2.753866253231284, "learning_rate": 4.505154013397825e-06, "loss": 1.3166, "step": 50050 }, { "epoch": 1.4758956214064574, "grad_norm": 2.8234617465983187, "learning_rate": 4.504523903376941e-06, "loss": 1.317, "step": 50055 }, { "epoch": 1.4760430487984668, "grad_norm": 2.8915041734972644, "learning_rate": 4.503893771153331e-06, "loss": 1.3164, "step": 50060 }, { "epoch": 1.4761904761904763, "grad_norm": 2.8875349795416922, "learning_rate": 4.5032636167455346e-06, "loss": 1.2625, "step": 50065 }, { "epoch": 1.4763379035824857, "grad_norm": 2.9672101682391974, "learning_rate": 4.502633440172098e-06, "loss": 1.299, "step": 50070 }, { "epoch": 1.4764853309744952, "grad_norm": 2.816313761773909, "learning_rate": 4.502003241451563e-06, "loss": 1.2931, "step": 50075 }, { "epoch": 1.4766327583665044, "grad_norm": 2.8668511219144888, "learning_rate": 4.501373020602472e-06, "loss": 1.3086, "step": 50080 }, { "epoch": 1.4767801857585139, "grad_norm": 2.920614007301268, "learning_rate": 4.500742777643373e-06, "loss": 1.3193, "step": 50085 }, { "epoch": 1.4769276131505233, "grad_norm": 2.7327646386524163, "learning_rate": 4.50011251259281e-06, "loss": 1.322, "step": 50090 }, { "epoch": 1.4770750405425328, "grad_norm": 2.9050597183107407, "learning_rate": 4.49948222546933e-06, "loss": 1.3031, "step": 50095 }, { "epoch": 1.4772224679345423, "grad_norm": 3.013190170151898, "learning_rate": 4.49885191629148e-06, "loss": 1.3219, "step": 50100 }, { "epoch": 1.4773698953265517, "grad_norm": 2.8627062423633256, "learning_rate": 4.498221585077808e-06, "loss": 1.3439, "step": 50105 }, { "epoch": 1.4775173227185612, "grad_norm": 3.3349504328903308, "learning_rate": 4.49759123184686e-06, "loss": 1.2713, "step": 50110 }, { "epoch": 1.4776647501105704, "grad_norm": 2.803777546601866, "learning_rate": 4.496960856617188e-06, "loss": 1.2957, "step": 50115 }, { "epoch": 1.4778121775025799, "grad_norm": 2.966533576950461, "learning_rate": 4.496330459407338e-06, "loss": 1.3525, "step": 50120 }, { "epoch": 1.4779596048945893, "grad_norm": 2.7929844135562822, "learning_rate": 4.495700040235863e-06, "loss": 1.295, "step": 50125 }, { "epoch": 1.4781070322865988, "grad_norm": 2.745523509805109, "learning_rate": 4.495069599121314e-06, "loss": 1.3137, "step": 50130 }, { "epoch": 1.4782544596786082, "grad_norm": 2.987787047901792, "learning_rate": 4.494439136082239e-06, "loss": 1.3243, "step": 50135 }, { "epoch": 1.4784018870706177, "grad_norm": 2.895108009749132, "learning_rate": 4.493808651137193e-06, "loss": 1.3162, "step": 50140 }, { "epoch": 1.4785493144626272, "grad_norm": 2.8262948471727776, "learning_rate": 4.4931781443047285e-06, "loss": 1.3008, "step": 50145 }, { "epoch": 1.4786967418546366, "grad_norm": 2.826320648763934, "learning_rate": 4.4925476156033985e-06, "loss": 1.3462, "step": 50150 }, { "epoch": 1.478844169246646, "grad_norm": 2.804112870557724, "learning_rate": 4.491917065051757e-06, "loss": 1.311, "step": 50155 }, { "epoch": 1.4789915966386555, "grad_norm": 2.7734218252069023, "learning_rate": 4.491286492668359e-06, "loss": 1.2222, "step": 50160 }, { "epoch": 1.479139024030665, "grad_norm": 2.960979301623601, "learning_rate": 4.490655898471758e-06, "loss": 1.3406, "step": 50165 }, { "epoch": 1.4792864514226745, "grad_norm": 2.740205768398764, "learning_rate": 4.490025282480513e-06, "loss": 1.3151, "step": 50170 }, { "epoch": 1.4794338788146837, "grad_norm": 2.7882630299289826, "learning_rate": 4.489394644713179e-06, "loss": 1.2884, "step": 50175 }, { "epoch": 1.4795813062066931, "grad_norm": 2.82816591881367, "learning_rate": 4.4887639851883134e-06, "loss": 1.3196, "step": 50180 }, { "epoch": 1.4797287335987026, "grad_norm": 2.841561463029685, "learning_rate": 4.4881333039244726e-06, "loss": 1.3587, "step": 50185 }, { "epoch": 1.479876160990712, "grad_norm": 2.764968383473194, "learning_rate": 4.4875026009402175e-06, "loss": 1.2871, "step": 50190 }, { "epoch": 1.4800235883827215, "grad_norm": 2.8506266454558205, "learning_rate": 4.486871876254106e-06, "loss": 1.2707, "step": 50195 }, { "epoch": 1.480171015774731, "grad_norm": 2.788174911958974, "learning_rate": 4.486241129884698e-06, "loss": 1.3281, "step": 50200 }, { "epoch": 1.4803184431667404, "grad_norm": 2.820347554094603, "learning_rate": 4.4856103618505546e-06, "loss": 1.3247, "step": 50205 }, { "epoch": 1.48046587055875, "grad_norm": 2.734290580459228, "learning_rate": 4.484979572170235e-06, "loss": 1.2861, "step": 50210 }, { "epoch": 1.4806132979507591, "grad_norm": 2.8574830585918227, "learning_rate": 4.4843487608623035e-06, "loss": 1.3147, "step": 50215 }, { "epoch": 1.4807607253427686, "grad_norm": 2.823048759793737, "learning_rate": 4.48371792794532e-06, "loss": 1.3259, "step": 50220 }, { "epoch": 1.480908152734778, "grad_norm": 2.926112515438616, "learning_rate": 4.48308707343785e-06, "loss": 1.3064, "step": 50225 }, { "epoch": 1.4810555801267875, "grad_norm": 2.759702774281742, "learning_rate": 4.482456197358455e-06, "loss": 1.284, "step": 50230 }, { "epoch": 1.481203007518797, "grad_norm": 2.9059829567781756, "learning_rate": 4.481825299725701e-06, "loss": 1.361, "step": 50235 }, { "epoch": 1.4813504349108064, "grad_norm": 2.7767992764804843, "learning_rate": 4.481194380558151e-06, "loss": 1.3053, "step": 50240 }, { "epoch": 1.4814978623028159, "grad_norm": 2.8145174781682925, "learning_rate": 4.480563439874371e-06, "loss": 1.2877, "step": 50245 }, { "epoch": 1.4816452896948253, "grad_norm": 2.8169010107406813, "learning_rate": 4.479932477692929e-06, "loss": 1.2816, "step": 50250 }, { "epoch": 1.4817927170868348, "grad_norm": 2.9753374713019203, "learning_rate": 4.479301494032389e-06, "loss": 1.3234, "step": 50255 }, { "epoch": 1.4819401444788443, "grad_norm": 2.905849211945967, "learning_rate": 4.478670488911321e-06, "loss": 1.3009, "step": 50260 }, { "epoch": 1.4820875718708537, "grad_norm": 2.8456316624035964, "learning_rate": 4.478039462348291e-06, "loss": 1.3801, "step": 50265 }, { "epoch": 1.4822349992628632, "grad_norm": 2.9033037496971077, "learning_rate": 4.4774084143618685e-06, "loss": 1.3217, "step": 50270 }, { "epoch": 1.4823824266548724, "grad_norm": 2.832547135304774, "learning_rate": 4.476777344970622e-06, "loss": 1.256, "step": 50275 }, { "epoch": 1.4825298540468819, "grad_norm": 2.81212011301522, "learning_rate": 4.476146254193124e-06, "loss": 1.3012, "step": 50280 }, { "epoch": 1.4826772814388913, "grad_norm": 2.9343802762986324, "learning_rate": 4.475515142047942e-06, "loss": 1.329, "step": 50285 }, { "epoch": 1.4828247088309008, "grad_norm": 2.9412983600054843, "learning_rate": 4.474884008553648e-06, "loss": 1.3719, "step": 50290 }, { "epoch": 1.4829721362229102, "grad_norm": 2.827882271033328, "learning_rate": 4.474252853728816e-06, "loss": 1.3093, "step": 50295 }, { "epoch": 1.4831195636149197, "grad_norm": 2.9748566554958433, "learning_rate": 4.473621677592015e-06, "loss": 1.3321, "step": 50300 }, { "epoch": 1.4832669910069292, "grad_norm": 2.893470602695188, "learning_rate": 4.472990480161822e-06, "loss": 1.2907, "step": 50305 }, { "epoch": 1.4834144183989384, "grad_norm": 2.978844997979274, "learning_rate": 4.472359261456807e-06, "loss": 1.3261, "step": 50310 }, { "epoch": 1.4835618457909479, "grad_norm": 2.818598699141613, "learning_rate": 4.471728021495546e-06, "loss": 1.3132, "step": 50315 }, { "epoch": 1.4837092731829573, "grad_norm": 2.698103112438765, "learning_rate": 4.471096760296614e-06, "loss": 1.2807, "step": 50320 }, { "epoch": 1.4838567005749668, "grad_norm": 2.9419447107806045, "learning_rate": 4.4704654778785864e-06, "loss": 1.3472, "step": 50325 }, { "epoch": 1.4840041279669762, "grad_norm": 2.7736062980220737, "learning_rate": 4.46983417426004e-06, "loss": 1.2942, "step": 50330 }, { "epoch": 1.4841515553589857, "grad_norm": 2.890246597679305, "learning_rate": 4.46920284945955e-06, "loss": 1.3114, "step": 50335 }, { "epoch": 1.4842989827509951, "grad_norm": 3.202756490225601, "learning_rate": 4.468571503495696e-06, "loss": 1.3391, "step": 50340 }, { "epoch": 1.4844464101430046, "grad_norm": 2.923293693356093, "learning_rate": 4.467940136387054e-06, "loss": 1.3489, "step": 50345 }, { "epoch": 1.484593837535014, "grad_norm": 2.817579475089879, "learning_rate": 4.467308748152204e-06, "loss": 1.2982, "step": 50350 }, { "epoch": 1.4847412649270235, "grad_norm": 2.822921054058777, "learning_rate": 4.466677338809725e-06, "loss": 1.3308, "step": 50355 }, { "epoch": 1.484888692319033, "grad_norm": 2.9353808327736464, "learning_rate": 4.466045908378198e-06, "loss": 1.366, "step": 50360 }, { "epoch": 1.4850361197110424, "grad_norm": 2.7071862707083816, "learning_rate": 4.465414456876202e-06, "loss": 1.261, "step": 50365 }, { "epoch": 1.4851835471030517, "grad_norm": 2.868331839822151, "learning_rate": 4.464782984322317e-06, "loss": 1.2721, "step": 50370 }, { "epoch": 1.4853309744950611, "grad_norm": 3.0107233951837347, "learning_rate": 4.464151490735128e-06, "loss": 1.3445, "step": 50375 }, { "epoch": 1.4854784018870706, "grad_norm": 2.7101890457018065, "learning_rate": 4.4635199761332164e-06, "loss": 1.2822, "step": 50380 }, { "epoch": 1.48562582927908, "grad_norm": 2.898371002019571, "learning_rate": 4.4628884405351635e-06, "loss": 1.3174, "step": 50385 }, { "epoch": 1.4857732566710895, "grad_norm": 2.8582962317494953, "learning_rate": 4.462256883959555e-06, "loss": 1.2977, "step": 50390 }, { "epoch": 1.485920684063099, "grad_norm": 2.832944797763616, "learning_rate": 4.461625306424975e-06, "loss": 1.2824, "step": 50395 }, { "epoch": 1.4860681114551084, "grad_norm": 2.91251115463683, "learning_rate": 4.460993707950005e-06, "loss": 1.3111, "step": 50400 }, { "epoch": 1.4862155388471177, "grad_norm": 3.0258587112737056, "learning_rate": 4.4603620885532356e-06, "loss": 1.3129, "step": 50405 }, { "epoch": 1.4863629662391271, "grad_norm": 2.8810099518327092, "learning_rate": 4.459730448253251e-06, "loss": 1.2505, "step": 50410 }, { "epoch": 1.4865103936311366, "grad_norm": 2.7068761330311535, "learning_rate": 4.459098787068636e-06, "loss": 1.3392, "step": 50415 }, { "epoch": 1.486657821023146, "grad_norm": 2.944465742104866, "learning_rate": 4.45846710501798e-06, "loss": 1.2939, "step": 50420 }, { "epoch": 1.4868052484151555, "grad_norm": 2.89262834306392, "learning_rate": 4.4578354021198715e-06, "loss": 1.3572, "step": 50425 }, { "epoch": 1.486952675807165, "grad_norm": 2.936851359152498, "learning_rate": 4.4572036783928965e-06, "loss": 1.2559, "step": 50430 }, { "epoch": 1.4871001031991744, "grad_norm": 2.7413087441032813, "learning_rate": 4.456571933855647e-06, "loss": 1.3345, "step": 50435 }, { "epoch": 1.4872475305911839, "grad_norm": 2.851154354411175, "learning_rate": 4.4559401685267105e-06, "loss": 1.2871, "step": 50440 }, { "epoch": 1.4873949579831933, "grad_norm": 2.785518930418477, "learning_rate": 4.45530838242468e-06, "loss": 1.2909, "step": 50445 }, { "epoch": 1.4875423853752028, "grad_norm": 2.8221908471570583, "learning_rate": 4.454676575568144e-06, "loss": 1.3091, "step": 50450 }, { "epoch": 1.4876898127672122, "grad_norm": 2.9561343661039894, "learning_rate": 4.454044747975694e-06, "loss": 1.2769, "step": 50455 }, { "epoch": 1.4878372401592217, "grad_norm": 2.860731853118197, "learning_rate": 4.453412899665924e-06, "loss": 1.3679, "step": 50460 }, { "epoch": 1.4879846675512312, "grad_norm": 2.818625165120589, "learning_rate": 4.452781030657427e-06, "loss": 1.3342, "step": 50465 }, { "epoch": 1.4881320949432404, "grad_norm": 2.8720941980793513, "learning_rate": 4.452149140968795e-06, "loss": 1.3592, "step": 50470 }, { "epoch": 1.4882795223352498, "grad_norm": 2.8521295630924914, "learning_rate": 4.451517230618623e-06, "loss": 1.2922, "step": 50475 }, { "epoch": 1.4884269497272593, "grad_norm": 2.8007898160413682, "learning_rate": 4.450885299625505e-06, "loss": 1.3392, "step": 50480 }, { "epoch": 1.4885743771192688, "grad_norm": 2.7783241556407807, "learning_rate": 4.450253348008037e-06, "loss": 1.2965, "step": 50485 }, { "epoch": 1.4887218045112782, "grad_norm": 2.842222205356446, "learning_rate": 4.449621375784815e-06, "loss": 1.2512, "step": 50490 }, { "epoch": 1.4888692319032877, "grad_norm": 2.874429121020486, "learning_rate": 4.448989382974435e-06, "loss": 1.333, "step": 50495 }, { "epoch": 1.4890166592952971, "grad_norm": 2.792154411430763, "learning_rate": 4.448357369595494e-06, "loss": 1.333, "step": 50500 }, { "epoch": 1.4890166592952971, "eval_loss": 1.096326231956482, "eval_runtime": 4.1306, "eval_samples_per_second": 95.87, "eval_steps_per_second": 3.147, "step": 50500 }, { "epoch": 1.4891640866873064, "grad_norm": 2.8793730074372497, "learning_rate": 4.44772533566659e-06, "loss": 1.3186, "step": 50505 }, { "epoch": 1.4893115140793158, "grad_norm": 2.765108715686205, "learning_rate": 4.447093281206322e-06, "loss": 1.2891, "step": 50510 }, { "epoch": 1.4894589414713253, "grad_norm": 2.907764011953018, "learning_rate": 4.446461206233287e-06, "loss": 1.2858, "step": 50515 }, { "epoch": 1.4896063688633348, "grad_norm": 2.9217930201428106, "learning_rate": 4.445829110766087e-06, "loss": 1.2888, "step": 50520 }, { "epoch": 1.4897537962553442, "grad_norm": 2.9642373839394995, "learning_rate": 4.4451969948233195e-06, "loss": 1.2816, "step": 50525 }, { "epoch": 1.4899012236473537, "grad_norm": 2.797619281426558, "learning_rate": 4.444564858423587e-06, "loss": 1.2815, "step": 50530 }, { "epoch": 1.4900486510393631, "grad_norm": 2.9018573200923092, "learning_rate": 4.443932701585491e-06, "loss": 1.3204, "step": 50535 }, { "epoch": 1.4901960784313726, "grad_norm": 2.7296604584875546, "learning_rate": 4.4433005243276324e-06, "loss": 1.2864, "step": 50540 }, { "epoch": 1.490343505823382, "grad_norm": 2.759791554116397, "learning_rate": 4.442668326668615e-06, "loss": 1.337, "step": 50545 }, { "epoch": 1.4904909332153915, "grad_norm": 2.7420232476187327, "learning_rate": 4.442036108627041e-06, "loss": 1.2638, "step": 50550 }, { "epoch": 1.490638360607401, "grad_norm": 2.9259444830189256, "learning_rate": 4.441403870221512e-06, "loss": 1.326, "step": 50555 }, { "epoch": 1.4907857879994104, "grad_norm": 2.739099021658351, "learning_rate": 4.440771611470637e-06, "loss": 1.2409, "step": 50560 }, { "epoch": 1.4909332153914197, "grad_norm": 2.8882709411980545, "learning_rate": 4.440139332393018e-06, "loss": 1.304, "step": 50565 }, { "epoch": 1.491080642783429, "grad_norm": 2.9685030613619072, "learning_rate": 4.439507033007262e-06, "loss": 1.3303, "step": 50570 }, { "epoch": 1.4912280701754386, "grad_norm": 2.9084558999613828, "learning_rate": 4.4388747133319734e-06, "loss": 1.2771, "step": 50575 }, { "epoch": 1.491375497567448, "grad_norm": 2.766079141868109, "learning_rate": 4.438242373385759e-06, "loss": 1.3127, "step": 50580 }, { "epoch": 1.4915229249594575, "grad_norm": 2.772238826428845, "learning_rate": 4.437610013187228e-06, "loss": 1.3234, "step": 50585 }, { "epoch": 1.491670352351467, "grad_norm": 2.7917414069347983, "learning_rate": 4.436977632754986e-06, "loss": 1.3367, "step": 50590 }, { "epoch": 1.4918177797434764, "grad_norm": 2.7258974531570437, "learning_rate": 4.436345232107644e-06, "loss": 1.2972, "step": 50595 }, { "epoch": 1.4919652071354856, "grad_norm": 2.8612044580883405, "learning_rate": 4.435712811263809e-06, "loss": 1.2922, "step": 50600 }, { "epoch": 1.492112634527495, "grad_norm": 2.7762064957647263, "learning_rate": 4.435080370242091e-06, "loss": 1.2806, "step": 50605 }, { "epoch": 1.4922600619195046, "grad_norm": 2.859336699575079, "learning_rate": 4.434447909061102e-06, "loss": 1.3021, "step": 50610 }, { "epoch": 1.492407489311514, "grad_norm": 2.9237504562217977, "learning_rate": 4.433815427739451e-06, "loss": 1.3221, "step": 50615 }, { "epoch": 1.4925549167035235, "grad_norm": 2.9017921822663935, "learning_rate": 4.43318292629575e-06, "loss": 1.3039, "step": 50620 }, { "epoch": 1.492702344095533, "grad_norm": 2.8569976426625243, "learning_rate": 4.432550404748613e-06, "loss": 1.3181, "step": 50625 }, { "epoch": 1.4928497714875424, "grad_norm": 2.834001818099855, "learning_rate": 4.431917863116648e-06, "loss": 1.3173, "step": 50630 }, { "epoch": 1.4929971988795518, "grad_norm": 3.029989411936918, "learning_rate": 4.431285301418473e-06, "loss": 1.3091, "step": 50635 }, { "epoch": 1.4931446262715613, "grad_norm": 2.8670982582728985, "learning_rate": 4.430652719672702e-06, "loss": 1.3024, "step": 50640 }, { "epoch": 1.4932920536635708, "grad_norm": 2.773044057956553, "learning_rate": 4.430020117897945e-06, "loss": 1.2637, "step": 50645 }, { "epoch": 1.4934394810555802, "grad_norm": 2.926986173565862, "learning_rate": 4.4293874961128196e-06, "loss": 1.2987, "step": 50650 }, { "epoch": 1.4935869084475897, "grad_norm": 2.817818929564453, "learning_rate": 4.4287548543359415e-06, "loss": 1.3236, "step": 50655 }, { "epoch": 1.493734335839599, "grad_norm": 2.8914381063817065, "learning_rate": 4.4281221925859275e-06, "loss": 1.3175, "step": 50660 }, { "epoch": 1.4938817632316084, "grad_norm": 2.800597283877545, "learning_rate": 4.427489510881393e-06, "loss": 1.3179, "step": 50665 }, { "epoch": 1.4940291906236178, "grad_norm": 2.8006800583312645, "learning_rate": 4.4268568092409566e-06, "loss": 1.3359, "step": 50670 }, { "epoch": 1.4941766180156273, "grad_norm": 2.954839645939313, "learning_rate": 4.426224087683235e-06, "loss": 1.3153, "step": 50675 }, { "epoch": 1.4943240454076367, "grad_norm": 2.9798921620698025, "learning_rate": 4.4255913462268475e-06, "loss": 1.3509, "step": 50680 }, { "epoch": 1.4944714727996462, "grad_norm": 2.7699357181948256, "learning_rate": 4.424958584890413e-06, "loss": 1.2934, "step": 50685 }, { "epoch": 1.4946189001916557, "grad_norm": 2.894696060572854, "learning_rate": 4.424325803692552e-06, "loss": 1.3329, "step": 50690 }, { "epoch": 1.4947663275836651, "grad_norm": 2.814329008264858, "learning_rate": 4.4236930026518845e-06, "loss": 1.2952, "step": 50695 }, { "epoch": 1.4949137549756744, "grad_norm": 2.9786366336404013, "learning_rate": 4.42306018178703e-06, "loss": 1.309, "step": 50700 }, { "epoch": 1.4950611823676838, "grad_norm": 2.8800559463418742, "learning_rate": 4.422427341116613e-06, "loss": 1.3181, "step": 50705 }, { "epoch": 1.4952086097596933, "grad_norm": 2.8172304400470196, "learning_rate": 4.421794480659252e-06, "loss": 1.3395, "step": 50710 }, { "epoch": 1.4953560371517027, "grad_norm": 2.7597762801486088, "learning_rate": 4.421161600433571e-06, "loss": 1.2891, "step": 50715 }, { "epoch": 1.4955034645437122, "grad_norm": 2.847380369608714, "learning_rate": 4.420528700458195e-06, "loss": 1.3324, "step": 50720 }, { "epoch": 1.4956508919357216, "grad_norm": 2.7829405918740937, "learning_rate": 4.419895780751745e-06, "loss": 1.2929, "step": 50725 }, { "epoch": 1.495798319327731, "grad_norm": 2.9442464988922854, "learning_rate": 4.419262841332849e-06, "loss": 1.3072, "step": 50730 }, { "epoch": 1.4959457467197406, "grad_norm": 3.2671591162661, "learning_rate": 4.418629882220127e-06, "loss": 1.3157, "step": 50735 }, { "epoch": 1.49609317411175, "grad_norm": 2.8388757887271545, "learning_rate": 4.417996903432207e-06, "loss": 1.2763, "step": 50740 }, { "epoch": 1.4962406015037595, "grad_norm": 2.7858958387282176, "learning_rate": 4.417363904987716e-06, "loss": 1.3102, "step": 50745 }, { "epoch": 1.496388028895769, "grad_norm": 2.79502695226646, "learning_rate": 4.4167308869052826e-06, "loss": 1.3059, "step": 50750 }, { "epoch": 1.4965354562877784, "grad_norm": 2.901774895268436, "learning_rate": 4.4160978492035286e-06, "loss": 1.2764, "step": 50755 }, { "epoch": 1.4966828836797876, "grad_norm": 2.858953416988783, "learning_rate": 4.415464791901086e-06, "loss": 1.2708, "step": 50760 }, { "epoch": 1.496830311071797, "grad_norm": 2.773314649875937, "learning_rate": 4.414831715016582e-06, "loss": 1.2929, "step": 50765 }, { "epoch": 1.4969777384638066, "grad_norm": 2.8621552088320756, "learning_rate": 4.414198618568645e-06, "loss": 1.2591, "step": 50770 }, { "epoch": 1.497125165855816, "grad_norm": 2.83828821782575, "learning_rate": 4.413565502575905e-06, "loss": 1.3044, "step": 50775 }, { "epoch": 1.4972725932478255, "grad_norm": 2.9443533967242828, "learning_rate": 4.412932367056994e-06, "loss": 1.2862, "step": 50780 }, { "epoch": 1.497420020639835, "grad_norm": 2.815507304926182, "learning_rate": 4.41229921203054e-06, "loss": 1.3258, "step": 50785 }, { "epoch": 1.4975674480318444, "grad_norm": 2.7800260032017614, "learning_rate": 4.411666037515176e-06, "loss": 1.2792, "step": 50790 }, { "epoch": 1.4977148754238536, "grad_norm": 2.880941499930407, "learning_rate": 4.4110328435295316e-06, "loss": 1.3284, "step": 50795 }, { "epoch": 1.497862302815863, "grad_norm": 2.901654521282114, "learning_rate": 4.4103996300922435e-06, "loss": 1.3139, "step": 50800 }, { "epoch": 1.4980097302078725, "grad_norm": 2.6222328135416366, "learning_rate": 4.40976639722194e-06, "loss": 1.2454, "step": 50805 }, { "epoch": 1.498157157599882, "grad_norm": 2.9604624779172917, "learning_rate": 4.409133144937258e-06, "loss": 1.3276, "step": 50810 }, { "epoch": 1.4983045849918915, "grad_norm": 2.7891026866862165, "learning_rate": 4.40849987325683e-06, "loss": 1.3659, "step": 50815 }, { "epoch": 1.498452012383901, "grad_norm": 2.7897906408171154, "learning_rate": 4.4078665821992915e-06, "loss": 1.3292, "step": 50820 }, { "epoch": 1.4985994397759104, "grad_norm": 2.895884006700452, "learning_rate": 4.407233271783278e-06, "loss": 1.2604, "step": 50825 }, { "epoch": 1.4987468671679198, "grad_norm": 2.90277326715, "learning_rate": 4.406599942027424e-06, "loss": 1.3326, "step": 50830 }, { "epoch": 1.4988942945599293, "grad_norm": 2.8111883524192365, "learning_rate": 4.405966592950369e-06, "loss": 1.3166, "step": 50835 }, { "epoch": 1.4990417219519387, "grad_norm": 2.9763874977222926, "learning_rate": 4.405333224570746e-06, "loss": 1.3176, "step": 50840 }, { "epoch": 1.4991891493439482, "grad_norm": 2.9372708793794113, "learning_rate": 4.404699836907195e-06, "loss": 1.2658, "step": 50845 }, { "epoch": 1.4993365767359577, "grad_norm": 2.9326526251606198, "learning_rate": 4.4040664299783535e-06, "loss": 1.2953, "step": 50850 }, { "epoch": 1.499484004127967, "grad_norm": 2.874478959177107, "learning_rate": 4.4034330038028615e-06, "loss": 1.302, "step": 50855 }, { "epoch": 1.4996314315199764, "grad_norm": 2.867393618590269, "learning_rate": 4.402799558399356e-06, "loss": 1.2742, "step": 50860 }, { "epoch": 1.4997788589119858, "grad_norm": 2.972770792619671, "learning_rate": 4.402166093786478e-06, "loss": 1.3389, "step": 50865 }, { "epoch": 1.4999262863039953, "grad_norm": 2.704846791714382, "learning_rate": 4.401532609982869e-06, "loss": 1.3186, "step": 50870 }, { "epoch": 1.5000737136960047, "grad_norm": 3.0015453139221213, "learning_rate": 4.400899107007168e-06, "loss": 1.3688, "step": 50875 }, { "epoch": 1.5002211410880142, "grad_norm": 2.894262026415617, "learning_rate": 4.400265584878019e-06, "loss": 1.324, "step": 50880 }, { "epoch": 1.5003685684800234, "grad_norm": 2.809181097039339, "learning_rate": 4.399632043614061e-06, "loss": 1.3371, "step": 50885 }, { "epoch": 1.5005159958720329, "grad_norm": 2.918527937617655, "learning_rate": 4.398998483233939e-06, "loss": 1.2858, "step": 50890 }, { "epoch": 1.5006634232640423, "grad_norm": 2.872112615318048, "learning_rate": 4.398364903756295e-06, "loss": 1.3196, "step": 50895 }, { "epoch": 1.5008108506560518, "grad_norm": 2.744097675515791, "learning_rate": 4.397731305199775e-06, "loss": 1.269, "step": 50900 }, { "epoch": 1.5009582780480613, "grad_norm": 2.7618227563127995, "learning_rate": 4.397097687583021e-06, "loss": 1.3274, "step": 50905 }, { "epoch": 1.5011057054400707, "grad_norm": 2.8163141994244616, "learning_rate": 4.396464050924678e-06, "loss": 1.3087, "step": 50910 }, { "epoch": 1.5012531328320802, "grad_norm": 2.970941534856446, "learning_rate": 4.395830395243392e-06, "loss": 1.2653, "step": 50915 }, { "epoch": 1.5014005602240896, "grad_norm": 2.895762023566428, "learning_rate": 4.39519672055781e-06, "loss": 1.3411, "step": 50920 }, { "epoch": 1.501547987616099, "grad_norm": 2.777719165270419, "learning_rate": 4.394563026886577e-06, "loss": 1.3026, "step": 50925 }, { "epoch": 1.5016954150081085, "grad_norm": 2.640381746068934, "learning_rate": 4.393929314248342e-06, "loss": 1.3406, "step": 50930 }, { "epoch": 1.501842842400118, "grad_norm": 2.8446217398092806, "learning_rate": 4.39329558266175e-06, "loss": 1.2676, "step": 50935 }, { "epoch": 1.5019902697921275, "grad_norm": 2.8477820655219634, "learning_rate": 4.392661832145452e-06, "loss": 1.2903, "step": 50940 }, { "epoch": 1.502137697184137, "grad_norm": 2.7977052080194644, "learning_rate": 4.392028062718095e-06, "loss": 1.3427, "step": 50945 }, { "epoch": 1.5022851245761464, "grad_norm": 2.6675209835183864, "learning_rate": 4.39139427439833e-06, "loss": 1.2714, "step": 50950 }, { "epoch": 1.5024325519681558, "grad_norm": 2.890237106086047, "learning_rate": 4.390760467204807e-06, "loss": 1.301, "step": 50955 }, { "epoch": 1.502579979360165, "grad_norm": 2.9912188173996865, "learning_rate": 4.390126641156173e-06, "loss": 1.3264, "step": 50960 }, { "epoch": 1.5027274067521745, "grad_norm": 2.9258408952342916, "learning_rate": 4.389492796271083e-06, "loss": 1.3384, "step": 50965 }, { "epoch": 1.502874834144184, "grad_norm": 2.9062573290345894, "learning_rate": 4.388858932568188e-06, "loss": 1.3344, "step": 50970 }, { "epoch": 1.5030222615361934, "grad_norm": 2.85814477784363, "learning_rate": 4.388225050066139e-06, "loss": 1.3169, "step": 50975 }, { "epoch": 1.503169688928203, "grad_norm": 2.8006007743265107, "learning_rate": 4.387591148783588e-06, "loss": 1.3073, "step": 50980 }, { "epoch": 1.5033171163202121, "grad_norm": 2.9342785815895724, "learning_rate": 4.386957228739191e-06, "loss": 1.3597, "step": 50985 }, { "epoch": 1.5034645437122216, "grad_norm": 2.9088533798998957, "learning_rate": 4.386323289951598e-06, "loss": 1.3167, "step": 50990 }, { "epoch": 1.503611971104231, "grad_norm": 2.6424376334669994, "learning_rate": 4.385689332439467e-06, "loss": 1.2748, "step": 50995 }, { "epoch": 1.5037593984962405, "grad_norm": 2.8300183008882334, "learning_rate": 4.385055356221452e-06, "loss": 1.3592, "step": 51000 }, { "epoch": 1.5037593984962405, "eval_loss": 1.0959432125091553, "eval_runtime": 4.2363, "eval_samples_per_second": 93.478, "eval_steps_per_second": 3.069, "step": 51000 }, { "epoch": 1.50390682588825, "grad_norm": 2.884422253644779, "learning_rate": 4.384421361316207e-06, "loss": 1.3534, "step": 51005 }, { "epoch": 1.5040542532802594, "grad_norm": 2.8764857055770428, "learning_rate": 4.38378734774239e-06, "loss": 1.3387, "step": 51010 }, { "epoch": 1.504201680672269, "grad_norm": 2.864634772324071, "learning_rate": 4.383153315518656e-06, "loss": 1.3018, "step": 51015 }, { "epoch": 1.5043491080642784, "grad_norm": 2.825048219532293, "learning_rate": 4.3825192646636626e-06, "loss": 1.312, "step": 51020 }, { "epoch": 1.5044965354562878, "grad_norm": 2.8455338390285116, "learning_rate": 4.381885195196067e-06, "loss": 1.3135, "step": 51025 }, { "epoch": 1.5046439628482973, "grad_norm": 2.873473804210055, "learning_rate": 4.38125110713453e-06, "loss": 1.3306, "step": 51030 }, { "epoch": 1.5047913902403067, "grad_norm": 2.901580681505985, "learning_rate": 4.380617000497707e-06, "loss": 1.3411, "step": 51035 }, { "epoch": 1.5049388176323162, "grad_norm": 2.8301052628439116, "learning_rate": 4.3799828753042596e-06, "loss": 1.2846, "step": 51040 }, { "epoch": 1.5050862450243256, "grad_norm": 2.800708723169561, "learning_rate": 4.3793487315728446e-06, "loss": 1.309, "step": 51045 }, { "epoch": 1.505233672416335, "grad_norm": 2.8748496318976517, "learning_rate": 4.378714569322126e-06, "loss": 1.3529, "step": 51050 }, { "epoch": 1.5053810998083443, "grad_norm": 2.7779399559524953, "learning_rate": 4.378080388570763e-06, "loss": 1.3178, "step": 51055 }, { "epoch": 1.5055285272003538, "grad_norm": 2.8454032687913138, "learning_rate": 4.377446189337418e-06, "loss": 1.31, "step": 51060 }, { "epoch": 1.5056759545923633, "grad_norm": 2.7629969217876513, "learning_rate": 4.376811971640752e-06, "loss": 1.3493, "step": 51065 }, { "epoch": 1.5058233819843727, "grad_norm": 2.8133151040210262, "learning_rate": 4.3761777354994275e-06, "loss": 1.3089, "step": 51070 }, { "epoch": 1.5059708093763822, "grad_norm": 2.8173154086052583, "learning_rate": 4.375543480932107e-06, "loss": 1.2627, "step": 51075 }, { "epoch": 1.5061182367683914, "grad_norm": 2.822046342679191, "learning_rate": 4.374909207957457e-06, "loss": 1.2644, "step": 51080 }, { "epoch": 1.5062656641604009, "grad_norm": 2.803873869048325, "learning_rate": 4.374274916594141e-06, "loss": 1.2853, "step": 51085 }, { "epoch": 1.5064130915524103, "grad_norm": 2.81299476784107, "learning_rate": 4.373640606860821e-06, "loss": 1.2883, "step": 51090 }, { "epoch": 1.5065605189444198, "grad_norm": 2.8777283831666916, "learning_rate": 4.3730062787761655e-06, "loss": 1.2648, "step": 51095 }, { "epoch": 1.5067079463364292, "grad_norm": 2.6857982570982974, "learning_rate": 4.372371932358837e-06, "loss": 1.3519, "step": 51100 }, { "epoch": 1.5068553737284387, "grad_norm": 2.848392824108841, "learning_rate": 4.371737567627505e-06, "loss": 1.3052, "step": 51105 }, { "epoch": 1.5070028011204482, "grad_norm": 2.794452226023776, "learning_rate": 4.371103184600834e-06, "loss": 1.3099, "step": 51110 }, { "epoch": 1.5071502285124576, "grad_norm": 2.819486910807878, "learning_rate": 4.370468783297494e-06, "loss": 1.3271, "step": 51115 }, { "epoch": 1.507297655904467, "grad_norm": 2.77870050061854, "learning_rate": 4.369834363736151e-06, "loss": 1.2772, "step": 51120 }, { "epoch": 1.5074450832964765, "grad_norm": 3.1430139297407518, "learning_rate": 4.3691999259354734e-06, "loss": 1.2994, "step": 51125 }, { "epoch": 1.507592510688486, "grad_norm": 2.7700118274560044, "learning_rate": 4.3685654699141315e-06, "loss": 1.2744, "step": 51130 }, { "epoch": 1.5077399380804954, "grad_norm": 2.893650061802269, "learning_rate": 4.3679309956907935e-06, "loss": 1.3099, "step": 51135 }, { "epoch": 1.507887365472505, "grad_norm": 2.800905005488243, "learning_rate": 4.3672965032841315e-06, "loss": 1.2403, "step": 51140 }, { "epoch": 1.5080347928645144, "grad_norm": 2.8821067012233907, "learning_rate": 4.366661992712814e-06, "loss": 1.3455, "step": 51145 }, { "epoch": 1.5081822202565238, "grad_norm": 2.8013182535391326, "learning_rate": 4.366027463995514e-06, "loss": 1.285, "step": 51150 }, { "epoch": 1.508329647648533, "grad_norm": 2.8913725278363334, "learning_rate": 4.365392917150902e-06, "loss": 1.3296, "step": 51155 }, { "epoch": 1.5084770750405425, "grad_norm": 2.709742160557453, "learning_rate": 4.3647583521976516e-06, "loss": 1.2654, "step": 51160 }, { "epoch": 1.508624502432552, "grad_norm": 2.85713691697767, "learning_rate": 4.364123769154434e-06, "loss": 1.2996, "step": 51165 }, { "epoch": 1.5087719298245614, "grad_norm": 2.8730605097805664, "learning_rate": 4.3634891680399235e-06, "loss": 1.3434, "step": 51170 }, { "epoch": 1.508919357216571, "grad_norm": 2.792915589752904, "learning_rate": 4.362854548872793e-06, "loss": 1.2677, "step": 51175 }, { "epoch": 1.5090667846085801, "grad_norm": 2.8055930702514345, "learning_rate": 4.3622199116717186e-06, "loss": 1.3079, "step": 51180 }, { "epoch": 1.5092142120005896, "grad_norm": 2.755501097324807, "learning_rate": 4.361585256455373e-06, "loss": 1.2905, "step": 51185 }, { "epoch": 1.509361639392599, "grad_norm": 2.7632686836306135, "learning_rate": 4.360950583242435e-06, "loss": 1.2952, "step": 51190 }, { "epoch": 1.5095090667846085, "grad_norm": 2.9265246567215266, "learning_rate": 4.360315892051576e-06, "loss": 1.267, "step": 51195 }, { "epoch": 1.509656494176618, "grad_norm": 2.790957865562251, "learning_rate": 4.359681182901476e-06, "loss": 1.3052, "step": 51200 }, { "epoch": 1.5098039215686274, "grad_norm": 2.7859861632927414, "learning_rate": 4.359046455810811e-06, "loss": 1.3235, "step": 51205 }, { "epoch": 1.5099513489606369, "grad_norm": 2.8271901174095873, "learning_rate": 4.358411710798258e-06, "loss": 1.2957, "step": 51210 }, { "epoch": 1.5100987763526463, "grad_norm": 2.8668165300594444, "learning_rate": 4.357776947882497e-06, "loss": 1.2927, "step": 51215 }, { "epoch": 1.5102462037446558, "grad_norm": 2.760214436114426, "learning_rate": 4.357142167082205e-06, "loss": 1.3358, "step": 51220 }, { "epoch": 1.5103936311366652, "grad_norm": 2.9263713981546093, "learning_rate": 4.35650736841606e-06, "loss": 1.3523, "step": 51225 }, { "epoch": 1.5105410585286747, "grad_norm": 2.828687465438315, "learning_rate": 4.355872551902745e-06, "loss": 1.3242, "step": 51230 }, { "epoch": 1.5106884859206842, "grad_norm": 3.0793530403910334, "learning_rate": 4.355237717560938e-06, "loss": 1.2995, "step": 51235 }, { "epoch": 1.5108359133126936, "grad_norm": 2.851760351781211, "learning_rate": 4.354602865409318e-06, "loss": 1.2952, "step": 51240 }, { "epoch": 1.510983340704703, "grad_norm": 2.6998515178674825, "learning_rate": 4.3539679954665705e-06, "loss": 1.2328, "step": 51245 }, { "epoch": 1.5111307680967123, "grad_norm": 2.8228889242749524, "learning_rate": 4.353333107751374e-06, "loss": 1.3321, "step": 51250 }, { "epoch": 1.5112781954887218, "grad_norm": 2.7770379568055517, "learning_rate": 4.3526982022824125e-06, "loss": 1.2853, "step": 51255 }, { "epoch": 1.5114256228807312, "grad_norm": 2.8044471888495353, "learning_rate": 4.352063279078368e-06, "loss": 1.2964, "step": 51260 }, { "epoch": 1.5115730502727407, "grad_norm": 2.947671739890126, "learning_rate": 4.351428338157923e-06, "loss": 1.2694, "step": 51265 }, { "epoch": 1.5117204776647502, "grad_norm": 2.952313736473724, "learning_rate": 4.350793379539764e-06, "loss": 1.3179, "step": 51270 }, { "epoch": 1.5118679050567594, "grad_norm": 2.8458047691696073, "learning_rate": 4.3501584032425736e-06, "loss": 1.3003, "step": 51275 }, { "epoch": 1.5120153324487688, "grad_norm": 2.8075716487045717, "learning_rate": 4.349523409285036e-06, "loss": 1.2882, "step": 51280 }, { "epoch": 1.5121627598407783, "grad_norm": 2.8390421340581664, "learning_rate": 4.348888397685838e-06, "loss": 1.2836, "step": 51285 }, { "epoch": 1.5123101872327878, "grad_norm": 2.7271476294599086, "learning_rate": 4.348253368463665e-06, "loss": 1.2823, "step": 51290 }, { "epoch": 1.5124576146247972, "grad_norm": 2.9080841574934113, "learning_rate": 4.347618321637203e-06, "loss": 1.2943, "step": 51295 }, { "epoch": 1.5126050420168067, "grad_norm": 2.86943891679535, "learning_rate": 4.346983257225141e-06, "loss": 1.3201, "step": 51300 }, { "epoch": 1.5127524694088161, "grad_norm": 2.7601782001959325, "learning_rate": 4.346348175246164e-06, "loss": 1.2779, "step": 51305 }, { "epoch": 1.5128998968008256, "grad_norm": 2.8147681800430875, "learning_rate": 4.34571307571896e-06, "loss": 1.3177, "step": 51310 }, { "epoch": 1.513047324192835, "grad_norm": 2.7752409240099, "learning_rate": 4.34507795866222e-06, "loss": 1.2909, "step": 51315 }, { "epoch": 1.5131947515848445, "grad_norm": 2.8638751287581306, "learning_rate": 4.344442824094631e-06, "loss": 1.3611, "step": 51320 }, { "epoch": 1.513342178976854, "grad_norm": 2.999329049317606, "learning_rate": 4.343807672034883e-06, "loss": 1.3324, "step": 51325 }, { "epoch": 1.5134896063688634, "grad_norm": 3.0134241516454647, "learning_rate": 4.343172502501666e-06, "loss": 1.352, "step": 51330 }, { "epoch": 1.5136370337608729, "grad_norm": 2.8243363066108005, "learning_rate": 4.342537315513671e-06, "loss": 1.3051, "step": 51335 }, { "epoch": 1.5137844611528823, "grad_norm": 2.7592878667533722, "learning_rate": 4.3419021110895894e-06, "loss": 1.3232, "step": 51340 }, { "epoch": 1.5139318885448918, "grad_norm": 2.7779546954274092, "learning_rate": 4.341266889248112e-06, "loss": 1.276, "step": 51345 }, { "epoch": 1.514079315936901, "grad_norm": 2.778733698195498, "learning_rate": 4.34063165000793e-06, "loss": 1.3273, "step": 51350 }, { "epoch": 1.5142267433289105, "grad_norm": 2.74615378063904, "learning_rate": 4.3399963933877385e-06, "loss": 1.3156, "step": 51355 }, { "epoch": 1.51437417072092, "grad_norm": 2.9529996091279296, "learning_rate": 4.33936111940623e-06, "loss": 1.342, "step": 51360 }, { "epoch": 1.5145215981129294, "grad_norm": 2.7545271415095702, "learning_rate": 4.338725828082096e-06, "loss": 1.2875, "step": 51365 }, { "epoch": 1.5146690255049389, "grad_norm": 2.8788103194534997, "learning_rate": 4.338090519434033e-06, "loss": 1.2994, "step": 51370 }, { "epoch": 1.514816452896948, "grad_norm": 2.67319423599606, "learning_rate": 4.337455193480736e-06, "loss": 1.2888, "step": 51375 }, { "epoch": 1.5149638802889576, "grad_norm": 3.0421079273666365, "learning_rate": 4.336819850240897e-06, "loss": 1.2933, "step": 51380 }, { "epoch": 1.515111307680967, "grad_norm": 2.8972134697689285, "learning_rate": 4.336184489733216e-06, "loss": 1.3245, "step": 51385 }, { "epoch": 1.5152587350729765, "grad_norm": 2.809274906403468, "learning_rate": 4.335549111976385e-06, "loss": 1.2873, "step": 51390 }, { "epoch": 1.515406162464986, "grad_norm": 2.894996784261447, "learning_rate": 4.334913716989104e-06, "loss": 1.3131, "step": 51395 }, { "epoch": 1.5155535898569954, "grad_norm": 2.8446605380356167, "learning_rate": 4.334278304790069e-06, "loss": 1.2842, "step": 51400 }, { "epoch": 1.5157010172490049, "grad_norm": 2.936202278805412, "learning_rate": 4.333642875397978e-06, "loss": 1.2714, "step": 51405 }, { "epoch": 1.5158484446410143, "grad_norm": 2.825266113791012, "learning_rate": 4.333007428831527e-06, "loss": 1.3282, "step": 51410 }, { "epoch": 1.5159958720330238, "grad_norm": 2.808522389560824, "learning_rate": 4.332371965109419e-06, "loss": 1.259, "step": 51415 }, { "epoch": 1.5161432994250332, "grad_norm": 2.812759601223041, "learning_rate": 4.33173648425035e-06, "loss": 1.296, "step": 51420 }, { "epoch": 1.5162907268170427, "grad_norm": 2.8670381327134646, "learning_rate": 4.331100986273021e-06, "loss": 1.313, "step": 51425 }, { "epoch": 1.5164381542090521, "grad_norm": 2.7564863236610413, "learning_rate": 4.330465471196131e-06, "loss": 1.3198, "step": 51430 }, { "epoch": 1.5165855816010616, "grad_norm": 2.8916813188547437, "learning_rate": 4.329829939038383e-06, "loss": 1.2983, "step": 51435 }, { "epoch": 1.516733008993071, "grad_norm": 2.7572328772865395, "learning_rate": 4.329194389818476e-06, "loss": 1.2956, "step": 51440 }, { "epoch": 1.5168804363850803, "grad_norm": 2.7650963352976965, "learning_rate": 4.328558823555113e-06, "loss": 1.3027, "step": 51445 }, { "epoch": 1.5170278637770898, "grad_norm": 2.9298343956000332, "learning_rate": 4.327923240266997e-06, "loss": 1.3073, "step": 51450 }, { "epoch": 1.5171752911690992, "grad_norm": 2.8384278966451504, "learning_rate": 4.327287639972827e-06, "loss": 1.2952, "step": 51455 }, { "epoch": 1.5173227185611087, "grad_norm": 2.9720918407455974, "learning_rate": 4.326652022691311e-06, "loss": 1.3294, "step": 51460 }, { "epoch": 1.5174701459531181, "grad_norm": 2.901900860279952, "learning_rate": 4.326016388441151e-06, "loss": 1.325, "step": 51465 }, { "epoch": 1.5176175733451274, "grad_norm": 2.902429391031578, "learning_rate": 4.3253807372410495e-06, "loss": 1.3128, "step": 51470 }, { "epoch": 1.5177650007371368, "grad_norm": 2.9122303376208887, "learning_rate": 4.324745069109715e-06, "loss": 1.3036, "step": 51475 }, { "epoch": 1.5179124281291463, "grad_norm": 2.8562266309206317, "learning_rate": 4.324109384065848e-06, "loss": 1.2906, "step": 51480 }, { "epoch": 1.5180598555211557, "grad_norm": 2.8734493790258258, "learning_rate": 4.323473682128158e-06, "loss": 1.3039, "step": 51485 }, { "epoch": 1.5182072829131652, "grad_norm": 2.878443054262053, "learning_rate": 4.322837963315351e-06, "loss": 1.3457, "step": 51490 }, { "epoch": 1.5183547103051747, "grad_norm": 2.9862037374404657, "learning_rate": 4.322202227646132e-06, "loss": 1.2804, "step": 51495 }, { "epoch": 1.5185021376971841, "grad_norm": 2.8889018911975457, "learning_rate": 4.321566475139209e-06, "loss": 1.2748, "step": 51500 }, { "epoch": 1.5185021376971841, "eval_loss": 1.094597578048706, "eval_runtime": 4.2054, "eval_samples_per_second": 94.165, "eval_steps_per_second": 3.091, "step": 51500 }, { "epoch": 1.5186495650891936, "grad_norm": 2.9573698611940067, "learning_rate": 4.32093070581329e-06, "loss": 1.3028, "step": 51505 }, { "epoch": 1.518796992481203, "grad_norm": 2.8827652826031787, "learning_rate": 4.320294919687083e-06, "loss": 1.3263, "step": 51510 }, { "epoch": 1.5189444198732125, "grad_norm": 2.8228946865789584, "learning_rate": 4.319659116779296e-06, "loss": 1.2845, "step": 51515 }, { "epoch": 1.519091847265222, "grad_norm": 2.865124406721502, "learning_rate": 4.319023297108639e-06, "loss": 1.3149, "step": 51520 }, { "epoch": 1.5192392746572314, "grad_norm": 2.8819811597063585, "learning_rate": 4.318387460693823e-06, "loss": 1.3164, "step": 51525 }, { "epoch": 1.5193867020492409, "grad_norm": 2.858345924794266, "learning_rate": 4.317751607553557e-06, "loss": 1.3261, "step": 51530 }, { "epoch": 1.5195341294412503, "grad_norm": 2.9005521152105, "learning_rate": 4.317115737706551e-06, "loss": 1.2654, "step": 51535 }, { "epoch": 1.5196815568332598, "grad_norm": 2.986874850020821, "learning_rate": 4.316479851171517e-06, "loss": 1.3181, "step": 51540 }, { "epoch": 1.519828984225269, "grad_norm": 2.8748524103809703, "learning_rate": 4.315843947967166e-06, "loss": 1.3076, "step": 51545 }, { "epoch": 1.5199764116172785, "grad_norm": 2.798529437466816, "learning_rate": 4.315208028112211e-06, "loss": 1.3248, "step": 51550 }, { "epoch": 1.520123839009288, "grad_norm": 2.7919425835923066, "learning_rate": 4.314572091625365e-06, "loss": 1.2661, "step": 51555 }, { "epoch": 1.5202712664012974, "grad_norm": 2.7427331386737435, "learning_rate": 4.313936138525341e-06, "loss": 1.3218, "step": 51560 }, { "epoch": 1.5204186937933066, "grad_norm": 2.8818443668463942, "learning_rate": 4.31330016883085e-06, "loss": 1.282, "step": 51565 }, { "epoch": 1.520566121185316, "grad_norm": 2.9566979329692864, "learning_rate": 4.312664182560609e-06, "loss": 1.321, "step": 51570 }, { "epoch": 1.5207135485773255, "grad_norm": 2.911245278496498, "learning_rate": 4.3120281797333325e-06, "loss": 1.333, "step": 51575 }, { "epoch": 1.520860975969335, "grad_norm": 2.891939645318826, "learning_rate": 4.311392160367736e-06, "loss": 1.2827, "step": 51580 }, { "epoch": 1.5210084033613445, "grad_norm": 2.899279110415233, "learning_rate": 4.310756124482533e-06, "loss": 1.3422, "step": 51585 }, { "epoch": 1.521155830753354, "grad_norm": 2.8386775696961744, "learning_rate": 4.31012007209644e-06, "loss": 1.2349, "step": 51590 }, { "epoch": 1.5213032581453634, "grad_norm": 2.798356734640046, "learning_rate": 4.309484003228174e-06, "loss": 1.3028, "step": 51595 }, { "epoch": 1.5214506855373728, "grad_norm": 2.7168665193098507, "learning_rate": 4.308847917896455e-06, "loss": 1.2991, "step": 51600 }, { "epoch": 1.5215981129293823, "grad_norm": 2.8798397328810954, "learning_rate": 4.308211816119996e-06, "loss": 1.3075, "step": 51605 }, { "epoch": 1.5217455403213918, "grad_norm": 2.7206700467510863, "learning_rate": 4.307575697917517e-06, "loss": 1.2567, "step": 51610 }, { "epoch": 1.5218929677134012, "grad_norm": 2.7995011168294313, "learning_rate": 4.306939563307736e-06, "loss": 1.3132, "step": 51615 }, { "epoch": 1.5220403951054107, "grad_norm": 2.927682505986488, "learning_rate": 4.306303412309374e-06, "loss": 1.3277, "step": 51620 }, { "epoch": 1.5221878224974201, "grad_norm": 2.9184467882373224, "learning_rate": 4.305667244941147e-06, "loss": 1.2916, "step": 51625 }, { "epoch": 1.5223352498894296, "grad_norm": 2.739915706696299, "learning_rate": 4.305031061221779e-06, "loss": 1.3101, "step": 51630 }, { "epoch": 1.522482677281439, "grad_norm": 2.8347072225491323, "learning_rate": 4.3043948611699865e-06, "loss": 1.3339, "step": 51635 }, { "epoch": 1.5226301046734483, "grad_norm": 2.785946285424503, "learning_rate": 4.303758644804493e-06, "loss": 1.2927, "step": 51640 }, { "epoch": 1.5227775320654577, "grad_norm": 2.9118310480038003, "learning_rate": 4.303122412144018e-06, "loss": 1.3142, "step": 51645 }, { "epoch": 1.5229249594574672, "grad_norm": 2.880430956676546, "learning_rate": 4.302486163207285e-06, "loss": 1.28, "step": 51650 }, { "epoch": 1.5230723868494767, "grad_norm": 2.842292824965276, "learning_rate": 4.301849898013015e-06, "loss": 1.296, "step": 51655 }, { "epoch": 1.5232198142414861, "grad_norm": 2.8365816709202885, "learning_rate": 4.301213616579934e-06, "loss": 1.2688, "step": 51660 }, { "epoch": 1.5233672416334954, "grad_norm": 2.851147206458781, "learning_rate": 4.300577318926761e-06, "loss": 1.3345, "step": 51665 }, { "epoch": 1.5235146690255048, "grad_norm": 2.8749366656149973, "learning_rate": 4.299941005072222e-06, "loss": 1.333, "step": 51670 }, { "epoch": 1.5236620964175143, "grad_norm": 2.807997044722539, "learning_rate": 4.299304675035041e-06, "loss": 1.2922, "step": 51675 }, { "epoch": 1.5238095238095237, "grad_norm": 2.785803011776617, "learning_rate": 4.298668328833942e-06, "loss": 1.341, "step": 51680 }, { "epoch": 1.5239569512015332, "grad_norm": 2.955098784094108, "learning_rate": 4.298031966487652e-06, "loss": 1.3132, "step": 51685 }, { "epoch": 1.5241043785935426, "grad_norm": 2.8190509467828426, "learning_rate": 4.297395588014896e-06, "loss": 1.3369, "step": 51690 }, { "epoch": 1.524251805985552, "grad_norm": 2.8824158390417582, "learning_rate": 4.296759193434399e-06, "loss": 1.2936, "step": 51695 }, { "epoch": 1.5243992333775616, "grad_norm": 2.8909073402424244, "learning_rate": 4.296122782764888e-06, "loss": 1.3297, "step": 51700 }, { "epoch": 1.524546660769571, "grad_norm": 2.8302259795738074, "learning_rate": 4.29548635602509e-06, "loss": 1.3313, "step": 51705 }, { "epoch": 1.5246940881615805, "grad_norm": 2.800578541847684, "learning_rate": 4.2948499132337346e-06, "loss": 1.304, "step": 51710 }, { "epoch": 1.52484151555359, "grad_norm": 2.8413942964547054, "learning_rate": 4.294213454409548e-06, "loss": 1.2939, "step": 51715 }, { "epoch": 1.5249889429455994, "grad_norm": 2.9725791053123536, "learning_rate": 4.293576979571259e-06, "loss": 1.2829, "step": 51720 }, { "epoch": 1.5251363703376088, "grad_norm": 2.8277427392689627, "learning_rate": 4.292940488737596e-06, "loss": 1.3023, "step": 51725 }, { "epoch": 1.5252837977296183, "grad_norm": 2.9742783939574293, "learning_rate": 4.29230398192729e-06, "loss": 1.2833, "step": 51730 }, { "epoch": 1.5254312251216275, "grad_norm": 2.8919568096688435, "learning_rate": 4.29166745915907e-06, "loss": 1.3417, "step": 51735 }, { "epoch": 1.525578652513637, "grad_norm": 2.9639649449446, "learning_rate": 4.2910309204516655e-06, "loss": 1.3303, "step": 51740 }, { "epoch": 1.5257260799056465, "grad_norm": 2.8707180842273843, "learning_rate": 4.290394365823808e-06, "loss": 1.3072, "step": 51745 }, { "epoch": 1.525873507297656, "grad_norm": 2.8607845811982444, "learning_rate": 4.289757795294232e-06, "loss": 1.3232, "step": 51750 }, { "epoch": 1.5260209346896654, "grad_norm": 2.79142625020932, "learning_rate": 4.289121208881664e-06, "loss": 1.3427, "step": 51755 }, { "epoch": 1.5261683620816746, "grad_norm": 2.751265234887755, "learning_rate": 4.28848460660484e-06, "loss": 1.2921, "step": 51760 }, { "epoch": 1.526315789473684, "grad_norm": 2.8038632713342886, "learning_rate": 4.287847988482492e-06, "loss": 1.295, "step": 51765 }, { "epoch": 1.5264632168656935, "grad_norm": 2.8084398077128183, "learning_rate": 4.287211354533352e-06, "loss": 1.3351, "step": 51770 }, { "epoch": 1.526610644257703, "grad_norm": 3.002221939451476, "learning_rate": 4.2865747047761535e-06, "loss": 1.3671, "step": 51775 }, { "epoch": 1.5267580716497124, "grad_norm": 2.913688437835395, "learning_rate": 4.285938039229633e-06, "loss": 1.2855, "step": 51780 }, { "epoch": 1.526905499041722, "grad_norm": 2.8624400995262858, "learning_rate": 4.285301357912522e-06, "loss": 1.3114, "step": 51785 }, { "epoch": 1.5270529264337314, "grad_norm": 2.979898079749889, "learning_rate": 4.28466466084356e-06, "loss": 1.3224, "step": 51790 }, { "epoch": 1.5272003538257408, "grad_norm": 2.839562582079185, "learning_rate": 4.2840279480414785e-06, "loss": 1.3046, "step": 51795 }, { "epoch": 1.5273477812177503, "grad_norm": 2.8276272309957795, "learning_rate": 4.283391219525014e-06, "loss": 1.2216, "step": 51800 }, { "epoch": 1.5274952086097597, "grad_norm": 2.8067614430777055, "learning_rate": 4.282754475312904e-06, "loss": 1.3285, "step": 51805 }, { "epoch": 1.5276426360017692, "grad_norm": 2.8367242236619985, "learning_rate": 4.282117715423886e-06, "loss": 1.264, "step": 51810 }, { "epoch": 1.5277900633937787, "grad_norm": 2.9044866686410904, "learning_rate": 4.281480939876697e-06, "loss": 1.3242, "step": 51815 }, { "epoch": 1.527937490785788, "grad_norm": 2.7402606158439187, "learning_rate": 4.280844148690074e-06, "loss": 1.278, "step": 51820 }, { "epoch": 1.5280849181777976, "grad_norm": 2.753598864887938, "learning_rate": 4.280207341882755e-06, "loss": 1.3475, "step": 51825 }, { "epoch": 1.528232345569807, "grad_norm": 2.7991233552325863, "learning_rate": 4.27957051947348e-06, "loss": 1.3478, "step": 51830 }, { "epoch": 1.5283797729618163, "grad_norm": 2.848402979681548, "learning_rate": 4.278933681480988e-06, "loss": 1.2498, "step": 51835 }, { "epoch": 1.5285272003538257, "grad_norm": 2.8122096271481656, "learning_rate": 4.278296827924019e-06, "loss": 1.2793, "step": 51840 }, { "epoch": 1.5286746277458352, "grad_norm": 2.8251811033716003, "learning_rate": 4.2776599588213115e-06, "loss": 1.3385, "step": 51845 }, { "epoch": 1.5288220551378446, "grad_norm": 2.860568561069018, "learning_rate": 4.277023074191608e-06, "loss": 1.3283, "step": 51850 }, { "epoch": 1.528969482529854, "grad_norm": 2.8043977565092204, "learning_rate": 4.276386174053648e-06, "loss": 1.2942, "step": 51855 }, { "epoch": 1.5291169099218633, "grad_norm": 2.832886534717278, "learning_rate": 4.275749258426174e-06, "loss": 1.2776, "step": 51860 }, { "epoch": 1.5292643373138728, "grad_norm": 3.040095325616569, "learning_rate": 4.275112327327928e-06, "loss": 1.3408, "step": 51865 }, { "epoch": 1.5294117647058822, "grad_norm": 2.7200242471190252, "learning_rate": 4.274475380777651e-06, "loss": 1.3092, "step": 51870 }, { "epoch": 1.5295591920978917, "grad_norm": 2.7611044169554297, "learning_rate": 4.273838418794089e-06, "loss": 1.2643, "step": 51875 }, { "epoch": 1.5297066194899012, "grad_norm": 2.8521396981443057, "learning_rate": 4.273201441395983e-06, "loss": 1.2966, "step": 51880 }, { "epoch": 1.5298540468819106, "grad_norm": 2.754891569909281, "learning_rate": 4.272564448602076e-06, "loss": 1.3079, "step": 51885 }, { "epoch": 1.53000147427392, "grad_norm": 2.8881202627342377, "learning_rate": 4.271927440431114e-06, "loss": 1.2965, "step": 51890 }, { "epoch": 1.5301489016659295, "grad_norm": 2.9130007310023616, "learning_rate": 4.271290416901841e-06, "loss": 1.3087, "step": 51895 }, { "epoch": 1.530296329057939, "grad_norm": 2.678150697424734, "learning_rate": 4.270653378033002e-06, "loss": 1.2779, "step": 51900 }, { "epoch": 1.5304437564499485, "grad_norm": 2.777252790833313, "learning_rate": 4.270016323843343e-06, "loss": 1.2908, "step": 51905 }, { "epoch": 1.530591183841958, "grad_norm": 2.892506805684014, "learning_rate": 4.26937925435161e-06, "loss": 1.3088, "step": 51910 }, { "epoch": 1.5307386112339674, "grad_norm": 2.8346875222159866, "learning_rate": 4.268742169576549e-06, "loss": 1.3305, "step": 51915 }, { "epoch": 1.5308860386259768, "grad_norm": 2.805069769845823, "learning_rate": 4.268105069536908e-06, "loss": 1.297, "step": 51920 }, { "epoch": 1.5310334660179863, "grad_norm": 2.7736615992074682, "learning_rate": 4.267467954251433e-06, "loss": 1.279, "step": 51925 }, { "epoch": 1.5311808934099955, "grad_norm": 2.9603004993901827, "learning_rate": 4.266830823738873e-06, "loss": 1.2775, "step": 51930 }, { "epoch": 1.531328320802005, "grad_norm": 2.8193228298124553, "learning_rate": 4.266193678017976e-06, "loss": 1.3102, "step": 51935 }, { "epoch": 1.5314757481940144, "grad_norm": 3.041107670772098, "learning_rate": 4.26555651710749e-06, "loss": 1.3257, "step": 51940 }, { "epoch": 1.531623175586024, "grad_norm": 3.015191514745197, "learning_rate": 4.264919341026166e-06, "loss": 1.3597, "step": 51945 }, { "epoch": 1.5317706029780334, "grad_norm": 2.8521939959046514, "learning_rate": 4.264282149792751e-06, "loss": 1.3123, "step": 51950 }, { "epoch": 1.5319180303700426, "grad_norm": 2.83925884247405, "learning_rate": 4.263644943425998e-06, "loss": 1.3553, "step": 51955 }, { "epoch": 1.532065457762052, "grad_norm": 2.7073101934852444, "learning_rate": 4.2630077219446535e-06, "loss": 1.2801, "step": 51960 }, { "epoch": 1.5322128851540615, "grad_norm": 2.7433222381456694, "learning_rate": 4.262370485367473e-06, "loss": 1.31, "step": 51965 }, { "epoch": 1.532360312546071, "grad_norm": 2.9013109355310376, "learning_rate": 4.261733233713207e-06, "loss": 1.3988, "step": 51970 }, { "epoch": 1.5325077399380804, "grad_norm": 2.925519143684551, "learning_rate": 4.261095967000603e-06, "loss": 1.2999, "step": 51975 }, { "epoch": 1.5326551673300899, "grad_norm": 2.8949588445880767, "learning_rate": 4.260458685248419e-06, "loss": 1.2994, "step": 51980 }, { "epoch": 1.5328025947220993, "grad_norm": 2.8310059370052203, "learning_rate": 4.259821388475404e-06, "loss": 1.3172, "step": 51985 }, { "epoch": 1.5329500221141088, "grad_norm": 2.8509619765750136, "learning_rate": 4.259184076700313e-06, "loss": 1.294, "step": 51990 }, { "epoch": 1.5330974495061183, "grad_norm": 2.6924370080798177, "learning_rate": 4.2585467499419e-06, "loss": 1.2835, "step": 51995 }, { "epoch": 1.5332448768981277, "grad_norm": 2.8367597748787214, "learning_rate": 4.257909408218917e-06, "loss": 1.369, "step": 52000 }, { "epoch": 1.5332448768981277, "eval_loss": 1.093622088432312, "eval_runtime": 4.2443, "eval_samples_per_second": 93.302, "eval_steps_per_second": 3.063, "step": 52000 }, { "epoch": 1.5333923042901372, "grad_norm": 2.882324497167821, "learning_rate": 4.25727205155012e-06, "loss": 1.2772, "step": 52005 }, { "epoch": 1.5335397316821466, "grad_norm": 2.889422826332595, "learning_rate": 4.256634679954264e-06, "loss": 1.3327, "step": 52010 }, { "epoch": 1.533687159074156, "grad_norm": 2.806385624342711, "learning_rate": 4.255997293450104e-06, "loss": 1.3441, "step": 52015 }, { "epoch": 1.5338345864661656, "grad_norm": 2.950600089462329, "learning_rate": 4.255359892056395e-06, "loss": 1.3403, "step": 52020 }, { "epoch": 1.533982013858175, "grad_norm": 2.6882583267326687, "learning_rate": 4.254722475791895e-06, "loss": 1.3204, "step": 52025 }, { "epoch": 1.5341294412501842, "grad_norm": 2.87304293858847, "learning_rate": 4.25408504467536e-06, "loss": 1.2998, "step": 52030 }, { "epoch": 1.5342768686421937, "grad_norm": 2.9239869031288217, "learning_rate": 4.2534475987255454e-06, "loss": 1.283, "step": 52035 }, { "epoch": 1.5344242960342032, "grad_norm": 2.8891590547478376, "learning_rate": 4.252810137961211e-06, "loss": 1.3347, "step": 52040 }, { "epoch": 1.5345717234262126, "grad_norm": 3.0006597528148697, "learning_rate": 4.252172662401115e-06, "loss": 1.291, "step": 52045 }, { "epoch": 1.534719150818222, "grad_norm": 2.7893800607079777, "learning_rate": 4.251535172064015e-06, "loss": 1.3125, "step": 52050 }, { "epoch": 1.5348665782102313, "grad_norm": 2.8288127506996843, "learning_rate": 4.2508976669686695e-06, "loss": 1.3696, "step": 52055 }, { "epoch": 1.5350140056022408, "grad_norm": 2.8675947083040394, "learning_rate": 4.250260147133838e-06, "loss": 1.3133, "step": 52060 }, { "epoch": 1.5351614329942502, "grad_norm": 2.8844250653231533, "learning_rate": 4.24962261257828e-06, "loss": 1.3204, "step": 52065 }, { "epoch": 1.5353088603862597, "grad_norm": 2.7964358919303534, "learning_rate": 4.248985063320757e-06, "loss": 1.2958, "step": 52070 }, { "epoch": 1.5354562877782691, "grad_norm": 2.841432846668572, "learning_rate": 4.248347499380028e-06, "loss": 1.3223, "step": 52075 }, { "epoch": 1.5356037151702786, "grad_norm": 2.7990172675435225, "learning_rate": 4.247709920774857e-06, "loss": 1.287, "step": 52080 }, { "epoch": 1.535751142562288, "grad_norm": 2.907846087278307, "learning_rate": 4.2470723275240005e-06, "loss": 1.2958, "step": 52085 }, { "epoch": 1.5358985699542975, "grad_norm": 3.0099504970117055, "learning_rate": 4.246434719646223e-06, "loss": 1.3116, "step": 52090 }, { "epoch": 1.536045997346307, "grad_norm": 2.8792612600462637, "learning_rate": 4.245797097160289e-06, "loss": 1.2421, "step": 52095 }, { "epoch": 1.5361934247383164, "grad_norm": 2.8153115184852604, "learning_rate": 4.245159460084959e-06, "loss": 1.3379, "step": 52100 }, { "epoch": 1.536340852130326, "grad_norm": 2.8061211153880046, "learning_rate": 4.244521808438995e-06, "loss": 1.2651, "step": 52105 }, { "epoch": 1.5364882795223354, "grad_norm": 2.8437859702436405, "learning_rate": 4.243884142241163e-06, "loss": 1.3052, "step": 52110 }, { "epoch": 1.5366357069143448, "grad_norm": 2.8843616729182835, "learning_rate": 4.243246461510226e-06, "loss": 1.285, "step": 52115 }, { "epoch": 1.5367831343063543, "grad_norm": 2.9501472389871704, "learning_rate": 4.242608766264948e-06, "loss": 1.3689, "step": 52120 }, { "epoch": 1.5369305616983635, "grad_norm": 3.0890392288492348, "learning_rate": 4.241971056524096e-06, "loss": 1.3391, "step": 52125 }, { "epoch": 1.537077989090373, "grad_norm": 2.838190687034767, "learning_rate": 4.2413333323064325e-06, "loss": 1.2814, "step": 52130 }, { "epoch": 1.5372254164823824, "grad_norm": 3.0431574891754463, "learning_rate": 4.240695593630726e-06, "loss": 1.3759, "step": 52135 }, { "epoch": 1.5373728438743919, "grad_norm": 2.691740466232927, "learning_rate": 4.24005784051574e-06, "loss": 1.269, "step": 52140 }, { "epoch": 1.5375202712664013, "grad_norm": 2.883753231744376, "learning_rate": 4.239420072980244e-06, "loss": 1.3018, "step": 52145 }, { "epoch": 1.5376676986584106, "grad_norm": 2.911206421122311, "learning_rate": 4.238782291043002e-06, "loss": 1.2666, "step": 52150 }, { "epoch": 1.53781512605042, "grad_norm": 2.7094836035460332, "learning_rate": 4.238144494722784e-06, "loss": 1.3276, "step": 52155 }, { "epoch": 1.5379625534424295, "grad_norm": 2.791295614354464, "learning_rate": 4.237506684038357e-06, "loss": 1.3112, "step": 52160 }, { "epoch": 1.538109980834439, "grad_norm": 2.8315639516059816, "learning_rate": 4.236868859008488e-06, "loss": 1.3062, "step": 52165 }, { "epoch": 1.5382574082264484, "grad_norm": 2.8540216359490675, "learning_rate": 4.236231019651946e-06, "loss": 1.3494, "step": 52170 }, { "epoch": 1.5384048356184579, "grad_norm": 2.8071845131443363, "learning_rate": 4.235593165987503e-06, "loss": 1.329, "step": 52175 }, { "epoch": 1.5385522630104673, "grad_norm": 2.4865204250837505, "learning_rate": 4.2349552980339274e-06, "loss": 1.2696, "step": 52180 }, { "epoch": 1.5386996904024768, "grad_norm": 2.85737639530173, "learning_rate": 4.234317415809986e-06, "loss": 1.2905, "step": 52185 }, { "epoch": 1.5388471177944862, "grad_norm": 2.8481690258708015, "learning_rate": 4.233679519334453e-06, "loss": 1.3095, "step": 52190 }, { "epoch": 1.5389945451864957, "grad_norm": 3.0130834303344742, "learning_rate": 4.233041608626097e-06, "loss": 1.3416, "step": 52195 }, { "epoch": 1.5391419725785052, "grad_norm": 2.7810759419158333, "learning_rate": 4.23240368370369e-06, "loss": 1.3203, "step": 52200 }, { "epoch": 1.5392893999705146, "grad_norm": 2.8295149592474425, "learning_rate": 4.2317657445860055e-06, "loss": 1.3154, "step": 52205 }, { "epoch": 1.539436827362524, "grad_norm": 2.947618783027184, "learning_rate": 4.231127791291813e-06, "loss": 1.3363, "step": 52210 }, { "epoch": 1.5395842547545335, "grad_norm": 2.8819150987756523, "learning_rate": 4.230489823839886e-06, "loss": 1.3022, "step": 52215 }, { "epoch": 1.539731682146543, "grad_norm": 2.9553489049861144, "learning_rate": 4.229851842248996e-06, "loss": 1.3753, "step": 52220 }, { "epoch": 1.5398791095385522, "grad_norm": 2.887840355697921, "learning_rate": 4.229213846537918e-06, "loss": 1.3337, "step": 52225 }, { "epoch": 1.5400265369305617, "grad_norm": 3.015107703877997, "learning_rate": 4.228575836725427e-06, "loss": 1.2821, "step": 52230 }, { "epoch": 1.5401739643225711, "grad_norm": 2.8698162034052994, "learning_rate": 4.227937812830294e-06, "loss": 1.3307, "step": 52235 }, { "epoch": 1.5403213917145806, "grad_norm": 2.834809859446686, "learning_rate": 4.227299774871295e-06, "loss": 1.27, "step": 52240 }, { "epoch": 1.54046881910659, "grad_norm": 2.8827902406644106, "learning_rate": 4.2266617228672054e-06, "loss": 1.3051, "step": 52245 }, { "epoch": 1.5406162464985993, "grad_norm": 2.849424849223697, "learning_rate": 4.2260236568368e-06, "loss": 1.3078, "step": 52250 }, { "epoch": 1.5407636738906088, "grad_norm": 2.856920090065417, "learning_rate": 4.225385576798856e-06, "loss": 1.3334, "step": 52255 }, { "epoch": 1.5409111012826182, "grad_norm": 2.8843088493166458, "learning_rate": 4.224747482772148e-06, "loss": 1.288, "step": 52260 }, { "epoch": 1.5410585286746277, "grad_norm": 2.890562964838915, "learning_rate": 4.224109374775453e-06, "loss": 1.3234, "step": 52265 }, { "epoch": 1.5412059560666371, "grad_norm": 2.8080532044290227, "learning_rate": 4.2234712528275484e-06, "loss": 1.2691, "step": 52270 }, { "epoch": 1.5413533834586466, "grad_norm": 2.8154435239712448, "learning_rate": 4.222833116947212e-06, "loss": 1.2397, "step": 52275 }, { "epoch": 1.541500810850656, "grad_norm": 2.8643051727108526, "learning_rate": 4.22219496715322e-06, "loss": 1.3063, "step": 52280 }, { "epoch": 1.5416482382426655, "grad_norm": 2.9814612338333677, "learning_rate": 4.221556803464354e-06, "loss": 1.3392, "step": 52285 }, { "epoch": 1.541795665634675, "grad_norm": 2.9864697807748066, "learning_rate": 4.220918625899389e-06, "loss": 1.3069, "step": 52290 }, { "epoch": 1.5419430930266844, "grad_norm": 2.7699755357769247, "learning_rate": 4.220280434477105e-06, "loss": 1.3272, "step": 52295 }, { "epoch": 1.5420905204186939, "grad_norm": 2.7426194009458693, "learning_rate": 4.219642229216284e-06, "loss": 1.3512, "step": 52300 }, { "epoch": 1.5422379478107033, "grad_norm": 2.7417892973152362, "learning_rate": 4.219004010135703e-06, "loss": 1.3065, "step": 52305 }, { "epoch": 1.5423853752027128, "grad_norm": 2.9246167083481143, "learning_rate": 4.218365777254144e-06, "loss": 1.3076, "step": 52310 }, { "epoch": 1.5425328025947223, "grad_norm": 2.8217168860131836, "learning_rate": 4.217727530590386e-06, "loss": 1.2642, "step": 52315 }, { "epoch": 1.5426802299867315, "grad_norm": 3.1041792500446452, "learning_rate": 4.217089270163212e-06, "loss": 1.3089, "step": 52320 }, { "epoch": 1.542827657378741, "grad_norm": 2.862778082316183, "learning_rate": 4.216450995991403e-06, "loss": 1.3171, "step": 52325 }, { "epoch": 1.5429750847707504, "grad_norm": 2.836447184033839, "learning_rate": 4.2158127080937414e-06, "loss": 1.3508, "step": 52330 }, { "epoch": 1.5431225121627599, "grad_norm": 2.945794169583152, "learning_rate": 4.2151744064890095e-06, "loss": 1.3592, "step": 52335 }, { "epoch": 1.5432699395547693, "grad_norm": 2.809796891716302, "learning_rate": 4.214536091195989e-06, "loss": 1.301, "step": 52340 }, { "epoch": 1.5434173669467786, "grad_norm": 2.849860861942464, "learning_rate": 4.213897762233463e-06, "loss": 1.2612, "step": 52345 }, { "epoch": 1.543564794338788, "grad_norm": 2.7952976316193676, "learning_rate": 4.2132594196202156e-06, "loss": 1.3358, "step": 52350 }, { "epoch": 1.5437122217307975, "grad_norm": 2.8583097188134046, "learning_rate": 4.212621063375031e-06, "loss": 1.3047, "step": 52355 }, { "epoch": 1.543859649122807, "grad_norm": 2.7821027404243663, "learning_rate": 4.211982693516694e-06, "loss": 1.2931, "step": 52360 }, { "epoch": 1.5440070765148164, "grad_norm": 2.6912574104324833, "learning_rate": 4.211344310063986e-06, "loss": 1.2712, "step": 52365 }, { "epoch": 1.5441545039068258, "grad_norm": 2.787187125788788, "learning_rate": 4.210705913035697e-06, "loss": 1.242, "step": 52370 }, { "epoch": 1.5443019312988353, "grad_norm": 3.235797832858873, "learning_rate": 4.210067502450611e-06, "loss": 1.3048, "step": 52375 }, { "epoch": 1.5444493586908448, "grad_norm": 2.78929485551326, "learning_rate": 4.209429078327512e-06, "loss": 1.3117, "step": 52380 }, { "epoch": 1.5445967860828542, "grad_norm": 2.836197311165769, "learning_rate": 4.208790640685189e-06, "loss": 1.288, "step": 52385 }, { "epoch": 1.5447442134748637, "grad_norm": 2.786925045689197, "learning_rate": 4.208152189542427e-06, "loss": 1.2931, "step": 52390 }, { "epoch": 1.5448916408668731, "grad_norm": 3.1053879676047913, "learning_rate": 4.207513724918014e-06, "loss": 1.2712, "step": 52395 }, { "epoch": 1.5450390682588826, "grad_norm": 2.879671450755165, "learning_rate": 4.2068752468307366e-06, "loss": 1.2779, "step": 52400 }, { "epoch": 1.545186495650892, "grad_norm": 2.7605302644503373, "learning_rate": 4.206236755299385e-06, "loss": 1.3104, "step": 52405 }, { "epoch": 1.5453339230429015, "grad_norm": 2.956441006168676, "learning_rate": 4.205598250342743e-06, "loss": 1.3462, "step": 52410 }, { "epoch": 1.545481350434911, "grad_norm": 2.92288097300919, "learning_rate": 4.204959731979605e-06, "loss": 1.315, "step": 52415 }, { "epoch": 1.5456287778269202, "grad_norm": 2.9132733721515724, "learning_rate": 4.204321200228755e-06, "loss": 1.3077, "step": 52420 }, { "epoch": 1.5457762052189297, "grad_norm": 2.813587397344763, "learning_rate": 4.203682655108986e-06, "loss": 1.2707, "step": 52425 }, { "epoch": 1.5459236326109391, "grad_norm": 2.8781234898961277, "learning_rate": 4.203044096639087e-06, "loss": 1.2707, "step": 52430 }, { "epoch": 1.5460710600029486, "grad_norm": 2.6942716755759477, "learning_rate": 4.2024055248378485e-06, "loss": 1.3324, "step": 52435 }, { "epoch": 1.5462184873949578, "grad_norm": 2.888413945549641, "learning_rate": 4.2017669397240605e-06, "loss": 1.3545, "step": 52440 }, { "epoch": 1.5463659147869673, "grad_norm": 2.86406989635791, "learning_rate": 4.201128341316514e-06, "loss": 1.3374, "step": 52445 }, { "epoch": 1.5465133421789767, "grad_norm": 2.8481120574276844, "learning_rate": 4.200489729634001e-06, "loss": 1.2991, "step": 52450 }, { "epoch": 1.5466607695709862, "grad_norm": 2.779566148569961, "learning_rate": 4.199851104695313e-06, "loss": 1.3288, "step": 52455 }, { "epoch": 1.5468081969629957, "grad_norm": 2.997450077396141, "learning_rate": 4.199212466519243e-06, "loss": 1.3191, "step": 52460 }, { "epoch": 1.546955624355005, "grad_norm": 2.7705823250118793, "learning_rate": 4.1985738151245845e-06, "loss": 1.288, "step": 52465 }, { "epoch": 1.5471030517470146, "grad_norm": 2.865055157997572, "learning_rate": 4.197935150530128e-06, "loss": 1.3472, "step": 52470 }, { "epoch": 1.547250479139024, "grad_norm": 2.9280320751741833, "learning_rate": 4.197296472754669e-06, "loss": 1.2893, "step": 52475 }, { "epoch": 1.5473979065310335, "grad_norm": 2.7516530616529544, "learning_rate": 4.196657781817e-06, "loss": 1.3339, "step": 52480 }, { "epoch": 1.547545333923043, "grad_norm": 2.8568184460629182, "learning_rate": 4.196019077735915e-06, "loss": 1.3506, "step": 52485 }, { "epoch": 1.5476927613150524, "grad_norm": 2.809633391718088, "learning_rate": 4.19538036053021e-06, "loss": 1.3183, "step": 52490 }, { "epoch": 1.5478401887070619, "grad_norm": 2.844536156767987, "learning_rate": 4.194741630218679e-06, "loss": 1.2646, "step": 52495 }, { "epoch": 1.5479876160990713, "grad_norm": 2.9103005809607407, "learning_rate": 4.194102886820118e-06, "loss": 1.3058, "step": 52500 }, { "epoch": 1.5479876160990713, "eval_loss": 1.092174768447876, "eval_runtime": 4.2835, "eval_samples_per_second": 92.447, "eval_steps_per_second": 3.035, "step": 52500 }, { "epoch": 1.5481350434910808, "grad_norm": 2.7862896023073036, "learning_rate": 4.193464130353322e-06, "loss": 1.2778, "step": 52505 }, { "epoch": 1.5482824708830902, "grad_norm": 3.0492913345521404, "learning_rate": 4.1928253608370875e-06, "loss": 1.3081, "step": 52510 }, { "epoch": 1.5484298982750995, "grad_norm": 2.7616962836055032, "learning_rate": 4.192186578290213e-06, "loss": 1.3182, "step": 52515 }, { "epoch": 1.548577325667109, "grad_norm": 2.7825210816393176, "learning_rate": 4.191547782731491e-06, "loss": 1.326, "step": 52520 }, { "epoch": 1.5487247530591184, "grad_norm": 2.8710613051190137, "learning_rate": 4.1909089741797215e-06, "loss": 1.3045, "step": 52525 }, { "epoch": 1.5488721804511278, "grad_norm": 2.9675912836955103, "learning_rate": 4.190270152653703e-06, "loss": 1.2992, "step": 52530 }, { "epoch": 1.5490196078431373, "grad_norm": 2.7357957734441416, "learning_rate": 4.189631318172233e-06, "loss": 1.3472, "step": 52535 }, { "epoch": 1.5491670352351465, "grad_norm": 2.828165368506491, "learning_rate": 4.188992470754109e-06, "loss": 1.3547, "step": 52540 }, { "epoch": 1.549314462627156, "grad_norm": 2.9625144565477224, "learning_rate": 4.18835361041813e-06, "loss": 1.3949, "step": 52545 }, { "epoch": 1.5494618900191655, "grad_norm": 2.890072301610993, "learning_rate": 4.1877147371830965e-06, "loss": 1.302, "step": 52550 }, { "epoch": 1.549609317411175, "grad_norm": 2.87578338135787, "learning_rate": 4.187075851067806e-06, "loss": 1.2995, "step": 52555 }, { "epoch": 1.5497567448031844, "grad_norm": 2.8741625189536664, "learning_rate": 4.186436952091058e-06, "loss": 1.3451, "step": 52560 }, { "epoch": 1.5499041721951938, "grad_norm": 2.773013891147131, "learning_rate": 4.1857980402716565e-06, "loss": 1.2947, "step": 52565 }, { "epoch": 1.5500515995872033, "grad_norm": 3.0054342594766705, "learning_rate": 4.1851591156284e-06, "loss": 1.3314, "step": 52570 }, { "epoch": 1.5501990269792127, "grad_norm": 2.747749243798579, "learning_rate": 4.184520178180089e-06, "loss": 1.2377, "step": 52575 }, { "epoch": 1.5503464543712222, "grad_norm": 2.8251945181891194, "learning_rate": 4.183881227945525e-06, "loss": 1.3231, "step": 52580 }, { "epoch": 1.5504938817632317, "grad_norm": 2.9071129048202926, "learning_rate": 4.183242264943511e-06, "loss": 1.3105, "step": 52585 }, { "epoch": 1.5506413091552411, "grad_norm": 2.767772137481517, "learning_rate": 4.182603289192849e-06, "loss": 1.3132, "step": 52590 }, { "epoch": 1.5507887365472506, "grad_norm": 2.826366779728356, "learning_rate": 4.181964300712342e-06, "loss": 1.341, "step": 52595 }, { "epoch": 1.55093616393926, "grad_norm": 3.5374723437831084, "learning_rate": 4.181325299520791e-06, "loss": 1.3119, "step": 52600 }, { "epoch": 1.5510835913312695, "grad_norm": 2.7994919061776677, "learning_rate": 4.180686285637001e-06, "loss": 1.3026, "step": 52605 }, { "epoch": 1.5512310187232787, "grad_norm": 2.9028187640054894, "learning_rate": 4.180047259079774e-06, "loss": 1.364, "step": 52610 }, { "epoch": 1.5513784461152882, "grad_norm": 2.8249797766379614, "learning_rate": 4.179408219867916e-06, "loss": 1.3195, "step": 52615 }, { "epoch": 1.5515258735072976, "grad_norm": 2.8476947755597775, "learning_rate": 4.178769168020231e-06, "loss": 1.31, "step": 52620 }, { "epoch": 1.551673300899307, "grad_norm": 2.7633319239871525, "learning_rate": 4.178130103555524e-06, "loss": 1.2958, "step": 52625 }, { "epoch": 1.5518207282913166, "grad_norm": 2.781507998177043, "learning_rate": 4.177491026492599e-06, "loss": 1.2891, "step": 52630 }, { "epoch": 1.5519681556833258, "grad_norm": 2.9108390189042925, "learning_rate": 4.1768519368502634e-06, "loss": 1.3205, "step": 52635 }, { "epoch": 1.5521155830753353, "grad_norm": 2.85657218093188, "learning_rate": 4.176212834647321e-06, "loss": 1.3108, "step": 52640 }, { "epoch": 1.5522630104673447, "grad_norm": 2.8819531731205625, "learning_rate": 4.175573719902581e-06, "loss": 1.3153, "step": 52645 }, { "epoch": 1.5524104378593542, "grad_norm": 2.8963473947139153, "learning_rate": 4.174934592634847e-06, "loss": 1.3655, "step": 52650 }, { "epoch": 1.5525578652513636, "grad_norm": 2.8572777008334778, "learning_rate": 4.1742954528629275e-06, "loss": 1.2725, "step": 52655 }, { "epoch": 1.552705292643373, "grad_norm": 2.693733471107145, "learning_rate": 4.17365630060563e-06, "loss": 1.2705, "step": 52660 }, { "epoch": 1.5528527200353825, "grad_norm": 2.7822351354766215, "learning_rate": 4.173017135881762e-06, "loss": 1.3142, "step": 52665 }, { "epoch": 1.553000147427392, "grad_norm": 2.7448766461692453, "learning_rate": 4.172377958710132e-06, "loss": 1.3461, "step": 52670 }, { "epoch": 1.5531475748194015, "grad_norm": 2.9975957795646107, "learning_rate": 4.171738769109548e-06, "loss": 1.3196, "step": 52675 }, { "epoch": 1.553295002211411, "grad_norm": 2.80840878271559, "learning_rate": 4.1710995670988195e-06, "loss": 1.3042, "step": 52680 }, { "epoch": 1.5534424296034204, "grad_norm": 2.7552508425067557, "learning_rate": 4.170460352696755e-06, "loss": 1.2914, "step": 52685 }, { "epoch": 1.5535898569954298, "grad_norm": 3.3537740659416326, "learning_rate": 4.1698211259221645e-06, "loss": 1.2999, "step": 52690 }, { "epoch": 1.5537372843874393, "grad_norm": 2.769303832994814, "learning_rate": 4.1691818867938575e-06, "loss": 1.2897, "step": 52695 }, { "epoch": 1.5538847117794488, "grad_norm": 2.8014119427811495, "learning_rate": 4.168542635330647e-06, "loss": 1.2824, "step": 52700 }, { "epoch": 1.5540321391714582, "grad_norm": 2.7672528229172078, "learning_rate": 4.167903371551339e-06, "loss": 1.2748, "step": 52705 }, { "epoch": 1.5541795665634675, "grad_norm": 2.942089839046664, "learning_rate": 4.167264095474748e-06, "loss": 1.3623, "step": 52710 }, { "epoch": 1.554326993955477, "grad_norm": 2.9735984241735145, "learning_rate": 4.166624807119684e-06, "loss": 1.3316, "step": 52715 }, { "epoch": 1.5544744213474864, "grad_norm": 2.808011091863334, "learning_rate": 4.1659855065049605e-06, "loss": 1.3524, "step": 52720 }, { "epoch": 1.5546218487394958, "grad_norm": 2.9032424258493994, "learning_rate": 4.165346193649388e-06, "loss": 1.3366, "step": 52725 }, { "epoch": 1.5547692761315053, "grad_norm": 2.8983217449487184, "learning_rate": 4.1647068685717795e-06, "loss": 1.3031, "step": 52730 }, { "epoch": 1.5549167035235145, "grad_norm": 2.92243990070068, "learning_rate": 4.164067531290948e-06, "loss": 1.3046, "step": 52735 }, { "epoch": 1.555064130915524, "grad_norm": 2.9121122024688297, "learning_rate": 4.163428181825706e-06, "loss": 1.2747, "step": 52740 }, { "epoch": 1.5552115583075334, "grad_norm": 2.882542315424099, "learning_rate": 4.162788820194867e-06, "loss": 1.3373, "step": 52745 }, { "epoch": 1.555358985699543, "grad_norm": 2.9330035410778135, "learning_rate": 4.162149446417248e-06, "loss": 1.2927, "step": 52750 }, { "epoch": 1.5555064130915524, "grad_norm": 2.9498358585439366, "learning_rate": 4.161510060511658e-06, "loss": 1.3199, "step": 52755 }, { "epoch": 1.5556538404835618, "grad_norm": 2.8761521097499974, "learning_rate": 4.160870662496916e-06, "loss": 1.3268, "step": 52760 }, { "epoch": 1.5558012678755713, "grad_norm": 2.880602811439438, "learning_rate": 4.160231252391836e-06, "loss": 1.3095, "step": 52765 }, { "epoch": 1.5559486952675807, "grad_norm": 2.7944158215757318, "learning_rate": 4.159591830215232e-06, "loss": 1.3103, "step": 52770 }, { "epoch": 1.5560961226595902, "grad_norm": 2.8919234111444547, "learning_rate": 4.1589523959859215e-06, "loss": 1.267, "step": 52775 }, { "epoch": 1.5562435500515996, "grad_norm": 2.9063956931351416, "learning_rate": 4.158312949722721e-06, "loss": 1.3281, "step": 52780 }, { "epoch": 1.556390977443609, "grad_norm": 3.0337058711824545, "learning_rate": 4.157673491444443e-06, "loss": 1.2513, "step": 52785 }, { "epoch": 1.5565384048356186, "grad_norm": 2.794327949472135, "learning_rate": 4.157034021169908e-06, "loss": 1.3394, "step": 52790 }, { "epoch": 1.556685832227628, "grad_norm": 2.865359577245731, "learning_rate": 4.156394538917933e-06, "loss": 1.3277, "step": 52795 }, { "epoch": 1.5568332596196375, "grad_norm": 2.7743197794885415, "learning_rate": 4.1557550447073336e-06, "loss": 1.2559, "step": 52800 }, { "epoch": 1.5569806870116467, "grad_norm": 2.9935694921802765, "learning_rate": 4.15511553855693e-06, "loss": 1.3504, "step": 52805 }, { "epoch": 1.5571281144036562, "grad_norm": 2.6635912676295224, "learning_rate": 4.154476020485538e-06, "loss": 1.301, "step": 52810 }, { "epoch": 1.5572755417956656, "grad_norm": 2.914805113556141, "learning_rate": 4.153836490511977e-06, "loss": 1.3198, "step": 52815 }, { "epoch": 1.557422969187675, "grad_norm": 2.672784527754086, "learning_rate": 4.153196948655067e-06, "loss": 1.2737, "step": 52820 }, { "epoch": 1.5575703965796845, "grad_norm": 2.8939840143863145, "learning_rate": 4.152557394933625e-06, "loss": 1.3434, "step": 52825 }, { "epoch": 1.5577178239716938, "grad_norm": 2.8100099141472357, "learning_rate": 4.151917829366473e-06, "loss": 1.337, "step": 52830 }, { "epoch": 1.5578652513637032, "grad_norm": 2.6574462224284288, "learning_rate": 4.151278251972431e-06, "loss": 1.3017, "step": 52835 }, { "epoch": 1.5580126787557127, "grad_norm": 2.8075592347861558, "learning_rate": 4.150638662770316e-06, "loss": 1.3269, "step": 52840 }, { "epoch": 1.5581601061477222, "grad_norm": 2.7649296022707293, "learning_rate": 4.149999061778952e-06, "loss": 1.2919, "step": 52845 }, { "epoch": 1.5583075335397316, "grad_norm": 2.8940700519215645, "learning_rate": 4.1493594490171586e-06, "loss": 1.2881, "step": 52850 }, { "epoch": 1.558454960931741, "grad_norm": 2.8244995961029784, "learning_rate": 4.148719824503758e-06, "loss": 1.3183, "step": 52855 }, { "epoch": 1.5586023883237505, "grad_norm": 3.125934076736503, "learning_rate": 4.1480801882575716e-06, "loss": 1.3576, "step": 52860 }, { "epoch": 1.55874981571576, "grad_norm": 2.813491792941322, "learning_rate": 4.147440540297421e-06, "loss": 1.3268, "step": 52865 }, { "epoch": 1.5588972431077694, "grad_norm": 2.7306368192893786, "learning_rate": 4.146800880642127e-06, "loss": 1.2423, "step": 52870 }, { "epoch": 1.559044670499779, "grad_norm": 3.010160404025528, "learning_rate": 4.146161209310515e-06, "loss": 1.3566, "step": 52875 }, { "epoch": 1.5591920978917884, "grad_norm": 2.9766004907190307, "learning_rate": 4.145521526321407e-06, "loss": 1.2905, "step": 52880 }, { "epoch": 1.5593395252837978, "grad_norm": 2.880647271157963, "learning_rate": 4.1448818316936266e-06, "loss": 1.3146, "step": 52885 }, { "epoch": 1.5594869526758073, "grad_norm": 2.837641409457702, "learning_rate": 4.144242125445997e-06, "loss": 1.3082, "step": 52890 }, { "epoch": 1.5596343800678167, "grad_norm": 2.8502812889908697, "learning_rate": 4.143602407597343e-06, "loss": 1.3264, "step": 52895 }, { "epoch": 1.5597818074598262, "grad_norm": 2.689710065305266, "learning_rate": 4.142962678166489e-06, "loss": 1.2882, "step": 52900 }, { "epoch": 1.5599292348518354, "grad_norm": 2.836563930827463, "learning_rate": 4.142322937172259e-06, "loss": 1.2583, "step": 52905 }, { "epoch": 1.560076662243845, "grad_norm": 2.924340044697517, "learning_rate": 4.141683184633479e-06, "loss": 1.3318, "step": 52910 }, { "epoch": 1.5602240896358543, "grad_norm": 2.9482725257644504, "learning_rate": 4.141043420568973e-06, "loss": 1.3736, "step": 52915 }, { "epoch": 1.5603715170278638, "grad_norm": 2.8196569396826208, "learning_rate": 4.14040364499757e-06, "loss": 1.2764, "step": 52920 }, { "epoch": 1.5605189444198733, "grad_norm": 2.993800069228507, "learning_rate": 4.139763857938093e-06, "loss": 1.2901, "step": 52925 }, { "epoch": 1.5606663718118825, "grad_norm": 2.752523273848624, "learning_rate": 4.139124059409368e-06, "loss": 1.2889, "step": 52930 }, { "epoch": 1.560813799203892, "grad_norm": 2.991430734328334, "learning_rate": 4.138484249430224e-06, "loss": 1.3154, "step": 52935 }, { "epoch": 1.5609612265959014, "grad_norm": 2.897871414495551, "learning_rate": 4.137844428019489e-06, "loss": 1.284, "step": 52940 }, { "epoch": 1.5611086539879109, "grad_norm": 2.7345889245386723, "learning_rate": 4.137204595195986e-06, "loss": 1.3068, "step": 52945 }, { "epoch": 1.5612560813799203, "grad_norm": 2.901542487784476, "learning_rate": 4.136564750978547e-06, "loss": 1.3452, "step": 52950 }, { "epoch": 1.5614035087719298, "grad_norm": 2.735732769627672, "learning_rate": 4.135924895385999e-06, "loss": 1.3248, "step": 52955 }, { "epoch": 1.5615509361639393, "grad_norm": 2.826148447727723, "learning_rate": 4.13528502843717e-06, "loss": 1.3028, "step": 52960 }, { "epoch": 1.5616983635559487, "grad_norm": 2.758987887650094, "learning_rate": 4.1346451501508895e-06, "loss": 1.3075, "step": 52965 }, { "epoch": 1.5618457909479582, "grad_norm": 2.960013155796374, "learning_rate": 4.134005260545986e-06, "loss": 1.3307, "step": 52970 }, { "epoch": 1.5619932183399676, "grad_norm": 2.7886406434145248, "learning_rate": 4.133365359641289e-06, "loss": 1.3255, "step": 52975 }, { "epoch": 1.562140645731977, "grad_norm": 2.9424773513554827, "learning_rate": 4.132725447455628e-06, "loss": 1.3299, "step": 52980 }, { "epoch": 1.5622880731239865, "grad_norm": 2.94672714585084, "learning_rate": 4.132085524007835e-06, "loss": 1.306, "step": 52985 }, { "epoch": 1.562435500515996, "grad_norm": 2.8152787012502087, "learning_rate": 4.131445589316739e-06, "loss": 1.2549, "step": 52990 }, { "epoch": 1.5625829279080055, "grad_norm": 2.751778425392876, "learning_rate": 4.130805643401171e-06, "loss": 1.3104, "step": 52995 }, { "epoch": 1.5627303553000147, "grad_norm": 2.9162179001342663, "learning_rate": 4.130165686279961e-06, "loss": 1.3611, "step": 53000 }, { "epoch": 1.5627303553000147, "eval_loss": 1.0915744304656982, "eval_runtime": 4.2677, "eval_samples_per_second": 92.79, "eval_steps_per_second": 3.046, "step": 53000 }, { "epoch": 1.5628777826920242, "grad_norm": 2.8101516233816426, "learning_rate": 4.129525717971942e-06, "loss": 1.2904, "step": 53005 }, { "epoch": 1.5630252100840336, "grad_norm": 2.886267187609954, "learning_rate": 4.128885738495946e-06, "loss": 1.3237, "step": 53010 }, { "epoch": 1.563172637476043, "grad_norm": 2.8656150010323023, "learning_rate": 4.128245747870804e-06, "loss": 1.3654, "step": 53015 }, { "epoch": 1.5633200648680525, "grad_norm": 2.7023477399577405, "learning_rate": 4.127605746115349e-06, "loss": 1.3246, "step": 53020 }, { "epoch": 1.5634674922600618, "grad_norm": 2.6897035315569977, "learning_rate": 4.126965733248414e-06, "loss": 1.3311, "step": 53025 }, { "epoch": 1.5636149196520712, "grad_norm": 2.856276090646627, "learning_rate": 4.126325709288831e-06, "loss": 1.3519, "step": 53030 }, { "epoch": 1.5637623470440807, "grad_norm": 2.7971207062087973, "learning_rate": 4.125685674255435e-06, "loss": 1.262, "step": 53035 }, { "epoch": 1.5639097744360901, "grad_norm": 2.857808261774763, "learning_rate": 4.125045628167059e-06, "loss": 1.2895, "step": 53040 }, { "epoch": 1.5640572018280996, "grad_norm": 2.904940142676921, "learning_rate": 4.124405571042537e-06, "loss": 1.2956, "step": 53045 }, { "epoch": 1.564204629220109, "grad_norm": 2.7191976884460627, "learning_rate": 4.123765502900704e-06, "loss": 1.2386, "step": 53050 }, { "epoch": 1.5643520566121185, "grad_norm": 2.8388405902151663, "learning_rate": 4.123125423760394e-06, "loss": 1.2992, "step": 53055 }, { "epoch": 1.564499484004128, "grad_norm": 2.836371181324436, "learning_rate": 4.1224853336404424e-06, "loss": 1.2827, "step": 53060 }, { "epoch": 1.5646469113961374, "grad_norm": 2.647344912995461, "learning_rate": 4.1218452325596845e-06, "loss": 1.307, "step": 53065 }, { "epoch": 1.5647943387881469, "grad_norm": 2.75034321510332, "learning_rate": 4.121205120536956e-06, "loss": 1.2984, "step": 53070 }, { "epoch": 1.5649417661801563, "grad_norm": 2.8214210105566804, "learning_rate": 4.120564997591093e-06, "loss": 1.3391, "step": 53075 }, { "epoch": 1.5650891935721658, "grad_norm": 2.742575546567095, "learning_rate": 4.119924863740931e-06, "loss": 1.3368, "step": 53080 }, { "epoch": 1.5652366209641753, "grad_norm": 2.7237537995719383, "learning_rate": 4.119284719005308e-06, "loss": 1.216, "step": 53085 }, { "epoch": 1.5653840483561847, "grad_norm": 2.9112071146602903, "learning_rate": 4.118644563403061e-06, "loss": 1.3126, "step": 53090 }, { "epoch": 1.5655314757481942, "grad_norm": 2.8944148537645282, "learning_rate": 4.118004396953026e-06, "loss": 1.3616, "step": 53095 }, { "epoch": 1.5656789031402034, "grad_norm": 2.782509667887291, "learning_rate": 4.117364219674042e-06, "loss": 1.3236, "step": 53100 }, { "epoch": 1.5658263305322129, "grad_norm": 2.7793683391248325, "learning_rate": 4.116724031584946e-06, "loss": 1.2987, "step": 53105 }, { "epoch": 1.5659737579242223, "grad_norm": 2.9064521262877627, "learning_rate": 4.1160838327045765e-06, "loss": 1.3508, "step": 53110 }, { "epoch": 1.5661211853162318, "grad_norm": 2.929391251832289, "learning_rate": 4.115443623051773e-06, "loss": 1.3109, "step": 53115 }, { "epoch": 1.566268612708241, "grad_norm": 2.824046546525069, "learning_rate": 4.114803402645374e-06, "loss": 1.2723, "step": 53120 }, { "epoch": 1.5664160401002505, "grad_norm": 2.7520336646840007, "learning_rate": 4.1141631715042165e-06, "loss": 1.306, "step": 53125 }, { "epoch": 1.56656346749226, "grad_norm": 2.8357920475958625, "learning_rate": 4.113522929647143e-06, "loss": 1.324, "step": 53130 }, { "epoch": 1.5667108948842694, "grad_norm": 2.7985247809625555, "learning_rate": 4.112882677092991e-06, "loss": 1.3068, "step": 53135 }, { "epoch": 1.5668583222762789, "grad_norm": 2.909076790034075, "learning_rate": 4.112242413860604e-06, "loss": 1.3248, "step": 53140 }, { "epoch": 1.5670057496682883, "grad_norm": 2.819452902711815, "learning_rate": 4.111602139968819e-06, "loss": 1.3258, "step": 53145 }, { "epoch": 1.5671531770602978, "grad_norm": 2.8474216097972866, "learning_rate": 4.110961855436477e-06, "loss": 1.2432, "step": 53150 }, { "epoch": 1.5673006044523072, "grad_norm": 2.7453106462289107, "learning_rate": 4.110321560282422e-06, "loss": 1.2854, "step": 53155 }, { "epoch": 1.5674480318443167, "grad_norm": 2.7604083264236396, "learning_rate": 4.109681254525493e-06, "loss": 1.2918, "step": 53160 }, { "epoch": 1.5675954592363261, "grad_norm": 2.9056668745466916, "learning_rate": 4.109040938184533e-06, "loss": 1.295, "step": 53165 }, { "epoch": 1.5677428866283356, "grad_norm": 2.8312922339426714, "learning_rate": 4.108400611278383e-06, "loss": 1.2805, "step": 53170 }, { "epoch": 1.567890314020345, "grad_norm": 2.8181323812702055, "learning_rate": 4.107760273825886e-06, "loss": 1.3525, "step": 53175 }, { "epoch": 1.5680377414123545, "grad_norm": 2.855408558703715, "learning_rate": 4.107119925845883e-06, "loss": 1.366, "step": 53180 }, { "epoch": 1.568185168804364, "grad_norm": 2.914311223829683, "learning_rate": 4.1064795673572206e-06, "loss": 1.2975, "step": 53185 }, { "epoch": 1.5683325961963734, "grad_norm": 2.864598883527593, "learning_rate": 4.105839198378738e-06, "loss": 1.3231, "step": 53190 }, { "epoch": 1.5684800235883827, "grad_norm": 2.8284831807625683, "learning_rate": 4.105198818929282e-06, "loss": 1.3323, "step": 53195 }, { "epoch": 1.5686274509803921, "grad_norm": 2.799113515757038, "learning_rate": 4.104558429027695e-06, "loss": 1.2479, "step": 53200 }, { "epoch": 1.5687748783724016, "grad_norm": 2.925308040896144, "learning_rate": 4.10391802869282e-06, "loss": 1.3396, "step": 53205 }, { "epoch": 1.568922305764411, "grad_norm": 2.7807360927171514, "learning_rate": 4.103277617943504e-06, "loss": 1.2364, "step": 53210 }, { "epoch": 1.5690697331564205, "grad_norm": 2.693795636982747, "learning_rate": 4.10263719679859e-06, "loss": 1.2971, "step": 53215 }, { "epoch": 1.5692171605484297, "grad_norm": 2.826835451823371, "learning_rate": 4.1019967652769254e-06, "loss": 1.32, "step": 53220 }, { "epoch": 1.5693645879404392, "grad_norm": 2.8749096836671444, "learning_rate": 4.101356323397353e-06, "loss": 1.3188, "step": 53225 }, { "epoch": 1.5695120153324487, "grad_norm": 2.8232349982435796, "learning_rate": 4.100715871178719e-06, "loss": 1.3236, "step": 53230 }, { "epoch": 1.5696594427244581, "grad_norm": 2.9077419142768526, "learning_rate": 4.1000754086398695e-06, "loss": 1.3028, "step": 53235 }, { "epoch": 1.5698068701164676, "grad_norm": 2.6935147096350924, "learning_rate": 4.099434935799653e-06, "loss": 1.3, "step": 53240 }, { "epoch": 1.569954297508477, "grad_norm": 2.7804993903099096, "learning_rate": 4.098794452676914e-06, "loss": 1.2906, "step": 53245 }, { "epoch": 1.5701017249004865, "grad_norm": 2.93402086856125, "learning_rate": 4.0981539592905005e-06, "loss": 1.3458, "step": 53250 }, { "epoch": 1.570249152292496, "grad_norm": 2.926785403099418, "learning_rate": 4.097513455659259e-06, "loss": 1.3565, "step": 53255 }, { "epoch": 1.5703965796845054, "grad_norm": 2.9213946089768283, "learning_rate": 4.0968729418020365e-06, "loss": 1.3382, "step": 53260 }, { "epoch": 1.5705440070765149, "grad_norm": 2.810509628334746, "learning_rate": 4.096232417737682e-06, "loss": 1.2941, "step": 53265 }, { "epoch": 1.5706914344685243, "grad_norm": 2.9566689530088794, "learning_rate": 4.095591883485043e-06, "loss": 1.2929, "step": 53270 }, { "epoch": 1.5708388618605338, "grad_norm": 2.875648953841522, "learning_rate": 4.094951339062968e-06, "loss": 1.2952, "step": 53275 }, { "epoch": 1.5709862892525432, "grad_norm": 2.879876430297815, "learning_rate": 4.094310784490307e-06, "loss": 1.2558, "step": 53280 }, { "epoch": 1.5711337166445527, "grad_norm": 2.778258435675222, "learning_rate": 4.093670219785906e-06, "loss": 1.2999, "step": 53285 }, { "epoch": 1.571281144036562, "grad_norm": 2.800631890564971, "learning_rate": 4.093029644968618e-06, "loss": 1.3341, "step": 53290 }, { "epoch": 1.5714285714285714, "grad_norm": 2.9556025181511565, "learning_rate": 4.0923890600572905e-06, "loss": 1.31, "step": 53295 }, { "epoch": 1.5715759988205809, "grad_norm": 2.7251445578484614, "learning_rate": 4.091748465070775e-06, "loss": 1.3134, "step": 53300 }, { "epoch": 1.5717234262125903, "grad_norm": 2.8377156909707386, "learning_rate": 4.091107860027919e-06, "loss": 1.2682, "step": 53305 }, { "epoch": 1.5718708536045998, "grad_norm": 2.8834020238765095, "learning_rate": 4.090467244947575e-06, "loss": 1.2886, "step": 53310 }, { "epoch": 1.572018280996609, "grad_norm": 2.8701036882037534, "learning_rate": 4.089826619848595e-06, "loss": 1.2894, "step": 53315 }, { "epoch": 1.5721657083886185, "grad_norm": 2.990441532398739, "learning_rate": 4.089185984749828e-06, "loss": 1.3036, "step": 53320 }, { "epoch": 1.572313135780628, "grad_norm": 2.8793691144784583, "learning_rate": 4.088545339670126e-06, "loss": 1.3383, "step": 53325 }, { "epoch": 1.5724605631726374, "grad_norm": 2.762099694846792, "learning_rate": 4.087904684628341e-06, "loss": 1.2842, "step": 53330 }, { "epoch": 1.5726079905646468, "grad_norm": 2.81300571153908, "learning_rate": 4.087264019643323e-06, "loss": 1.3367, "step": 53335 }, { "epoch": 1.5727554179566563, "grad_norm": 2.7512906991837336, "learning_rate": 4.086623344733928e-06, "loss": 1.2736, "step": 53340 }, { "epoch": 1.5729028453486658, "grad_norm": 2.777262300320425, "learning_rate": 4.085982659919006e-06, "loss": 1.333, "step": 53345 }, { "epoch": 1.5730502727406752, "grad_norm": 2.854520784997498, "learning_rate": 4.0853419652174115e-06, "loss": 1.3304, "step": 53350 }, { "epoch": 1.5731977001326847, "grad_norm": 2.8605305542902015, "learning_rate": 4.084701260647995e-06, "loss": 1.3574, "step": 53355 }, { "epoch": 1.5733451275246941, "grad_norm": 2.8348476712857127, "learning_rate": 4.084060546229613e-06, "loss": 1.335, "step": 53360 }, { "epoch": 1.5734925549167036, "grad_norm": 2.9501949012802937, "learning_rate": 4.083419821981118e-06, "loss": 1.3365, "step": 53365 }, { "epoch": 1.573639982308713, "grad_norm": 2.786923073143171, "learning_rate": 4.082779087921364e-06, "loss": 1.3466, "step": 53370 }, { "epoch": 1.5737874097007225, "grad_norm": 2.7657193557012056, "learning_rate": 4.082138344069205e-06, "loss": 1.315, "step": 53375 }, { "epoch": 1.573934837092732, "grad_norm": 2.921327886587192, "learning_rate": 4.081497590443496e-06, "loss": 1.2816, "step": 53380 }, { "epoch": 1.5740822644847414, "grad_norm": 2.679062707978544, "learning_rate": 4.0808568270630925e-06, "loss": 1.2843, "step": 53385 }, { "epoch": 1.5742296918767507, "grad_norm": 2.8848402915000584, "learning_rate": 4.080216053946848e-06, "loss": 1.3151, "step": 53390 }, { "epoch": 1.5743771192687601, "grad_norm": 2.838275203894859, "learning_rate": 4.079575271113619e-06, "loss": 1.2475, "step": 53395 }, { "epoch": 1.5745245466607696, "grad_norm": 2.8007211182731413, "learning_rate": 4.078934478582262e-06, "loss": 1.2976, "step": 53400 }, { "epoch": 1.574671974052779, "grad_norm": 2.7996714343648224, "learning_rate": 4.078293676371632e-06, "loss": 1.3582, "step": 53405 }, { "epoch": 1.5748194014447885, "grad_norm": 2.784497914164246, "learning_rate": 4.077652864500586e-06, "loss": 1.2789, "step": 53410 }, { "epoch": 1.5749668288367977, "grad_norm": 2.980035967734996, "learning_rate": 4.077012042987981e-06, "loss": 1.3302, "step": 53415 }, { "epoch": 1.5751142562288072, "grad_norm": 2.7723148007596334, "learning_rate": 4.076371211852672e-06, "loss": 1.3187, "step": 53420 }, { "epoch": 1.5752616836208166, "grad_norm": 2.8513586003498848, "learning_rate": 4.075730371113518e-06, "loss": 1.2978, "step": 53425 }, { "epoch": 1.575409111012826, "grad_norm": 2.759102974238524, "learning_rate": 4.075089520789376e-06, "loss": 1.2866, "step": 53430 }, { "epoch": 1.5755565384048356, "grad_norm": 2.8044787994423914, "learning_rate": 4.074448660899103e-06, "loss": 1.2919, "step": 53435 }, { "epoch": 1.575703965796845, "grad_norm": 2.803449450724172, "learning_rate": 4.073807791461558e-06, "loss": 1.3189, "step": 53440 }, { "epoch": 1.5758513931888545, "grad_norm": 2.908368057732467, "learning_rate": 4.073166912495599e-06, "loss": 1.329, "step": 53445 }, { "epoch": 1.575998820580864, "grad_norm": 2.7451037832501526, "learning_rate": 4.072526024020085e-06, "loss": 1.2927, "step": 53450 }, { "epoch": 1.5761462479728734, "grad_norm": 2.8081351269845607, "learning_rate": 4.071885126053875e-06, "loss": 1.3043, "step": 53455 }, { "epoch": 1.5762936753648829, "grad_norm": 2.770537065983014, "learning_rate": 4.071244218615827e-06, "loss": 1.3171, "step": 53460 }, { "epoch": 1.5764411027568923, "grad_norm": 2.840301764208417, "learning_rate": 4.0706033017248e-06, "loss": 1.2792, "step": 53465 }, { "epoch": 1.5765885301489018, "grad_norm": 2.7755759280957735, "learning_rate": 4.069962375399655e-06, "loss": 1.3788, "step": 53470 }, { "epoch": 1.5767359575409112, "grad_norm": 2.7402773359626225, "learning_rate": 4.069321439659252e-06, "loss": 1.3345, "step": 53475 }, { "epoch": 1.5768833849329207, "grad_norm": 2.9553120696837722, "learning_rate": 4.068680494522452e-06, "loss": 1.2976, "step": 53480 }, { "epoch": 1.57703081232493, "grad_norm": 2.8959558140341874, "learning_rate": 4.0680395400081134e-06, "loss": 1.2876, "step": 53485 }, { "epoch": 1.5771782397169394, "grad_norm": 2.877614205028739, "learning_rate": 4.0673985761350985e-06, "loss": 1.339, "step": 53490 }, { "epoch": 1.5773256671089488, "grad_norm": 2.6264342787145836, "learning_rate": 4.066757602922268e-06, "loss": 1.2884, "step": 53495 }, { "epoch": 1.5774730945009583, "grad_norm": 2.8529748247044475, "learning_rate": 4.066116620388483e-06, "loss": 1.331, "step": 53500 }, { "epoch": 1.5774730945009583, "eval_loss": 1.0905635356903076, "eval_runtime": 4.1431, "eval_samples_per_second": 95.58, "eval_steps_per_second": 3.138, "step": 53500 }, { "epoch": 1.5776205218929678, "grad_norm": 2.7649747694500233, "learning_rate": 4.065475628552606e-06, "loss": 1.3412, "step": 53505 }, { "epoch": 1.577767949284977, "grad_norm": 2.8914202752200624, "learning_rate": 4.064834627433499e-06, "loss": 1.3367, "step": 53510 }, { "epoch": 1.5779153766769864, "grad_norm": 2.9035991405501456, "learning_rate": 4.064193617050023e-06, "loss": 1.2966, "step": 53515 }, { "epoch": 1.578062804068996, "grad_norm": 3.0491818718379875, "learning_rate": 4.063552597421042e-06, "loss": 1.2992, "step": 53520 }, { "epoch": 1.5782102314610054, "grad_norm": 2.793902738166625, "learning_rate": 4.062911568565416e-06, "loss": 1.2977, "step": 53525 }, { "epoch": 1.5783576588530148, "grad_norm": 2.80940287463678, "learning_rate": 4.06227053050201e-06, "loss": 1.3491, "step": 53530 }, { "epoch": 1.5785050862450243, "grad_norm": 2.913720238690263, "learning_rate": 4.061629483249688e-06, "loss": 1.317, "step": 53535 }, { "epoch": 1.5786525136370337, "grad_norm": 2.9886864793135617, "learning_rate": 4.060988426827312e-06, "loss": 1.3059, "step": 53540 }, { "epoch": 1.5787999410290432, "grad_norm": 2.8161336684951133, "learning_rate": 4.060347361253747e-06, "loss": 1.2914, "step": 53545 }, { "epoch": 1.5789473684210527, "grad_norm": 3.0677647537232553, "learning_rate": 4.059706286547856e-06, "loss": 1.3018, "step": 53550 }, { "epoch": 1.5790947958130621, "grad_norm": 2.6985804096723873, "learning_rate": 4.059065202728505e-06, "loss": 1.2897, "step": 53555 }, { "epoch": 1.5792422232050716, "grad_norm": 2.837870791140304, "learning_rate": 4.058424109814556e-06, "loss": 1.3364, "step": 53560 }, { "epoch": 1.579389650597081, "grad_norm": 2.8364937825948005, "learning_rate": 4.057783007824876e-06, "loss": 1.2278, "step": 53565 }, { "epoch": 1.5795370779890905, "grad_norm": 2.7730357376887227, "learning_rate": 4.0571418967783284e-06, "loss": 1.272, "step": 53570 }, { "epoch": 1.5796845053811, "grad_norm": 2.79181072022884, "learning_rate": 4.056500776693781e-06, "loss": 1.3121, "step": 53575 }, { "epoch": 1.5798319327731094, "grad_norm": 2.773703071782374, "learning_rate": 4.0558596475900985e-06, "loss": 1.2568, "step": 53580 }, { "epoch": 1.5799793601651186, "grad_norm": 2.8763337520960235, "learning_rate": 4.055218509486148e-06, "loss": 1.2957, "step": 53585 }, { "epoch": 1.580126787557128, "grad_norm": 2.805532830638609, "learning_rate": 4.054577362400791e-06, "loss": 1.3174, "step": 53590 }, { "epoch": 1.5802742149491376, "grad_norm": 2.811091250531152, "learning_rate": 4.053936206352899e-06, "loss": 1.3312, "step": 53595 }, { "epoch": 1.580421642341147, "grad_norm": 2.9111383247554743, "learning_rate": 4.053295041361337e-06, "loss": 1.3133, "step": 53600 }, { "epoch": 1.5805690697331565, "grad_norm": 2.749351279324803, "learning_rate": 4.052653867444972e-06, "loss": 1.3317, "step": 53605 }, { "epoch": 1.5807164971251657, "grad_norm": 2.9032898326962697, "learning_rate": 4.052012684622672e-06, "loss": 1.282, "step": 53610 }, { "epoch": 1.5808639245171752, "grad_norm": 2.8912784538014824, "learning_rate": 4.0513714929133025e-06, "loss": 1.337, "step": 53615 }, { "epoch": 1.5810113519091846, "grad_norm": 2.874423967890878, "learning_rate": 4.050730292335733e-06, "loss": 1.2492, "step": 53620 }, { "epoch": 1.581158779301194, "grad_norm": 2.8443116871723295, "learning_rate": 4.050089082908832e-06, "loss": 1.2967, "step": 53625 }, { "epoch": 1.5813062066932035, "grad_norm": 2.782871448240662, "learning_rate": 4.049447864651466e-06, "loss": 1.2653, "step": 53630 }, { "epoch": 1.581453634085213, "grad_norm": 3.041978732421308, "learning_rate": 4.048806637582507e-06, "loss": 1.2726, "step": 53635 }, { "epoch": 1.5816010614772225, "grad_norm": 2.8403252183727767, "learning_rate": 4.04816540172082e-06, "loss": 1.2787, "step": 53640 }, { "epoch": 1.581748488869232, "grad_norm": 2.837360841261348, "learning_rate": 4.047524157085275e-06, "loss": 1.3221, "step": 53645 }, { "epoch": 1.5818959162612414, "grad_norm": 2.8200915301412364, "learning_rate": 4.046882903694743e-06, "loss": 1.362, "step": 53650 }, { "epoch": 1.5820433436532508, "grad_norm": 3.0338521039538233, "learning_rate": 4.0462416415680915e-06, "loss": 1.3333, "step": 53655 }, { "epoch": 1.5821907710452603, "grad_norm": 2.8543449927157396, "learning_rate": 4.045600370724192e-06, "loss": 1.3271, "step": 53660 }, { "epoch": 1.5823381984372697, "grad_norm": 2.775219453072345, "learning_rate": 4.044959091181915e-06, "loss": 1.3125, "step": 53665 }, { "epoch": 1.5824856258292792, "grad_norm": 2.786577088435609, "learning_rate": 4.044317802960128e-06, "loss": 1.3167, "step": 53670 }, { "epoch": 1.5826330532212887, "grad_norm": 2.915865179988514, "learning_rate": 4.043676506077704e-06, "loss": 1.3079, "step": 53675 }, { "epoch": 1.582780480613298, "grad_norm": 2.8931934231387473, "learning_rate": 4.043035200553514e-06, "loss": 1.3224, "step": 53680 }, { "epoch": 1.5829279080053074, "grad_norm": 2.86017211118669, "learning_rate": 4.042393886406429e-06, "loss": 1.3003, "step": 53685 }, { "epoch": 1.5830753353973168, "grad_norm": 3.0985817057034195, "learning_rate": 4.04175256365532e-06, "loss": 1.3441, "step": 53690 }, { "epoch": 1.5832227627893263, "grad_norm": 2.7957201548084076, "learning_rate": 4.041111232319058e-06, "loss": 1.3593, "step": 53695 }, { "epoch": 1.5833701901813357, "grad_norm": 2.8102862089386984, "learning_rate": 4.040469892416515e-06, "loss": 1.2479, "step": 53700 }, { "epoch": 1.583517617573345, "grad_norm": 2.9411940278121453, "learning_rate": 4.039828543966565e-06, "loss": 1.3423, "step": 53705 }, { "epoch": 1.5836650449653544, "grad_norm": 2.8782970904458245, "learning_rate": 4.039187186988079e-06, "loss": 1.3085, "step": 53710 }, { "epoch": 1.5838124723573639, "grad_norm": 2.7372885404785614, "learning_rate": 4.03854582149993e-06, "loss": 1.2396, "step": 53715 }, { "epoch": 1.5839598997493733, "grad_norm": 2.9326090223574632, "learning_rate": 4.03790444752099e-06, "loss": 1.3303, "step": 53720 }, { "epoch": 1.5841073271413828, "grad_norm": 2.919079996356643, "learning_rate": 4.037263065070132e-06, "loss": 1.2993, "step": 53725 }, { "epoch": 1.5842547545333923, "grad_norm": 2.945972084716055, "learning_rate": 4.036621674166232e-06, "loss": 1.2766, "step": 53730 }, { "epoch": 1.5844021819254017, "grad_norm": 2.8296719164892847, "learning_rate": 4.035980274828161e-06, "loss": 1.2544, "step": 53735 }, { "epoch": 1.5845496093174112, "grad_norm": 2.7763122952372887, "learning_rate": 4.035338867074795e-06, "loss": 1.3235, "step": 53740 }, { "epoch": 1.5846970367094206, "grad_norm": 2.809639856882579, "learning_rate": 4.034697450925005e-06, "loss": 1.3008, "step": 53745 }, { "epoch": 1.58484446410143, "grad_norm": 2.755696553659149, "learning_rate": 4.034056026397669e-06, "loss": 1.2995, "step": 53750 }, { "epoch": 1.5849918914934396, "grad_norm": 2.7830824844631, "learning_rate": 4.03341459351166e-06, "loss": 1.3256, "step": 53755 }, { "epoch": 1.585139318885449, "grad_norm": 2.7971727511444144, "learning_rate": 4.032773152285852e-06, "loss": 1.3354, "step": 53760 }, { "epoch": 1.5852867462774585, "grad_norm": 2.984111140668308, "learning_rate": 4.0321317027391226e-06, "loss": 1.3658, "step": 53765 }, { "epoch": 1.585434173669468, "grad_norm": 2.8684938778120435, "learning_rate": 4.031490244890345e-06, "loss": 1.3231, "step": 53770 }, { "epoch": 1.5855816010614774, "grad_norm": 2.858526119214236, "learning_rate": 4.030848778758395e-06, "loss": 1.3051, "step": 53775 }, { "epoch": 1.5857290284534866, "grad_norm": 2.9033358480953795, "learning_rate": 4.03020730436215e-06, "loss": 1.2792, "step": 53780 }, { "epoch": 1.585876455845496, "grad_norm": 2.852460177760411, "learning_rate": 4.0295658217204845e-06, "loss": 1.3083, "step": 53785 }, { "epoch": 1.5860238832375055, "grad_norm": 2.8673098894881903, "learning_rate": 4.028924330852275e-06, "loss": 1.3328, "step": 53790 }, { "epoch": 1.586171310629515, "grad_norm": 2.805130224125356, "learning_rate": 4.0282828317764e-06, "loss": 1.2738, "step": 53795 }, { "epoch": 1.5863187380215242, "grad_norm": 2.8534701203633572, "learning_rate": 4.027641324511733e-06, "loss": 1.3322, "step": 53800 }, { "epoch": 1.5864661654135337, "grad_norm": 2.7996818339105065, "learning_rate": 4.026999809077154e-06, "loss": 1.2976, "step": 53805 }, { "epoch": 1.5866135928055431, "grad_norm": 2.846687404795925, "learning_rate": 4.026358285491539e-06, "loss": 1.3456, "step": 53810 }, { "epoch": 1.5867610201975526, "grad_norm": 2.959603462349208, "learning_rate": 4.025716753773766e-06, "loss": 1.3159, "step": 53815 }, { "epoch": 1.586908447589562, "grad_norm": 2.8851104590764227, "learning_rate": 4.025075213942713e-06, "loss": 1.3025, "step": 53820 }, { "epoch": 1.5870558749815715, "grad_norm": 2.940149160577487, "learning_rate": 4.024433666017257e-06, "loss": 1.3131, "step": 53825 }, { "epoch": 1.587203302373581, "grad_norm": 2.926319132519698, "learning_rate": 4.023792110016277e-06, "loss": 1.2811, "step": 53830 }, { "epoch": 1.5873507297655904, "grad_norm": 2.9318924971954865, "learning_rate": 4.023150545958653e-06, "loss": 1.3564, "step": 53835 }, { "epoch": 1.5874981571576, "grad_norm": 2.8108477759035617, "learning_rate": 4.022508973863261e-06, "loss": 1.2755, "step": 53840 }, { "epoch": 1.5876455845496094, "grad_norm": 2.853443112179451, "learning_rate": 4.021867393748983e-06, "loss": 1.3246, "step": 53845 }, { "epoch": 1.5877930119416188, "grad_norm": 2.8655683448592804, "learning_rate": 4.021225805634694e-06, "loss": 1.2459, "step": 53850 }, { "epoch": 1.5879404393336283, "grad_norm": 2.640943678118144, "learning_rate": 4.020584209539277e-06, "loss": 1.273, "step": 53855 }, { "epoch": 1.5880878667256377, "grad_norm": 2.6765101631226043, "learning_rate": 4.019942605481611e-06, "loss": 1.2915, "step": 53860 }, { "epoch": 1.5882352941176472, "grad_norm": 2.8507553756762225, "learning_rate": 4.019300993480575e-06, "loss": 1.3192, "step": 53865 }, { "epoch": 1.5883827215096566, "grad_norm": 2.9371197050439766, "learning_rate": 4.018659373555051e-06, "loss": 1.2885, "step": 53870 }, { "epoch": 1.5885301489016659, "grad_norm": 2.8823076961523637, "learning_rate": 4.018017745723916e-06, "loss": 1.3145, "step": 53875 }, { "epoch": 1.5886775762936753, "grad_norm": 2.8054456663353227, "learning_rate": 4.017376110006054e-06, "loss": 1.2979, "step": 53880 }, { "epoch": 1.5888250036856848, "grad_norm": 2.7333323749554013, "learning_rate": 4.016734466420345e-06, "loss": 1.3312, "step": 53885 }, { "epoch": 1.5889724310776943, "grad_norm": 2.846497799710945, "learning_rate": 4.0160928149856685e-06, "loss": 1.3583, "step": 53890 }, { "epoch": 1.5891198584697037, "grad_norm": 2.6854764671693947, "learning_rate": 4.015451155720909e-06, "loss": 1.2286, "step": 53895 }, { "epoch": 1.589267285861713, "grad_norm": 2.7878651852309617, "learning_rate": 4.014809488644944e-06, "loss": 1.3647, "step": 53900 }, { "epoch": 1.5894147132537224, "grad_norm": 2.8037978086243958, "learning_rate": 4.01416781377666e-06, "loss": 1.2634, "step": 53905 }, { "epoch": 1.5895621406457319, "grad_norm": 2.8679321847653494, "learning_rate": 4.013526131134935e-06, "loss": 1.2907, "step": 53910 }, { "epoch": 1.5897095680377413, "grad_norm": 2.929524990811037, "learning_rate": 4.012884440738652e-06, "loss": 1.3558, "step": 53915 }, { "epoch": 1.5898569954297508, "grad_norm": 2.7552863514945822, "learning_rate": 4.012242742606695e-06, "loss": 1.3452, "step": 53920 }, { "epoch": 1.5900044228217602, "grad_norm": 2.8782547292088303, "learning_rate": 4.011601036757945e-06, "loss": 1.3282, "step": 53925 }, { "epoch": 1.5901518502137697, "grad_norm": 2.97645093609343, "learning_rate": 4.010959323211287e-06, "loss": 1.2954, "step": 53930 }, { "epoch": 1.5902992776057792, "grad_norm": 2.705834565525873, "learning_rate": 4.010317601985602e-06, "loss": 1.3201, "step": 53935 }, { "epoch": 1.5904467049977886, "grad_norm": 2.8180124119342396, "learning_rate": 4.0096758730997745e-06, "loss": 1.2625, "step": 53940 }, { "epoch": 1.590594132389798, "grad_norm": 2.7330764021118936, "learning_rate": 4.0090341365726885e-06, "loss": 1.2984, "step": 53945 }, { "epoch": 1.5907415597818075, "grad_norm": 2.9568575964585184, "learning_rate": 4.008392392423227e-06, "loss": 1.3437, "step": 53950 }, { "epoch": 1.590888987173817, "grad_norm": 2.8085765350466496, "learning_rate": 4.007750640670274e-06, "loss": 1.3065, "step": 53955 }, { "epoch": 1.5910364145658265, "grad_norm": 2.825497639125621, "learning_rate": 4.007108881332713e-06, "loss": 1.3048, "step": 53960 }, { "epoch": 1.591183841957836, "grad_norm": 2.9037723228151955, "learning_rate": 4.006467114429431e-06, "loss": 1.2833, "step": 53965 }, { "epoch": 1.5913312693498454, "grad_norm": 2.793296071750514, "learning_rate": 4.005825339979312e-06, "loss": 1.3195, "step": 53970 }, { "epoch": 1.5914786967418546, "grad_norm": 2.950022143669887, "learning_rate": 4.00518355800124e-06, "loss": 1.3363, "step": 53975 }, { "epoch": 1.591626124133864, "grad_norm": 2.7845095767651586, "learning_rate": 4.004541768514099e-06, "loss": 1.2818, "step": 53980 }, { "epoch": 1.5917735515258735, "grad_norm": 2.9498502026490176, "learning_rate": 4.0038999715367765e-06, "loss": 1.3061, "step": 53985 }, { "epoch": 1.591920978917883, "grad_norm": 3.086689198256641, "learning_rate": 4.003258167088157e-06, "loss": 1.3639, "step": 53990 }, { "epoch": 1.5920684063098922, "grad_norm": 2.828246862769904, "learning_rate": 4.002616355187128e-06, "loss": 1.2508, "step": 53995 }, { "epoch": 1.5922158337019017, "grad_norm": 2.7090415570188364, "learning_rate": 4.001974535852573e-06, "loss": 1.2905, "step": 54000 }, { "epoch": 1.5922158337019017, "eval_loss": 1.0888055562973022, "eval_runtime": 4.2907, "eval_samples_per_second": 92.293, "eval_steps_per_second": 3.03, "step": 54000 }, { "epoch": 1.5923632610939111, "grad_norm": 2.775707916555226, "learning_rate": 4.0013327091033805e-06, "loss": 1.3253, "step": 54005 }, { "epoch": 1.5925106884859206, "grad_norm": 2.900085359653616, "learning_rate": 4.000690874958435e-06, "loss": 1.3537, "step": 54010 }, { "epoch": 1.59265811587793, "grad_norm": 2.8891389961640535, "learning_rate": 4.0000490334366244e-06, "loss": 1.3192, "step": 54015 }, { "epoch": 1.5928055432699395, "grad_norm": 2.7925231468446423, "learning_rate": 3.999407184556835e-06, "loss": 1.2768, "step": 54020 }, { "epoch": 1.592952970661949, "grad_norm": 2.874101300460521, "learning_rate": 3.998765328337957e-06, "loss": 1.3161, "step": 54025 }, { "epoch": 1.5931003980539584, "grad_norm": 2.7478257306710643, "learning_rate": 3.9981234647988735e-06, "loss": 1.3052, "step": 54030 }, { "epoch": 1.5932478254459679, "grad_norm": 2.7117880594644803, "learning_rate": 3.9974815939584736e-06, "loss": 1.2822, "step": 54035 }, { "epoch": 1.5933952528379773, "grad_norm": 2.8859280645793746, "learning_rate": 3.996839715835645e-06, "loss": 1.3113, "step": 54040 }, { "epoch": 1.5935426802299868, "grad_norm": 2.7485085908519147, "learning_rate": 3.996197830449276e-06, "loss": 1.3301, "step": 54045 }, { "epoch": 1.5936901076219963, "grad_norm": 2.728523538996999, "learning_rate": 3.995555937818254e-06, "loss": 1.3254, "step": 54050 }, { "epoch": 1.5938375350140057, "grad_norm": 2.8337520134803373, "learning_rate": 3.994914037961471e-06, "loss": 1.332, "step": 54055 }, { "epoch": 1.5939849624060152, "grad_norm": 2.8659686555597297, "learning_rate": 3.99427213089781e-06, "loss": 1.263, "step": 54060 }, { "epoch": 1.5941323897980246, "grad_norm": 2.9145153571464384, "learning_rate": 3.993630216646163e-06, "loss": 1.3147, "step": 54065 }, { "epoch": 1.5942798171900339, "grad_norm": 3.082160097113191, "learning_rate": 3.9929882952254185e-06, "loss": 1.3235, "step": 54070 }, { "epoch": 1.5944272445820433, "grad_norm": 3.1325531019449557, "learning_rate": 3.992346366654468e-06, "loss": 1.2938, "step": 54075 }, { "epoch": 1.5945746719740528, "grad_norm": 2.8032921647881364, "learning_rate": 3.991704430952197e-06, "loss": 1.3276, "step": 54080 }, { "epoch": 1.5947220993660622, "grad_norm": 2.836229165240609, "learning_rate": 3.991062488137497e-06, "loss": 1.2623, "step": 54085 }, { "epoch": 1.5948695267580717, "grad_norm": 2.829922545222652, "learning_rate": 3.9904205382292585e-06, "loss": 1.3075, "step": 54090 }, { "epoch": 1.595016954150081, "grad_norm": 2.8948022622652236, "learning_rate": 3.989778581246371e-06, "loss": 1.3933, "step": 54095 }, { "epoch": 1.5951643815420904, "grad_norm": 2.9204654441609876, "learning_rate": 3.989136617207726e-06, "loss": 1.3076, "step": 54100 }, { "epoch": 1.5953118089340999, "grad_norm": 2.9271326981920085, "learning_rate": 3.988494646132213e-06, "loss": 1.2975, "step": 54105 }, { "epoch": 1.5954592363261093, "grad_norm": 2.9898849446709392, "learning_rate": 3.9878526680387215e-06, "loss": 1.4066, "step": 54110 }, { "epoch": 1.5956066637181188, "grad_norm": 2.825702684091193, "learning_rate": 3.9872106829461436e-06, "loss": 1.2835, "step": 54115 }, { "epoch": 1.5957540911101282, "grad_norm": 2.9133013688376415, "learning_rate": 3.986568690873371e-06, "loss": 1.3569, "step": 54120 }, { "epoch": 1.5959015185021377, "grad_norm": 2.7459661577427434, "learning_rate": 3.985926691839294e-06, "loss": 1.3382, "step": 54125 }, { "epoch": 1.5960489458941471, "grad_norm": 2.888281671859283, "learning_rate": 3.9852846858628056e-06, "loss": 1.3212, "step": 54130 }, { "epoch": 1.5961963732861566, "grad_norm": 2.9659924197332153, "learning_rate": 3.984642672962796e-06, "loss": 1.3001, "step": 54135 }, { "epoch": 1.596343800678166, "grad_norm": 2.7828612461295212, "learning_rate": 3.984000653158158e-06, "loss": 1.2548, "step": 54140 }, { "epoch": 1.5964912280701755, "grad_norm": 2.827211997870541, "learning_rate": 3.983358626467783e-06, "loss": 1.3459, "step": 54145 }, { "epoch": 1.596638655462185, "grad_norm": 2.8492196468480557, "learning_rate": 3.982716592910565e-06, "loss": 1.2845, "step": 54150 }, { "epoch": 1.5967860828541944, "grad_norm": 3.032490382777797, "learning_rate": 3.982074552505395e-06, "loss": 1.3084, "step": 54155 }, { "epoch": 1.596933510246204, "grad_norm": 2.9914158090569933, "learning_rate": 3.981432505271166e-06, "loss": 1.2815, "step": 54160 }, { "epoch": 1.5970809376382131, "grad_norm": 2.875972395111935, "learning_rate": 3.980790451226772e-06, "loss": 1.3098, "step": 54165 }, { "epoch": 1.5972283650302226, "grad_norm": 2.890976302194687, "learning_rate": 3.980148390391105e-06, "loss": 1.2829, "step": 54170 }, { "epoch": 1.597375792422232, "grad_norm": 2.9080951500791627, "learning_rate": 3.979506322783059e-06, "loss": 1.3125, "step": 54175 }, { "epoch": 1.5975232198142415, "grad_norm": 2.882298230265642, "learning_rate": 3.978864248421527e-06, "loss": 1.3283, "step": 54180 }, { "epoch": 1.597670647206251, "grad_norm": 2.8451826485551033, "learning_rate": 3.978222167325404e-06, "loss": 1.3048, "step": 54185 }, { "epoch": 1.5978180745982602, "grad_norm": 2.866576111030359, "learning_rate": 3.977580079513582e-06, "loss": 1.3157, "step": 54190 }, { "epoch": 1.5979655019902697, "grad_norm": 2.8806814833838854, "learning_rate": 3.976937985004957e-06, "loss": 1.3217, "step": 54195 }, { "epoch": 1.5981129293822791, "grad_norm": 2.8394502156394448, "learning_rate": 3.976295883818422e-06, "loss": 1.3032, "step": 54200 }, { "epoch": 1.5982603567742886, "grad_norm": 3.0344174138400524, "learning_rate": 3.975653775972873e-06, "loss": 1.3046, "step": 54205 }, { "epoch": 1.598407784166298, "grad_norm": 2.834804944193999, "learning_rate": 3.975011661487204e-06, "loss": 1.2842, "step": 54210 }, { "epoch": 1.5985552115583075, "grad_norm": 2.7371971425218367, "learning_rate": 3.974369540380309e-06, "loss": 1.2517, "step": 54215 }, { "epoch": 1.598702638950317, "grad_norm": 2.9020213531358565, "learning_rate": 3.973727412671084e-06, "loss": 1.339, "step": 54220 }, { "epoch": 1.5988500663423264, "grad_norm": 2.935845262228922, "learning_rate": 3.973085278378425e-06, "loss": 1.3233, "step": 54225 }, { "epoch": 1.5989974937343359, "grad_norm": 2.7609762547248065, "learning_rate": 3.972443137521227e-06, "loss": 1.3002, "step": 54230 }, { "epoch": 1.5991449211263453, "grad_norm": 2.8962192238539703, "learning_rate": 3.971800990118387e-06, "loss": 1.2924, "step": 54235 }, { "epoch": 1.5992923485183548, "grad_norm": 2.9743180326383394, "learning_rate": 3.971158836188798e-06, "loss": 1.295, "step": 54240 }, { "epoch": 1.5994397759103642, "grad_norm": 2.880360269232235, "learning_rate": 3.970516675751358e-06, "loss": 1.3005, "step": 54245 }, { "epoch": 1.5995872033023737, "grad_norm": 2.812820228656854, "learning_rate": 3.969874508824963e-06, "loss": 1.2782, "step": 54250 }, { "epoch": 1.5997346306943832, "grad_norm": 2.8292378786376973, "learning_rate": 3.969232335428509e-06, "loss": 1.3491, "step": 54255 }, { "epoch": 1.5998820580863926, "grad_norm": 2.9352730068389103, "learning_rate": 3.968590155580894e-06, "loss": 1.2981, "step": 54260 }, { "epoch": 1.6000294854784018, "grad_norm": 2.8892291035092152, "learning_rate": 3.967947969301013e-06, "loss": 1.3441, "step": 54265 }, { "epoch": 1.6001769128704113, "grad_norm": 2.9449244958632823, "learning_rate": 3.9673057766077655e-06, "loss": 1.3438, "step": 54270 }, { "epoch": 1.6003243402624208, "grad_norm": 2.7080812733360626, "learning_rate": 3.966663577520046e-06, "loss": 1.2952, "step": 54275 }, { "epoch": 1.6004717676544302, "grad_norm": 2.6934736523321683, "learning_rate": 3.966021372056754e-06, "loss": 1.2186, "step": 54280 }, { "epoch": 1.6006191950464397, "grad_norm": 2.9320090523974316, "learning_rate": 3.965379160236787e-06, "loss": 1.3489, "step": 54285 }, { "epoch": 1.600766622438449, "grad_norm": 2.746475772139975, "learning_rate": 3.964736942079042e-06, "loss": 1.2784, "step": 54290 }, { "epoch": 1.6009140498304584, "grad_norm": 2.848545106083205, "learning_rate": 3.9640947176024175e-06, "loss": 1.3307, "step": 54295 }, { "epoch": 1.6010614772224678, "grad_norm": 3.063332841804892, "learning_rate": 3.963452486825811e-06, "loss": 1.2933, "step": 54300 }, { "epoch": 1.6012089046144773, "grad_norm": 2.872793151150415, "learning_rate": 3.9628102497681215e-06, "loss": 1.3485, "step": 54305 }, { "epoch": 1.6013563320064867, "grad_norm": 2.7890705205492266, "learning_rate": 3.962168006448247e-06, "loss": 1.2714, "step": 54310 }, { "epoch": 1.6015037593984962, "grad_norm": 2.7112753772484224, "learning_rate": 3.9615257568850875e-06, "loss": 1.2989, "step": 54315 }, { "epoch": 1.6016511867905057, "grad_norm": 2.9186534068343066, "learning_rate": 3.96088350109754e-06, "loss": 1.3402, "step": 54320 }, { "epoch": 1.6017986141825151, "grad_norm": 2.826120222139387, "learning_rate": 3.960241239104505e-06, "loss": 1.2577, "step": 54325 }, { "epoch": 1.6019460415745246, "grad_norm": 2.7717884232332457, "learning_rate": 3.959598970924882e-06, "loss": 1.2974, "step": 54330 }, { "epoch": 1.602093468966534, "grad_norm": 2.8521673987577802, "learning_rate": 3.95895669657757e-06, "loss": 1.3226, "step": 54335 }, { "epoch": 1.6022408963585435, "grad_norm": 2.870607668127856, "learning_rate": 3.958314416081468e-06, "loss": 1.2956, "step": 54340 }, { "epoch": 1.602388323750553, "grad_norm": 2.6700141581662877, "learning_rate": 3.957672129455476e-06, "loss": 1.2942, "step": 54345 }, { "epoch": 1.6025357511425624, "grad_norm": 2.6916197306003826, "learning_rate": 3.957029836718495e-06, "loss": 1.2689, "step": 54350 }, { "epoch": 1.6026831785345719, "grad_norm": 2.8577052305843957, "learning_rate": 3.956387537889425e-06, "loss": 1.2966, "step": 54355 }, { "epoch": 1.602830605926581, "grad_norm": 2.847548249799431, "learning_rate": 3.955745232987166e-06, "loss": 1.2902, "step": 54360 }, { "epoch": 1.6029780333185906, "grad_norm": 2.8995502217985787, "learning_rate": 3.955102922030619e-06, "loss": 1.2824, "step": 54365 }, { "epoch": 1.6031254607106, "grad_norm": 2.798364540507528, "learning_rate": 3.954460605038683e-06, "loss": 1.2902, "step": 54370 }, { "epoch": 1.6032728881026095, "grad_norm": 2.8883484348853528, "learning_rate": 3.953818282030261e-06, "loss": 1.2938, "step": 54375 }, { "epoch": 1.603420315494619, "grad_norm": 2.7195714773379254, "learning_rate": 3.953175953024253e-06, "loss": 1.2895, "step": 54380 }, { "epoch": 1.6035677428866282, "grad_norm": 2.6814461771620506, "learning_rate": 3.952533618039561e-06, "loss": 1.2703, "step": 54385 }, { "epoch": 1.6037151702786376, "grad_norm": 3.023623167728897, "learning_rate": 3.951891277095086e-06, "loss": 1.294, "step": 54390 }, { "epoch": 1.603862597670647, "grad_norm": 2.764602056978495, "learning_rate": 3.951248930209729e-06, "loss": 1.3152, "step": 54395 }, { "epoch": 1.6040100250626566, "grad_norm": 2.7666822615605886, "learning_rate": 3.950606577402393e-06, "loss": 1.334, "step": 54400 }, { "epoch": 1.604157452454666, "grad_norm": 2.921789617345953, "learning_rate": 3.949964218691979e-06, "loss": 1.3054, "step": 54405 }, { "epoch": 1.6043048798466755, "grad_norm": 2.726579708920975, "learning_rate": 3.94932185409739e-06, "loss": 1.2728, "step": 54410 }, { "epoch": 1.604452307238685, "grad_norm": 2.6654459625824027, "learning_rate": 3.948679483637528e-06, "loss": 1.3265, "step": 54415 }, { "epoch": 1.6045997346306944, "grad_norm": 2.759049061250977, "learning_rate": 3.948037107331294e-06, "loss": 1.2815, "step": 54420 }, { "epoch": 1.6047471620227038, "grad_norm": 2.814548198268909, "learning_rate": 3.947394725197593e-06, "loss": 1.2928, "step": 54425 }, { "epoch": 1.6048945894147133, "grad_norm": 3.036892947064027, "learning_rate": 3.946752337255327e-06, "loss": 1.3051, "step": 54430 }, { "epoch": 1.6050420168067228, "grad_norm": 2.937132500468257, "learning_rate": 3.946109943523399e-06, "loss": 1.3443, "step": 54435 }, { "epoch": 1.6051894441987322, "grad_norm": 2.8653132970024355, "learning_rate": 3.945467544020711e-06, "loss": 1.287, "step": 54440 }, { "epoch": 1.6053368715907417, "grad_norm": 2.6893712037894226, "learning_rate": 3.944825138766169e-06, "loss": 1.2939, "step": 54445 }, { "epoch": 1.6054842989827511, "grad_norm": 2.907099371482367, "learning_rate": 3.944182727778673e-06, "loss": 1.2975, "step": 54450 }, { "epoch": 1.6056317263747606, "grad_norm": 2.9850518635906025, "learning_rate": 3.943540311077129e-06, "loss": 1.3319, "step": 54455 }, { "epoch": 1.6057791537667698, "grad_norm": 2.7704923771428596, "learning_rate": 3.942897888680441e-06, "loss": 1.283, "step": 54460 }, { "epoch": 1.6059265811587793, "grad_norm": 2.8026578403287488, "learning_rate": 3.942255460607512e-06, "loss": 1.2577, "step": 54465 }, { "epoch": 1.6060740085507887, "grad_norm": 2.9475230851086898, "learning_rate": 3.941613026877247e-06, "loss": 1.2718, "step": 54470 }, { "epoch": 1.6062214359427982, "grad_norm": 2.857138942056742, "learning_rate": 3.940970587508549e-06, "loss": 1.2853, "step": 54475 }, { "epoch": 1.6063688633348077, "grad_norm": 2.865423427453901, "learning_rate": 3.940328142520323e-06, "loss": 1.3213, "step": 54480 }, { "epoch": 1.606516290726817, "grad_norm": 2.920491977487943, "learning_rate": 3.939685691931475e-06, "loss": 1.3237, "step": 54485 }, { "epoch": 1.6066637181188264, "grad_norm": 2.877804791638503, "learning_rate": 3.9390432357609085e-06, "loss": 1.3076, "step": 54490 }, { "epoch": 1.6068111455108358, "grad_norm": 2.8371399066249925, "learning_rate": 3.93840077402753e-06, "loss": 1.3255, "step": 54495 }, { "epoch": 1.6069585729028453, "grad_norm": 2.9559700046076243, "learning_rate": 3.9377583067502425e-06, "loss": 1.294, "step": 54500 }, { "epoch": 1.6069585729028453, "eval_loss": 1.0878608226776123, "eval_runtime": 4.1382, "eval_samples_per_second": 95.695, "eval_steps_per_second": 3.141, "step": 54500 }, { "epoch": 1.6071060002948547, "grad_norm": 2.9399621978539523, "learning_rate": 3.937115833947952e-06, "loss": 1.308, "step": 54505 }, { "epoch": 1.6072534276868642, "grad_norm": 2.8878303903316915, "learning_rate": 3.936473355639565e-06, "loss": 1.2931, "step": 54510 }, { "epoch": 1.6074008550788736, "grad_norm": 2.7546361189464794, "learning_rate": 3.9358308718439865e-06, "loss": 1.3558, "step": 54515 }, { "epoch": 1.607548282470883, "grad_norm": 2.768728372534954, "learning_rate": 3.935188382580123e-06, "loss": 1.3416, "step": 54520 }, { "epoch": 1.6076957098628926, "grad_norm": 2.795333047676997, "learning_rate": 3.934545887866879e-06, "loss": 1.2927, "step": 54525 }, { "epoch": 1.607843137254902, "grad_norm": 2.81071674402955, "learning_rate": 3.933903387723162e-06, "loss": 1.3207, "step": 54530 }, { "epoch": 1.6079905646469115, "grad_norm": 2.7356577710156498, "learning_rate": 3.933260882167877e-06, "loss": 1.2899, "step": 54535 }, { "epoch": 1.608137992038921, "grad_norm": 2.8897695690844905, "learning_rate": 3.932618371219932e-06, "loss": 1.3463, "step": 54540 }, { "epoch": 1.6082854194309304, "grad_norm": 2.8801730039282356, "learning_rate": 3.931975854898234e-06, "loss": 1.3264, "step": 54545 }, { "epoch": 1.6084328468229399, "grad_norm": 2.731913805918305, "learning_rate": 3.931333333221688e-06, "loss": 1.2923, "step": 54550 }, { "epoch": 1.608580274214949, "grad_norm": 2.7740145047492124, "learning_rate": 3.9306908062092e-06, "loss": 1.2823, "step": 54555 }, { "epoch": 1.6087277016069585, "grad_norm": 2.899410625260519, "learning_rate": 3.9300482738796816e-06, "loss": 1.3164, "step": 54560 }, { "epoch": 1.608875128998968, "grad_norm": 2.81697909263961, "learning_rate": 3.929405736252034e-06, "loss": 1.2829, "step": 54565 }, { "epoch": 1.6090225563909775, "grad_norm": 2.911192879060997, "learning_rate": 3.92876319334517e-06, "loss": 1.2859, "step": 54570 }, { "epoch": 1.609169983782987, "grad_norm": 2.881004506403883, "learning_rate": 3.928120645177994e-06, "loss": 1.2927, "step": 54575 }, { "epoch": 1.6093174111749962, "grad_norm": 2.9078418175024265, "learning_rate": 3.9274780917694154e-06, "loss": 1.3318, "step": 54580 }, { "epoch": 1.6094648385670056, "grad_norm": 2.728577377422844, "learning_rate": 3.92683553313834e-06, "loss": 1.3323, "step": 54585 }, { "epoch": 1.609612265959015, "grad_norm": 2.8717571376256306, "learning_rate": 3.926192969303677e-06, "loss": 1.322, "step": 54590 }, { "epoch": 1.6097596933510245, "grad_norm": 2.8296054155079244, "learning_rate": 3.925550400284337e-06, "loss": 1.3023, "step": 54595 }, { "epoch": 1.609907120743034, "grad_norm": 2.8721781000756637, "learning_rate": 3.924907826099224e-06, "loss": 1.323, "step": 54600 }, { "epoch": 1.6100545481350435, "grad_norm": 2.63324603821648, "learning_rate": 3.9242652467672486e-06, "loss": 1.3076, "step": 54605 }, { "epoch": 1.610201975527053, "grad_norm": 2.7712192182217863, "learning_rate": 3.923622662307319e-06, "loss": 1.2814, "step": 54610 }, { "epoch": 1.6103494029190624, "grad_norm": 2.864211352190611, "learning_rate": 3.922980072738345e-06, "loss": 1.2819, "step": 54615 }, { "epoch": 1.6104968303110718, "grad_norm": 2.979120761811525, "learning_rate": 3.922337478079236e-06, "loss": 1.3345, "step": 54620 }, { "epoch": 1.6106442577030813, "grad_norm": 2.698756208628538, "learning_rate": 3.921694878348899e-06, "loss": 1.2834, "step": 54625 }, { "epoch": 1.6107916850950907, "grad_norm": 2.7426657127064953, "learning_rate": 3.921052273566244e-06, "loss": 1.334, "step": 54630 }, { "epoch": 1.6109391124871002, "grad_norm": 2.749690627789231, "learning_rate": 3.92040966375018e-06, "loss": 1.302, "step": 54635 }, { "epoch": 1.6110865398791097, "grad_norm": 2.8896963852487643, "learning_rate": 3.919767048919618e-06, "loss": 1.3294, "step": 54640 }, { "epoch": 1.6112339672711191, "grad_norm": 2.9214665402669278, "learning_rate": 3.919124429093465e-06, "loss": 1.3113, "step": 54645 }, { "epoch": 1.6113813946631286, "grad_norm": 3.0021870849000853, "learning_rate": 3.918481804290635e-06, "loss": 1.2886, "step": 54650 }, { "epoch": 1.6115288220551378, "grad_norm": 2.6223599734376233, "learning_rate": 3.917839174530033e-06, "loss": 1.3447, "step": 54655 }, { "epoch": 1.6116762494471473, "grad_norm": 2.7474896920259133, "learning_rate": 3.917196539830574e-06, "loss": 1.2939, "step": 54660 }, { "epoch": 1.6118236768391567, "grad_norm": 2.7706735904658246, "learning_rate": 3.916553900211164e-06, "loss": 1.2519, "step": 54665 }, { "epoch": 1.6119711042311662, "grad_norm": 2.8060535258051145, "learning_rate": 3.9159112556907155e-06, "loss": 1.2826, "step": 54670 }, { "epoch": 1.6121185316231754, "grad_norm": 2.662428162886127, "learning_rate": 3.9152686062881406e-06, "loss": 1.3195, "step": 54675 }, { "epoch": 1.6122659590151849, "grad_norm": 2.982232514336011, "learning_rate": 3.914625952022346e-06, "loss": 1.2814, "step": 54680 }, { "epoch": 1.6124133864071943, "grad_norm": 2.8346497089497573, "learning_rate": 3.913983292912246e-06, "loss": 1.3095, "step": 54685 }, { "epoch": 1.6125608137992038, "grad_norm": 3.306149815033627, "learning_rate": 3.913340628976751e-06, "loss": 1.2662, "step": 54690 }, { "epoch": 1.6127082411912133, "grad_norm": 2.7868083684346865, "learning_rate": 3.912697960234771e-06, "loss": 1.3373, "step": 54695 }, { "epoch": 1.6128556685832227, "grad_norm": 2.952412972900763, "learning_rate": 3.912055286705217e-06, "loss": 1.3073, "step": 54700 }, { "epoch": 1.6130030959752322, "grad_norm": 2.830960071669353, "learning_rate": 3.911412608407001e-06, "loss": 1.3494, "step": 54705 }, { "epoch": 1.6131505233672416, "grad_norm": 2.6725425235553026, "learning_rate": 3.910769925359036e-06, "loss": 1.3017, "step": 54710 }, { "epoch": 1.613297950759251, "grad_norm": 2.9662079494999425, "learning_rate": 3.910127237580232e-06, "loss": 1.27, "step": 54715 }, { "epoch": 1.6134453781512605, "grad_norm": 2.753407349881483, "learning_rate": 3.909484545089501e-06, "loss": 1.3031, "step": 54720 }, { "epoch": 1.61359280554327, "grad_norm": 2.7240250933748174, "learning_rate": 3.908841847905756e-06, "loss": 1.3146, "step": 54725 }, { "epoch": 1.6137402329352795, "grad_norm": 2.779203465318045, "learning_rate": 3.908199146047906e-06, "loss": 1.3331, "step": 54730 }, { "epoch": 1.613887660327289, "grad_norm": 2.9474004904785214, "learning_rate": 3.907556439534867e-06, "loss": 1.3243, "step": 54735 }, { "epoch": 1.6140350877192984, "grad_norm": 2.7073965993963487, "learning_rate": 3.906913728385549e-06, "loss": 1.3339, "step": 54740 }, { "epoch": 1.6141825151113078, "grad_norm": 2.925651712269595, "learning_rate": 3.906271012618866e-06, "loss": 1.276, "step": 54745 }, { "epoch": 1.614329942503317, "grad_norm": 2.8892961767497476, "learning_rate": 3.9056282922537305e-06, "loss": 1.2837, "step": 54750 }, { "epoch": 1.6144773698953265, "grad_norm": 2.8127478782940094, "learning_rate": 3.904985567309055e-06, "loss": 1.3319, "step": 54755 }, { "epoch": 1.614624797287336, "grad_norm": 2.8210562982341485, "learning_rate": 3.904342837803751e-06, "loss": 1.2661, "step": 54760 }, { "epoch": 1.6147722246793454, "grad_norm": 2.826966413507372, "learning_rate": 3.903700103756733e-06, "loss": 1.2489, "step": 54765 }, { "epoch": 1.614919652071355, "grad_norm": 2.7653368772780262, "learning_rate": 3.903057365186914e-06, "loss": 1.2995, "step": 54770 }, { "epoch": 1.6150670794633641, "grad_norm": 2.8145322695704977, "learning_rate": 3.902414622113207e-06, "loss": 1.3285, "step": 54775 }, { "epoch": 1.6152145068553736, "grad_norm": 2.9026188548215943, "learning_rate": 3.901771874554526e-06, "loss": 1.2936, "step": 54780 }, { "epoch": 1.615361934247383, "grad_norm": 2.812322071818488, "learning_rate": 3.9011291225297846e-06, "loss": 1.3481, "step": 54785 }, { "epoch": 1.6155093616393925, "grad_norm": 2.955419963689061, "learning_rate": 3.900486366057896e-06, "loss": 1.2471, "step": 54790 }, { "epoch": 1.615656789031402, "grad_norm": 2.825290038989397, "learning_rate": 3.8998436051577735e-06, "loss": 1.3279, "step": 54795 }, { "epoch": 1.6158042164234114, "grad_norm": 2.7067137053310195, "learning_rate": 3.899200839848332e-06, "loss": 1.2632, "step": 54800 }, { "epoch": 1.615951643815421, "grad_norm": 2.8174559672703436, "learning_rate": 3.8985580701484865e-06, "loss": 1.29, "step": 54805 }, { "epoch": 1.6160990712074303, "grad_norm": 2.8460410263931255, "learning_rate": 3.89791529607715e-06, "loss": 1.3033, "step": 54810 }, { "epoch": 1.6162464985994398, "grad_norm": 2.961861134088804, "learning_rate": 3.897272517653237e-06, "loss": 1.2704, "step": 54815 }, { "epoch": 1.6163939259914493, "grad_norm": 2.8192901531354457, "learning_rate": 3.896629734895662e-06, "loss": 1.3497, "step": 54820 }, { "epoch": 1.6165413533834587, "grad_norm": 2.804446108253281, "learning_rate": 3.895986947823339e-06, "loss": 1.3302, "step": 54825 }, { "epoch": 1.6166887807754682, "grad_norm": 2.842977257378229, "learning_rate": 3.895344156455183e-06, "loss": 1.2606, "step": 54830 }, { "epoch": 1.6168362081674776, "grad_norm": 2.8391292175518057, "learning_rate": 3.894701360810111e-06, "loss": 1.353, "step": 54835 }, { "epoch": 1.616983635559487, "grad_norm": 2.7656631193218852, "learning_rate": 3.894058560907034e-06, "loss": 1.2715, "step": 54840 }, { "epoch": 1.6171310629514963, "grad_norm": 2.920424334153359, "learning_rate": 3.893415756764871e-06, "loss": 1.329, "step": 54845 }, { "epoch": 1.6172784903435058, "grad_norm": 2.7630491583317522, "learning_rate": 3.8927729484025346e-06, "loss": 1.2817, "step": 54850 }, { "epoch": 1.6174259177355153, "grad_norm": 2.88400673085634, "learning_rate": 3.892130135838942e-06, "loss": 1.3087, "step": 54855 }, { "epoch": 1.6175733451275247, "grad_norm": 2.852877205607646, "learning_rate": 3.891487319093008e-06, "loss": 1.2959, "step": 54860 }, { "epoch": 1.6177207725195342, "grad_norm": 2.848173635580972, "learning_rate": 3.890844498183647e-06, "loss": 1.2992, "step": 54865 }, { "epoch": 1.6178681999115434, "grad_norm": 2.753424909839802, "learning_rate": 3.890201673129776e-06, "loss": 1.2936, "step": 54870 }, { "epoch": 1.6180156273035529, "grad_norm": 2.853206267204662, "learning_rate": 3.889558843950311e-06, "loss": 1.3008, "step": 54875 }, { "epoch": 1.6181630546955623, "grad_norm": 2.7304080861461095, "learning_rate": 3.8889160106641674e-06, "loss": 1.3175, "step": 54880 }, { "epoch": 1.6183104820875718, "grad_norm": 2.8159630783136556, "learning_rate": 3.8882731732902635e-06, "loss": 1.2631, "step": 54885 }, { "epoch": 1.6184579094795812, "grad_norm": 2.775390331793846, "learning_rate": 3.8876303318475105e-06, "loss": 1.2486, "step": 54890 }, { "epoch": 1.6186053368715907, "grad_norm": 2.856876658991784, "learning_rate": 3.886987486354829e-06, "loss": 1.276, "step": 54895 }, { "epoch": 1.6187527642636002, "grad_norm": 2.9230906408376676, "learning_rate": 3.886344636831135e-06, "loss": 1.2968, "step": 54900 }, { "epoch": 1.6189001916556096, "grad_norm": 2.7893157166391136, "learning_rate": 3.885701783295344e-06, "loss": 1.3142, "step": 54905 }, { "epoch": 1.619047619047619, "grad_norm": 2.7543378233617415, "learning_rate": 3.885058925766373e-06, "loss": 1.2892, "step": 54910 }, { "epoch": 1.6191950464396285, "grad_norm": 2.798964441748253, "learning_rate": 3.884416064263139e-06, "loss": 1.3425, "step": 54915 }, { "epoch": 1.619342473831638, "grad_norm": 2.6555133437767107, "learning_rate": 3.883773198804558e-06, "loss": 1.3308, "step": 54920 }, { "epoch": 1.6194899012236474, "grad_norm": 2.7670639125614054, "learning_rate": 3.883130329409548e-06, "loss": 1.3061, "step": 54925 }, { "epoch": 1.619637328615657, "grad_norm": 2.8833543956178014, "learning_rate": 3.882487456097027e-06, "loss": 1.2983, "step": 54930 }, { "epoch": 1.6197847560076664, "grad_norm": 3.028965167503626, "learning_rate": 3.881844578885911e-06, "loss": 1.3318, "step": 54935 }, { "epoch": 1.6199321833996758, "grad_norm": 2.8417445081128325, "learning_rate": 3.881201697795117e-06, "loss": 1.2967, "step": 54940 }, { "epoch": 1.620079610791685, "grad_norm": 2.951615315249817, "learning_rate": 3.880558812843564e-06, "loss": 1.316, "step": 54945 }, { "epoch": 1.6202270381836945, "grad_norm": 2.8036486697599985, "learning_rate": 3.879915924050169e-06, "loss": 1.3781, "step": 54950 }, { "epoch": 1.620374465575704, "grad_norm": 2.9872842656376832, "learning_rate": 3.879273031433849e-06, "loss": 1.3096, "step": 54955 }, { "epoch": 1.6205218929677134, "grad_norm": 2.6802559158152697, "learning_rate": 3.878630135013523e-06, "loss": 1.2956, "step": 54960 }, { "epoch": 1.6206693203597229, "grad_norm": 2.8853905412896745, "learning_rate": 3.877987234808109e-06, "loss": 1.2801, "step": 54965 }, { "epoch": 1.6208167477517321, "grad_norm": 2.771225982914191, "learning_rate": 3.877344330836523e-06, "loss": 1.3283, "step": 54970 }, { "epoch": 1.6209641751437416, "grad_norm": 2.8934241609104747, "learning_rate": 3.876701423117685e-06, "loss": 1.351, "step": 54975 }, { "epoch": 1.621111602535751, "grad_norm": 2.792712378623245, "learning_rate": 3.876058511670513e-06, "loss": 1.3173, "step": 54980 }, { "epoch": 1.6212590299277605, "grad_norm": 2.7473884537167077, "learning_rate": 3.875415596513926e-06, "loss": 1.28, "step": 54985 }, { "epoch": 1.62140645731977, "grad_norm": 2.802248929839193, "learning_rate": 3.8747726776668415e-06, "loss": 1.2823, "step": 54990 }, { "epoch": 1.6215538847117794, "grad_norm": 2.886963038994955, "learning_rate": 3.874129755148179e-06, "loss": 1.3271, "step": 54995 }, { "epoch": 1.6217013121037889, "grad_norm": 2.909404798395759, "learning_rate": 3.8734868289768555e-06, "loss": 1.3102, "step": 55000 }, { "epoch": 1.6217013121037889, "eval_loss": 1.0867574214935303, "eval_runtime": 4.2645, "eval_samples_per_second": 92.86, "eval_steps_per_second": 3.048, "step": 55000 }, { "epoch": 1.6218487394957983, "grad_norm": 2.7583363233290266, "learning_rate": 3.8728438991717915e-06, "loss": 1.3557, "step": 55005 }, { "epoch": 1.6219961668878078, "grad_norm": 2.868677234749905, "learning_rate": 3.872200965751905e-06, "loss": 1.2897, "step": 55010 }, { "epoch": 1.6221435942798172, "grad_norm": 2.871086721391701, "learning_rate": 3.8715580287361175e-06, "loss": 1.3018, "step": 55015 }, { "epoch": 1.6222910216718267, "grad_norm": 2.873496093229147, "learning_rate": 3.870915088143345e-06, "loss": 1.3278, "step": 55020 }, { "epoch": 1.6224384490638362, "grad_norm": 2.7530236358304334, "learning_rate": 3.870272143992507e-06, "loss": 1.2892, "step": 55025 }, { "epoch": 1.6225858764558456, "grad_norm": 2.7665883624530365, "learning_rate": 3.869629196302525e-06, "loss": 1.3132, "step": 55030 }, { "epoch": 1.622733303847855, "grad_norm": 2.904516000903978, "learning_rate": 3.868986245092317e-06, "loss": 1.2783, "step": 55035 }, { "epoch": 1.6228807312398643, "grad_norm": 2.839869523830247, "learning_rate": 3.868343290380802e-06, "loss": 1.3225, "step": 55040 }, { "epoch": 1.6230281586318738, "grad_norm": 2.7771678128957897, "learning_rate": 3.867700332186902e-06, "loss": 1.2847, "step": 55045 }, { "epoch": 1.6231755860238832, "grad_norm": 3.156069390741173, "learning_rate": 3.8670573705295344e-06, "loss": 1.3583, "step": 55050 }, { "epoch": 1.6233230134158927, "grad_norm": 2.8441381000422243, "learning_rate": 3.86641440542762e-06, "loss": 1.3444, "step": 55055 }, { "epoch": 1.6234704408079021, "grad_norm": 3.0195655802136288, "learning_rate": 3.865771436900079e-06, "loss": 1.2538, "step": 55060 }, { "epoch": 1.6236178681999114, "grad_norm": 3.0095671586211075, "learning_rate": 3.865128464965831e-06, "loss": 1.2975, "step": 55065 }, { "epoch": 1.6237652955919208, "grad_norm": 2.727340631938193, "learning_rate": 3.864485489643798e-06, "loss": 1.3536, "step": 55070 }, { "epoch": 1.6239127229839303, "grad_norm": 2.885865747570428, "learning_rate": 3.863842510952896e-06, "loss": 1.2416, "step": 55075 }, { "epoch": 1.6240601503759398, "grad_norm": 2.9379838852396203, "learning_rate": 3.863199528912051e-06, "loss": 1.362, "step": 55080 }, { "epoch": 1.6242075777679492, "grad_norm": 2.706001003917637, "learning_rate": 3.86255654354018e-06, "loss": 1.2925, "step": 55085 }, { "epoch": 1.6243550051599587, "grad_norm": 2.8135109224899897, "learning_rate": 3.861913554856203e-06, "loss": 1.2717, "step": 55090 }, { "epoch": 1.6245024325519681, "grad_norm": 2.7915424452679987, "learning_rate": 3.861270562879043e-06, "loss": 1.2852, "step": 55095 }, { "epoch": 1.6246498599439776, "grad_norm": 2.8405255922927752, "learning_rate": 3.860627567627618e-06, "loss": 1.2886, "step": 55100 }, { "epoch": 1.624797287335987, "grad_norm": 2.750654569899172, "learning_rate": 3.859984569120852e-06, "loss": 1.2912, "step": 55105 }, { "epoch": 1.6249447147279965, "grad_norm": 2.835428299625827, "learning_rate": 3.859341567377663e-06, "loss": 1.3061, "step": 55110 }, { "epoch": 1.625092142120006, "grad_norm": 2.846211846170578, "learning_rate": 3.858698562416975e-06, "loss": 1.3434, "step": 55115 }, { "epoch": 1.6252395695120154, "grad_norm": 2.638464773660148, "learning_rate": 3.858055554257707e-06, "loss": 1.3025, "step": 55120 }, { "epoch": 1.6253869969040249, "grad_norm": 2.8263629054192823, "learning_rate": 3.857412542918781e-06, "loss": 1.302, "step": 55125 }, { "epoch": 1.6255344242960343, "grad_norm": 2.725468769252566, "learning_rate": 3.856769528419118e-06, "loss": 1.263, "step": 55130 }, { "epoch": 1.6256818516880438, "grad_norm": 2.8284510110168766, "learning_rate": 3.85612651077764e-06, "loss": 1.3129, "step": 55135 }, { "epoch": 1.625829279080053, "grad_norm": 2.9291730871948385, "learning_rate": 3.855483490013269e-06, "loss": 1.3243, "step": 55140 }, { "epoch": 1.6259767064720625, "grad_norm": 2.8888800302688264, "learning_rate": 3.854840466144926e-06, "loss": 1.3359, "step": 55145 }, { "epoch": 1.626124133864072, "grad_norm": 2.8600434534024184, "learning_rate": 3.854197439191531e-06, "loss": 1.259, "step": 55150 }, { "epoch": 1.6262715612560814, "grad_norm": 2.7388161544985166, "learning_rate": 3.853554409172008e-06, "loss": 1.2387, "step": 55155 }, { "epoch": 1.6264189886480909, "grad_norm": 2.8430218060761625, "learning_rate": 3.8529113761052775e-06, "loss": 1.2663, "step": 55160 }, { "epoch": 1.6265664160401, "grad_norm": 2.9433462322989863, "learning_rate": 3.852268340010264e-06, "loss": 1.3525, "step": 55165 }, { "epoch": 1.6267138434321096, "grad_norm": 2.9091898564270156, "learning_rate": 3.851625300905887e-06, "loss": 1.3024, "step": 55170 }, { "epoch": 1.626861270824119, "grad_norm": 2.8194298420304578, "learning_rate": 3.8509822588110695e-06, "loss": 1.337, "step": 55175 }, { "epoch": 1.6270086982161285, "grad_norm": 2.797014805819018, "learning_rate": 3.850339213744733e-06, "loss": 1.2874, "step": 55180 }, { "epoch": 1.627156125608138, "grad_norm": 2.8015718226391417, "learning_rate": 3.849696165725801e-06, "loss": 1.3304, "step": 55185 }, { "epoch": 1.6273035530001474, "grad_norm": 3.002650793930089, "learning_rate": 3.8490531147731955e-06, "loss": 1.341, "step": 55190 }, { "epoch": 1.6274509803921569, "grad_norm": 2.872953735335655, "learning_rate": 3.84841006090584e-06, "loss": 1.3303, "step": 55195 }, { "epoch": 1.6275984077841663, "grad_norm": 2.757438253948304, "learning_rate": 3.847767004142654e-06, "loss": 1.2627, "step": 55200 }, { "epoch": 1.6277458351761758, "grad_norm": 2.725389090691377, "learning_rate": 3.847123944502563e-06, "loss": 1.2706, "step": 55205 }, { "epoch": 1.6278932625681852, "grad_norm": 2.8387804441231297, "learning_rate": 3.846480882004491e-06, "loss": 1.2947, "step": 55210 }, { "epoch": 1.6280406899601947, "grad_norm": 2.840326549141503, "learning_rate": 3.8458378166673566e-06, "loss": 1.3387, "step": 55215 }, { "epoch": 1.6281881173522041, "grad_norm": 2.768063401641001, "learning_rate": 3.845194748510086e-06, "loss": 1.2608, "step": 55220 }, { "epoch": 1.6283355447442136, "grad_norm": 2.7525558403755364, "learning_rate": 3.844551677551602e-06, "loss": 1.2801, "step": 55225 }, { "epoch": 1.628482972136223, "grad_norm": 2.877833750441213, "learning_rate": 3.843908603810825e-06, "loss": 1.3167, "step": 55230 }, { "epoch": 1.6286303995282323, "grad_norm": 2.738749744423951, "learning_rate": 3.843265527306681e-06, "loss": 1.3303, "step": 55235 }, { "epoch": 1.6287778269202418, "grad_norm": 2.852088898709506, "learning_rate": 3.8426224480580925e-06, "loss": 1.3089, "step": 55240 }, { "epoch": 1.6289252543122512, "grad_norm": 2.7969470301541737, "learning_rate": 3.841979366083983e-06, "loss": 1.3271, "step": 55245 }, { "epoch": 1.6290726817042607, "grad_norm": 2.7721679800325605, "learning_rate": 3.841336281403275e-06, "loss": 1.3446, "step": 55250 }, { "epoch": 1.6292201090962701, "grad_norm": 2.8253479544574596, "learning_rate": 3.840693194034893e-06, "loss": 1.2634, "step": 55255 }, { "epoch": 1.6293675364882794, "grad_norm": 2.865643734419678, "learning_rate": 3.840050103997759e-06, "loss": 1.2641, "step": 55260 }, { "epoch": 1.6295149638802888, "grad_norm": 2.9244825721239134, "learning_rate": 3.839407011310799e-06, "loss": 1.3169, "step": 55265 }, { "epoch": 1.6296623912722983, "grad_norm": 2.9083179015278464, "learning_rate": 3.838763915992937e-06, "loss": 1.3126, "step": 55270 }, { "epoch": 1.6298098186643077, "grad_norm": 2.773544238289721, "learning_rate": 3.8381208180630934e-06, "loss": 1.2728, "step": 55275 }, { "epoch": 1.6299572460563172, "grad_norm": 2.8704262475551494, "learning_rate": 3.837477717540195e-06, "loss": 1.2964, "step": 55280 }, { "epoch": 1.6301046734483267, "grad_norm": 2.892993537349329, "learning_rate": 3.836834614443164e-06, "loss": 1.2627, "step": 55285 }, { "epoch": 1.6302521008403361, "grad_norm": 2.8604487145458837, "learning_rate": 3.836191508790926e-06, "loss": 1.3359, "step": 55290 }, { "epoch": 1.6303995282323456, "grad_norm": 2.7856731985979484, "learning_rate": 3.835548400602404e-06, "loss": 1.2374, "step": 55295 }, { "epoch": 1.630546955624355, "grad_norm": 2.8026688088337335, "learning_rate": 3.834905289896524e-06, "loss": 1.3103, "step": 55300 }, { "epoch": 1.6306943830163645, "grad_norm": 2.836071446935419, "learning_rate": 3.834262176692207e-06, "loss": 1.3181, "step": 55305 }, { "epoch": 1.630841810408374, "grad_norm": 3.0109081356870173, "learning_rate": 3.83361906100838e-06, "loss": 1.3194, "step": 55310 }, { "epoch": 1.6309892378003834, "grad_norm": 2.8642148294109173, "learning_rate": 3.832975942863966e-06, "loss": 1.3396, "step": 55315 }, { "epoch": 1.6311366651923929, "grad_norm": 2.7086634885538774, "learning_rate": 3.83233282227789e-06, "loss": 1.2988, "step": 55320 }, { "epoch": 1.6312840925844023, "grad_norm": 2.769461939554401, "learning_rate": 3.831689699269079e-06, "loss": 1.3139, "step": 55325 }, { "epoch": 1.6314315199764118, "grad_norm": 2.7700667506238146, "learning_rate": 3.831046573856453e-06, "loss": 1.2768, "step": 55330 }, { "epoch": 1.631578947368421, "grad_norm": 2.8322200911690376, "learning_rate": 3.83040344605894e-06, "loss": 1.2758, "step": 55335 }, { "epoch": 1.6317263747604305, "grad_norm": 2.85042759741529, "learning_rate": 3.829760315895464e-06, "loss": 1.32, "step": 55340 }, { "epoch": 1.63187380215244, "grad_norm": 2.784919573254152, "learning_rate": 3.82911718338495e-06, "loss": 1.3128, "step": 55345 }, { "epoch": 1.6320212295444494, "grad_norm": 2.856090883205489, "learning_rate": 3.828474048546321e-06, "loss": 1.3299, "step": 55350 }, { "epoch": 1.6321686569364586, "grad_norm": 2.836458689508323, "learning_rate": 3.8278309113985045e-06, "loss": 1.3263, "step": 55355 }, { "epoch": 1.632316084328468, "grad_norm": 2.8227089442101336, "learning_rate": 3.827187771960425e-06, "loss": 1.2673, "step": 55360 }, { "epoch": 1.6324635117204775, "grad_norm": 2.9293018202611116, "learning_rate": 3.8265446302510055e-06, "loss": 1.3155, "step": 55365 }, { "epoch": 1.632610939112487, "grad_norm": 2.7591692578590297, "learning_rate": 3.825901486289173e-06, "loss": 1.3173, "step": 55370 }, { "epoch": 1.6327583665044965, "grad_norm": 2.768337424955685, "learning_rate": 3.825258340093855e-06, "loss": 1.2909, "step": 55375 }, { "epoch": 1.632905793896506, "grad_norm": 2.754344544844129, "learning_rate": 3.824615191683973e-06, "loss": 1.2833, "step": 55380 }, { "epoch": 1.6330532212885154, "grad_norm": 2.891555008475053, "learning_rate": 3.823972041078452e-06, "loss": 1.3031, "step": 55385 }, { "epoch": 1.6332006486805248, "grad_norm": 2.7850014308780593, "learning_rate": 3.823328888296222e-06, "loss": 1.3299, "step": 55390 }, { "epoch": 1.6333480760725343, "grad_norm": 2.814033120733726, "learning_rate": 3.822685733356204e-06, "loss": 1.3267, "step": 55395 }, { "epoch": 1.6334955034645438, "grad_norm": 3.0256566580858566, "learning_rate": 3.822042576277326e-06, "loss": 1.3083, "step": 55400 }, { "epoch": 1.6336429308565532, "grad_norm": 2.7946867895217644, "learning_rate": 3.8213994170785135e-06, "loss": 1.302, "step": 55405 }, { "epoch": 1.6337903582485627, "grad_norm": 2.81905908485282, "learning_rate": 3.820756255778691e-06, "loss": 1.2904, "step": 55410 }, { "epoch": 1.6339377856405721, "grad_norm": 2.959104321500827, "learning_rate": 3.820113092396785e-06, "loss": 1.3458, "step": 55415 }, { "epoch": 1.6340852130325816, "grad_norm": 2.8812830330981365, "learning_rate": 3.81946992695172e-06, "loss": 1.3333, "step": 55420 }, { "epoch": 1.634232640424591, "grad_norm": 2.8767027918380106, "learning_rate": 3.818826759462425e-06, "loss": 1.2838, "step": 55425 }, { "epoch": 1.6343800678166003, "grad_norm": 2.8917205069279555, "learning_rate": 3.818183589947823e-06, "loss": 1.273, "step": 55430 }, { "epoch": 1.6345274952086097, "grad_norm": 2.814242616554383, "learning_rate": 3.81754041842684e-06, "loss": 1.2931, "step": 55435 }, { "epoch": 1.6346749226006192, "grad_norm": 2.7272623166582632, "learning_rate": 3.816897244918405e-06, "loss": 1.2858, "step": 55440 }, { "epoch": 1.6348223499926287, "grad_norm": 2.692117307595641, "learning_rate": 3.816254069441441e-06, "loss": 1.2447, "step": 55445 }, { "epoch": 1.6349697773846381, "grad_norm": 2.7996987352917357, "learning_rate": 3.815610892014875e-06, "loss": 1.3268, "step": 55450 }, { "epoch": 1.6351172047766473, "grad_norm": 2.9039683341987597, "learning_rate": 3.814967712657636e-06, "loss": 1.3193, "step": 55455 }, { "epoch": 1.6352646321686568, "grad_norm": 2.7697675927428387, "learning_rate": 3.8143245313886456e-06, "loss": 1.2851, "step": 55460 }, { "epoch": 1.6354120595606663, "grad_norm": 2.7456270641387888, "learning_rate": 3.8136813482268326e-06, "loss": 1.2927, "step": 55465 }, { "epoch": 1.6355594869526757, "grad_norm": 2.7584713490869253, "learning_rate": 3.813038163191123e-06, "loss": 1.3083, "step": 55470 }, { "epoch": 1.6357069143446852, "grad_norm": 2.8261598991044385, "learning_rate": 3.8123949763004444e-06, "loss": 1.2851, "step": 55475 }, { "epoch": 1.6358543417366946, "grad_norm": 2.9736239608717248, "learning_rate": 3.8117517875737223e-06, "loss": 1.2903, "step": 55480 }, { "epoch": 1.636001769128704, "grad_norm": 2.8643977871409443, "learning_rate": 3.8111085970298837e-06, "loss": 1.2649, "step": 55485 }, { "epoch": 1.6361491965207136, "grad_norm": 2.9369310443359193, "learning_rate": 3.8104654046878543e-06, "loss": 1.3691, "step": 55490 }, { "epoch": 1.636296623912723, "grad_norm": 2.753776315847331, "learning_rate": 3.8098222105665607e-06, "loss": 1.3229, "step": 55495 }, { "epoch": 1.6364440513047325, "grad_norm": 2.8267257516276154, "learning_rate": 3.809179014684931e-06, "loss": 1.2641, "step": 55500 }, { "epoch": 1.6364440513047325, "eval_loss": 1.085837721824646, "eval_runtime": 4.1373, "eval_samples_per_second": 95.715, "eval_steps_per_second": 3.142, "step": 55500 }, { "epoch": 1.636591478696742, "grad_norm": 2.773418239604708, "learning_rate": 3.808535817061891e-06, "loss": 1.2995, "step": 55505 }, { "epoch": 1.6367389060887514, "grad_norm": 2.791683330319082, "learning_rate": 3.8078926177163677e-06, "loss": 1.3225, "step": 55510 }, { "epoch": 1.6368863334807608, "grad_norm": 3.0013169639708708, "learning_rate": 3.807249416667288e-06, "loss": 1.3207, "step": 55515 }, { "epoch": 1.6370337608727703, "grad_norm": 2.818164578124269, "learning_rate": 3.806606213933579e-06, "loss": 1.3401, "step": 55520 }, { "epoch": 1.6371811882647795, "grad_norm": 2.929842129633237, "learning_rate": 3.805963009534167e-06, "loss": 1.3324, "step": 55525 }, { "epoch": 1.637328615656789, "grad_norm": 2.7841967352236914, "learning_rate": 3.8053198034879805e-06, "loss": 1.2829, "step": 55530 }, { "epoch": 1.6374760430487985, "grad_norm": 2.753502127052892, "learning_rate": 3.8046765958139445e-06, "loss": 1.3055, "step": 55535 }, { "epoch": 1.637623470440808, "grad_norm": 2.8932296390993395, "learning_rate": 3.804033386530989e-06, "loss": 1.2361, "step": 55540 }, { "epoch": 1.6377708978328174, "grad_norm": 3.130789002104928, "learning_rate": 3.8033901756580378e-06, "loss": 1.312, "step": 55545 }, { "epoch": 1.6379183252248266, "grad_norm": 2.770759489668383, "learning_rate": 3.80274696321402e-06, "loss": 1.2856, "step": 55550 }, { "epoch": 1.638065752616836, "grad_norm": 2.807403468328176, "learning_rate": 3.802103749217862e-06, "loss": 1.2773, "step": 55555 }, { "epoch": 1.6382131800088455, "grad_norm": 2.849424268494769, "learning_rate": 3.801460533688493e-06, "loss": 1.3183, "step": 55560 }, { "epoch": 1.638360607400855, "grad_norm": 2.8755487103313033, "learning_rate": 3.800817316644839e-06, "loss": 1.2985, "step": 55565 }, { "epoch": 1.6385080347928644, "grad_norm": 2.9163093762450467, "learning_rate": 3.8001740981058265e-06, "loss": 1.3261, "step": 55570 }, { "epoch": 1.638655462184874, "grad_norm": 2.824809529612166, "learning_rate": 3.7995308780903833e-06, "loss": 1.3375, "step": 55575 }, { "epoch": 1.6388028895768834, "grad_norm": 2.8246719482585427, "learning_rate": 3.7988876566174386e-06, "loss": 1.3054, "step": 55580 }, { "epoch": 1.6389503169688928, "grad_norm": 2.790417363620532, "learning_rate": 3.798244433705919e-06, "loss": 1.3314, "step": 55585 }, { "epoch": 1.6390977443609023, "grad_norm": 2.8662798236035414, "learning_rate": 3.797601209374752e-06, "loss": 1.2471, "step": 55590 }, { "epoch": 1.6392451717529117, "grad_norm": 2.8084608770336343, "learning_rate": 3.796957983642864e-06, "loss": 1.3117, "step": 55595 }, { "epoch": 1.6393925991449212, "grad_norm": 2.854813743277487, "learning_rate": 3.796314756529184e-06, "loss": 1.2741, "step": 55600 }, { "epoch": 1.6395400265369307, "grad_norm": 3.0216225013572315, "learning_rate": 3.79567152805264e-06, "loss": 1.317, "step": 55605 }, { "epoch": 1.63968745392894, "grad_norm": 2.8519815731212455, "learning_rate": 3.7950282982321586e-06, "loss": 1.3268, "step": 55610 }, { "epoch": 1.6398348813209496, "grad_norm": 2.928203685636912, "learning_rate": 3.7943850670866686e-06, "loss": 1.3537, "step": 55615 }, { "epoch": 1.639982308712959, "grad_norm": 2.81290398028193, "learning_rate": 3.793741834635097e-06, "loss": 1.3136, "step": 55620 }, { "epoch": 1.6401297361049683, "grad_norm": 2.7722838325230432, "learning_rate": 3.7930986008963715e-06, "loss": 1.2953, "step": 55625 }, { "epoch": 1.6402771634969777, "grad_norm": 2.850372593615861, "learning_rate": 3.7924553658894205e-06, "loss": 1.3777, "step": 55630 }, { "epoch": 1.6404245908889872, "grad_norm": 2.729868998714068, "learning_rate": 3.791812129633173e-06, "loss": 1.2982, "step": 55635 }, { "epoch": 1.6405720182809966, "grad_norm": 2.659271279713795, "learning_rate": 3.7911688921465545e-06, "loss": 1.2841, "step": 55640 }, { "epoch": 1.640719445673006, "grad_norm": 2.937002154714271, "learning_rate": 3.7905256534484952e-06, "loss": 1.3322, "step": 55645 }, { "epoch": 1.6408668730650153, "grad_norm": 2.7747030363347287, "learning_rate": 3.7898824135579215e-06, "loss": 1.2555, "step": 55650 }, { "epoch": 1.6410143004570248, "grad_norm": 2.8887695864822542, "learning_rate": 3.7892391724937625e-06, "loss": 1.2766, "step": 55655 }, { "epoch": 1.6411617278490342, "grad_norm": 2.805275896895419, "learning_rate": 3.788595930274947e-06, "loss": 1.3159, "step": 55660 }, { "epoch": 1.6413091552410437, "grad_norm": 2.902770874739739, "learning_rate": 3.787952686920401e-06, "loss": 1.2874, "step": 55665 }, { "epoch": 1.6414565826330532, "grad_norm": 2.924016794424089, "learning_rate": 3.787309442449055e-06, "loss": 1.3447, "step": 55670 }, { "epoch": 1.6416040100250626, "grad_norm": 2.9077325848576114, "learning_rate": 3.7866661968798354e-06, "loss": 1.3714, "step": 55675 }, { "epoch": 1.641751437417072, "grad_norm": 2.920713123572623, "learning_rate": 3.7860229502316706e-06, "loss": 1.3285, "step": 55680 }, { "epoch": 1.6418988648090815, "grad_norm": 2.867253472845987, "learning_rate": 3.7853797025234893e-06, "loss": 1.3488, "step": 55685 }, { "epoch": 1.642046292201091, "grad_norm": 3.5443027449472098, "learning_rate": 3.784736453774221e-06, "loss": 1.3167, "step": 55690 }, { "epoch": 1.6421937195931005, "grad_norm": 2.9768095804907073, "learning_rate": 3.784093204002792e-06, "loss": 1.295, "step": 55695 }, { "epoch": 1.64234114698511, "grad_norm": 2.815435668348953, "learning_rate": 3.783449953228132e-06, "loss": 1.2598, "step": 55700 }, { "epoch": 1.6424885743771194, "grad_norm": 2.9548830983722083, "learning_rate": 3.7828067014691682e-06, "loss": 1.3632, "step": 55705 }, { "epoch": 1.6426360017691288, "grad_norm": 2.741699245946121, "learning_rate": 3.7821634487448303e-06, "loss": 1.2829, "step": 55710 }, { "epoch": 1.6427834291611383, "grad_norm": 2.9334689711789443, "learning_rate": 3.781520195074047e-06, "loss": 1.3227, "step": 55715 }, { "epoch": 1.6429308565531475, "grad_norm": 2.9265906274434594, "learning_rate": 3.7808769404757452e-06, "loss": 1.3179, "step": 55720 }, { "epoch": 1.643078283945157, "grad_norm": 2.840587131495281, "learning_rate": 3.7802336849688533e-06, "loss": 1.3498, "step": 55725 }, { "epoch": 1.6432257113371664, "grad_norm": 2.7325289205138312, "learning_rate": 3.7795904285723014e-06, "loss": 1.3201, "step": 55730 }, { "epoch": 1.643373138729176, "grad_norm": 2.895327859869813, "learning_rate": 3.7789471713050183e-06, "loss": 1.3239, "step": 55735 }, { "epoch": 1.6435205661211854, "grad_norm": 2.9421643499900423, "learning_rate": 3.77830391318593e-06, "loss": 1.3334, "step": 55740 }, { "epoch": 1.6436679935131946, "grad_norm": 2.855538281516391, "learning_rate": 3.777660654233968e-06, "loss": 1.2725, "step": 55745 }, { "epoch": 1.643815420905204, "grad_norm": 2.857283383226676, "learning_rate": 3.7770173944680596e-06, "loss": 1.2728, "step": 55750 }, { "epoch": 1.6439628482972135, "grad_norm": 2.823112852987907, "learning_rate": 3.776374133907132e-06, "loss": 1.2904, "step": 55755 }, { "epoch": 1.644110275689223, "grad_norm": 2.8893198688454844, "learning_rate": 3.7757308725701174e-06, "loss": 1.2706, "step": 55760 }, { "epoch": 1.6442577030812324, "grad_norm": 3.0242843603734926, "learning_rate": 3.7750876104759413e-06, "loss": 1.3353, "step": 55765 }, { "epoch": 1.6444051304732419, "grad_norm": 2.84619885984195, "learning_rate": 3.774444347643534e-06, "loss": 1.342, "step": 55770 }, { "epoch": 1.6445525578652513, "grad_norm": 2.7978868891499618, "learning_rate": 3.7738010840918233e-06, "loss": 1.2715, "step": 55775 }, { "epoch": 1.6446999852572608, "grad_norm": 2.921132796702641, "learning_rate": 3.7731578198397393e-06, "loss": 1.2806, "step": 55780 }, { "epoch": 1.6448474126492703, "grad_norm": 2.7229971191436007, "learning_rate": 3.7725145549062096e-06, "loss": 1.3444, "step": 55785 }, { "epoch": 1.6449948400412797, "grad_norm": 2.7166850349437044, "learning_rate": 3.7718712893101637e-06, "loss": 1.3163, "step": 55790 }, { "epoch": 1.6451422674332892, "grad_norm": 2.7034645434438964, "learning_rate": 3.77122802307053e-06, "loss": 1.2718, "step": 55795 }, { "epoch": 1.6452896948252986, "grad_norm": 2.7820559059457155, "learning_rate": 3.770584756206238e-06, "loss": 1.2719, "step": 55800 }, { "epoch": 1.645437122217308, "grad_norm": 2.7778338388257477, "learning_rate": 3.7699414887362146e-06, "loss": 1.2953, "step": 55805 }, { "epoch": 1.6455845496093175, "grad_norm": 2.8329596902365206, "learning_rate": 3.7692982206793907e-06, "loss": 1.3157, "step": 55810 }, { "epoch": 1.645731977001327, "grad_norm": 2.7743300593227853, "learning_rate": 3.7686549520546955e-06, "loss": 1.3156, "step": 55815 }, { "epoch": 1.6458794043933362, "grad_norm": 2.811864844945933, "learning_rate": 3.7680116828810566e-06, "loss": 1.3172, "step": 55820 }, { "epoch": 1.6460268317853457, "grad_norm": 2.5646183922503107, "learning_rate": 3.7673684131774032e-06, "loss": 1.3077, "step": 55825 }, { "epoch": 1.6461742591773552, "grad_norm": 2.847085519754536, "learning_rate": 3.7667251429626638e-06, "loss": 1.2935, "step": 55830 }, { "epoch": 1.6463216865693646, "grad_norm": 2.961694728534857, "learning_rate": 3.766081872255768e-06, "loss": 1.3059, "step": 55835 }, { "epoch": 1.646469113961374, "grad_norm": 2.9570338581245057, "learning_rate": 3.765438601075646e-06, "loss": 1.3423, "step": 55840 }, { "epoch": 1.6466165413533833, "grad_norm": 2.799090952241949, "learning_rate": 3.764795329441225e-06, "loss": 1.2982, "step": 55845 }, { "epoch": 1.6467639687453928, "grad_norm": 2.866132214409355, "learning_rate": 3.7641520573714345e-06, "loss": 1.3032, "step": 55850 }, { "epoch": 1.6469113961374022, "grad_norm": 2.7626600334851363, "learning_rate": 3.763508784885203e-06, "loss": 1.2749, "step": 55855 }, { "epoch": 1.6470588235294117, "grad_norm": 2.8865467394959285, "learning_rate": 3.76286551200146e-06, "loss": 1.3, "step": 55860 }, { "epoch": 1.6472062509214211, "grad_norm": 2.6961333592280954, "learning_rate": 3.762222238739136e-06, "loss": 1.2742, "step": 55865 }, { "epoch": 1.6473536783134306, "grad_norm": 2.9640591917317245, "learning_rate": 3.761578965117158e-06, "loss": 1.3472, "step": 55870 }, { "epoch": 1.64750110570544, "grad_norm": 2.8270984520637703, "learning_rate": 3.760935691154456e-06, "loss": 1.3042, "step": 55875 }, { "epoch": 1.6476485330974495, "grad_norm": 2.832594634113652, "learning_rate": 3.7602924168699577e-06, "loss": 1.2329, "step": 55880 }, { "epoch": 1.647795960489459, "grad_norm": 2.821757406682462, "learning_rate": 3.7596491422825936e-06, "loss": 1.2824, "step": 55885 }, { "epoch": 1.6479433878814684, "grad_norm": 2.793735876186327, "learning_rate": 3.7590058674112935e-06, "loss": 1.3092, "step": 55890 }, { "epoch": 1.648090815273478, "grad_norm": 2.7593158861425597, "learning_rate": 3.758362592274985e-06, "loss": 1.3024, "step": 55895 }, { "epoch": 1.6482382426654874, "grad_norm": 2.9170197216435416, "learning_rate": 3.7577193168925968e-06, "loss": 1.3149, "step": 55900 }, { "epoch": 1.6483856700574968, "grad_norm": 2.7802419790653, "learning_rate": 3.7570760412830596e-06, "loss": 1.2802, "step": 55905 }, { "epoch": 1.6485330974495063, "grad_norm": 2.8599498035345303, "learning_rate": 3.7564327654653013e-06, "loss": 1.3158, "step": 55910 }, { "epoch": 1.6486805248415155, "grad_norm": 3.0302189338748526, "learning_rate": 3.755789489458252e-06, "loss": 1.3018, "step": 55915 }, { "epoch": 1.648827952233525, "grad_norm": 2.808292864626466, "learning_rate": 3.7551462132808406e-06, "loss": 1.3134, "step": 55920 }, { "epoch": 1.6489753796255344, "grad_norm": 2.732990592554967, "learning_rate": 3.7545029369519963e-06, "loss": 1.2997, "step": 55925 }, { "epoch": 1.6491228070175439, "grad_norm": 2.903746609784036, "learning_rate": 3.7538596604906476e-06, "loss": 1.2693, "step": 55930 }, { "epoch": 1.6492702344095533, "grad_norm": 2.8653343707863654, "learning_rate": 3.7532163839157237e-06, "loss": 1.3108, "step": 55935 }, { "epoch": 1.6494176618015626, "grad_norm": 2.8038097765614194, "learning_rate": 3.7525731072461535e-06, "loss": 1.3329, "step": 55940 }, { "epoch": 1.649565089193572, "grad_norm": 2.8970691766090053, "learning_rate": 3.751929830500867e-06, "loss": 1.3106, "step": 55945 }, { "epoch": 1.6497125165855815, "grad_norm": 2.887512504539193, "learning_rate": 3.7512865536987935e-06, "loss": 1.2932, "step": 55950 }, { "epoch": 1.649859943977591, "grad_norm": 2.861301230473495, "learning_rate": 3.7506432768588613e-06, "loss": 1.3241, "step": 55955 }, { "epoch": 1.6500073713696004, "grad_norm": 2.9925372492071665, "learning_rate": 3.75e-06, "loss": 1.3422, "step": 55960 }, { "epoch": 1.6501547987616099, "grad_norm": 2.7010458133121777, "learning_rate": 3.7493567231411397e-06, "loss": 1.3007, "step": 55965 }, { "epoch": 1.6503022261536193, "grad_norm": 2.7153774967486255, "learning_rate": 3.7487134463012067e-06, "loss": 1.2859, "step": 55970 }, { "epoch": 1.6504496535456288, "grad_norm": 2.9081857590716873, "learning_rate": 3.7480701694991335e-06, "loss": 1.2984, "step": 55975 }, { "epoch": 1.6505970809376382, "grad_norm": 2.763572896365814, "learning_rate": 3.747426892753847e-06, "loss": 1.3279, "step": 55980 }, { "epoch": 1.6507445083296477, "grad_norm": 2.6882766315135074, "learning_rate": 3.746783616084278e-06, "loss": 1.2858, "step": 55985 }, { "epoch": 1.6508919357216572, "grad_norm": 2.8255190652532356, "learning_rate": 3.7461403395093534e-06, "loss": 1.3228, "step": 55990 }, { "epoch": 1.6510393631136666, "grad_norm": 2.78284762021737, "learning_rate": 3.7454970630480048e-06, "loss": 1.2846, "step": 55995 }, { "epoch": 1.651186790505676, "grad_norm": 2.824160717000539, "learning_rate": 3.7448537867191596e-06, "loss": 1.2797, "step": 56000 }, { "epoch": 1.651186790505676, "eval_loss": 1.084498405456543, "eval_runtime": 4.278, "eval_samples_per_second": 92.567, "eval_steps_per_second": 3.039, "step": 56000 }, { "epoch": 1.6513342178976855, "grad_norm": 2.8446698992182955, "learning_rate": 3.7442105105417486e-06, "loss": 1.3391, "step": 56005 }, { "epoch": 1.651481645289695, "grad_norm": 2.6950234721920356, "learning_rate": 3.7435672345346984e-06, "loss": 1.3007, "step": 56010 }, { "epoch": 1.6516290726817042, "grad_norm": 2.806104406265072, "learning_rate": 3.742923958716941e-06, "loss": 1.2961, "step": 56015 }, { "epoch": 1.6517765000737137, "grad_norm": 2.748278779911611, "learning_rate": 3.742280683107404e-06, "loss": 1.2569, "step": 56020 }, { "epoch": 1.6519239274657231, "grad_norm": 2.8704993022515946, "learning_rate": 3.741637407725016e-06, "loss": 1.2736, "step": 56025 }, { "epoch": 1.6520713548577326, "grad_norm": 2.77748745606876, "learning_rate": 3.7409941325887075e-06, "loss": 1.3325, "step": 56030 }, { "epoch": 1.652218782249742, "grad_norm": 2.6923710527184164, "learning_rate": 3.7403508577174066e-06, "loss": 1.2801, "step": 56035 }, { "epoch": 1.6523662096417513, "grad_norm": 2.9197802473188896, "learning_rate": 3.7397075831300437e-06, "loss": 1.2756, "step": 56040 }, { "epoch": 1.6525136370337608, "grad_norm": 2.816615124458963, "learning_rate": 3.7390643088455447e-06, "loss": 1.271, "step": 56045 }, { "epoch": 1.6526610644257702, "grad_norm": 2.7247063350430207, "learning_rate": 3.738421034882843e-06, "loss": 1.2835, "step": 56050 }, { "epoch": 1.6528084918177797, "grad_norm": 2.7728219109115884, "learning_rate": 3.7377777612608647e-06, "loss": 1.3369, "step": 56055 }, { "epoch": 1.6529559192097891, "grad_norm": 2.8134779685891735, "learning_rate": 3.7371344879985405e-06, "loss": 1.2973, "step": 56060 }, { "epoch": 1.6531033466017986, "grad_norm": 2.758436285948383, "learning_rate": 3.736491215114797e-06, "loss": 1.2824, "step": 56065 }, { "epoch": 1.653250773993808, "grad_norm": 2.829739070445142, "learning_rate": 3.735847942628566e-06, "loss": 1.2908, "step": 56070 }, { "epoch": 1.6533982013858175, "grad_norm": 2.758680421190607, "learning_rate": 3.7352046705587754e-06, "loss": 1.331, "step": 56075 }, { "epoch": 1.653545628777827, "grad_norm": 2.7636700502412137, "learning_rate": 3.734561398924355e-06, "loss": 1.3331, "step": 56080 }, { "epoch": 1.6536930561698364, "grad_norm": 2.7921010120955323, "learning_rate": 3.7339181277442313e-06, "loss": 1.2137, "step": 56085 }, { "epoch": 1.6538404835618459, "grad_norm": 2.9084318878490203, "learning_rate": 3.733274857037337e-06, "loss": 1.2964, "step": 56090 }, { "epoch": 1.6539879109538553, "grad_norm": 2.766314541265016, "learning_rate": 3.7326315868225987e-06, "loss": 1.2976, "step": 56095 }, { "epoch": 1.6541353383458648, "grad_norm": 2.8107735543000114, "learning_rate": 3.731988317118944e-06, "loss": 1.2436, "step": 56100 }, { "epoch": 1.6542827657378743, "grad_norm": 2.887462055159776, "learning_rate": 3.7313450479453055e-06, "loss": 1.248, "step": 56105 }, { "epoch": 1.6544301931298835, "grad_norm": 2.7827115050833404, "learning_rate": 3.7307017793206095e-06, "loss": 1.3544, "step": 56110 }, { "epoch": 1.654577620521893, "grad_norm": 2.893012157817347, "learning_rate": 3.7300585112637864e-06, "loss": 1.3086, "step": 56115 }, { "epoch": 1.6547250479139024, "grad_norm": 2.8573591591772765, "learning_rate": 3.7294152437937628e-06, "loss": 1.2688, "step": 56120 }, { "epoch": 1.6548724753059119, "grad_norm": 2.7625287703753023, "learning_rate": 3.7287719769294706e-06, "loss": 1.2935, "step": 56125 }, { "epoch": 1.6550199026979213, "grad_norm": 3.0476966176101574, "learning_rate": 3.728128710689837e-06, "loss": 1.3125, "step": 56130 }, { "epoch": 1.6551673300899306, "grad_norm": 2.7874640244256543, "learning_rate": 3.7274854450937915e-06, "loss": 1.2991, "step": 56135 }, { "epoch": 1.65531475748194, "grad_norm": 2.7122294282533526, "learning_rate": 3.726842180160261e-06, "loss": 1.3075, "step": 56140 }, { "epoch": 1.6554621848739495, "grad_norm": 2.882307744926693, "learning_rate": 3.726198915908177e-06, "loss": 1.306, "step": 56145 }, { "epoch": 1.655609612265959, "grad_norm": 2.6611084472918574, "learning_rate": 3.7255556523564662e-06, "loss": 1.3253, "step": 56150 }, { "epoch": 1.6557570396579684, "grad_norm": 2.861124381100979, "learning_rate": 3.724912389524059e-06, "loss": 1.2859, "step": 56155 }, { "epoch": 1.6559044670499778, "grad_norm": 2.8892337570440456, "learning_rate": 3.7242691274298836e-06, "loss": 1.2866, "step": 56160 }, { "epoch": 1.6560518944419873, "grad_norm": 2.8393383366975473, "learning_rate": 3.723625866092868e-06, "loss": 1.2743, "step": 56165 }, { "epoch": 1.6561993218339968, "grad_norm": 2.694501180304239, "learning_rate": 3.7229826055319423e-06, "loss": 1.2982, "step": 56170 }, { "epoch": 1.6563467492260062, "grad_norm": 2.856650003639179, "learning_rate": 3.7223393457660323e-06, "loss": 1.2751, "step": 56175 }, { "epoch": 1.6564941766180157, "grad_norm": 2.9052301987882987, "learning_rate": 3.7216960868140705e-06, "loss": 1.3386, "step": 56180 }, { "epoch": 1.6566416040100251, "grad_norm": 2.7867146697309093, "learning_rate": 3.7210528286949827e-06, "loss": 1.2395, "step": 56185 }, { "epoch": 1.6567890314020346, "grad_norm": 2.638299597153696, "learning_rate": 3.720409571427699e-06, "loss": 1.3076, "step": 56190 }, { "epoch": 1.656936458794044, "grad_norm": 2.7684959821104798, "learning_rate": 3.7197663150311465e-06, "loss": 1.2693, "step": 56195 }, { "epoch": 1.6570838861860535, "grad_norm": 2.7794813079734464, "learning_rate": 3.719123059524256e-06, "loss": 1.3179, "step": 56200 }, { "epoch": 1.657231313578063, "grad_norm": 2.8141468213243432, "learning_rate": 3.718479804925954e-06, "loss": 1.3165, "step": 56205 }, { "epoch": 1.6573787409700722, "grad_norm": 2.8068933250192782, "learning_rate": 3.7178365512551703e-06, "loss": 1.2916, "step": 56210 }, { "epoch": 1.6575261683620817, "grad_norm": 2.8818048586915896, "learning_rate": 3.717193298530831e-06, "loss": 1.31, "step": 56215 }, { "epoch": 1.6576735957540911, "grad_norm": 2.841892206557916, "learning_rate": 3.716550046771869e-06, "loss": 1.2828, "step": 56220 }, { "epoch": 1.6578210231461006, "grad_norm": 2.6688685420536182, "learning_rate": 3.715906795997209e-06, "loss": 1.2561, "step": 56225 }, { "epoch": 1.6579684505381098, "grad_norm": 2.8783911657271, "learning_rate": 3.715263546225779e-06, "loss": 1.3223, "step": 56230 }, { "epoch": 1.6581158779301193, "grad_norm": 2.964790454691381, "learning_rate": 3.714620297476511e-06, "loss": 1.3102, "step": 56235 }, { "epoch": 1.6582633053221287, "grad_norm": 2.7715367272107385, "learning_rate": 3.7139770497683296e-06, "loss": 1.3243, "step": 56240 }, { "epoch": 1.6584107327141382, "grad_norm": 2.809085148592374, "learning_rate": 3.713333803120166e-06, "loss": 1.3185, "step": 56245 }, { "epoch": 1.6585581601061476, "grad_norm": 2.7166558340126783, "learning_rate": 3.7126905575509454e-06, "loss": 1.3305, "step": 56250 }, { "epoch": 1.658705587498157, "grad_norm": 2.747525478624997, "learning_rate": 3.7120473130795993e-06, "loss": 1.291, "step": 56255 }, { "epoch": 1.6588530148901666, "grad_norm": 2.7612463758369974, "learning_rate": 3.7114040697250534e-06, "loss": 1.3183, "step": 56260 }, { "epoch": 1.659000442282176, "grad_norm": 2.751642003994043, "learning_rate": 3.710760827506238e-06, "loss": 1.2787, "step": 56265 }, { "epoch": 1.6591478696741855, "grad_norm": 2.7837215997629423, "learning_rate": 3.7101175864420783e-06, "loss": 1.3099, "step": 56270 }, { "epoch": 1.659295297066195, "grad_norm": 2.931983663332242, "learning_rate": 3.709474346551506e-06, "loss": 1.3192, "step": 56275 }, { "epoch": 1.6594427244582044, "grad_norm": 2.8021531737575875, "learning_rate": 3.708831107853446e-06, "loss": 1.2843, "step": 56280 }, { "epoch": 1.6595901518502139, "grad_norm": 2.699555093234591, "learning_rate": 3.7081878703668277e-06, "loss": 1.2617, "step": 56285 }, { "epoch": 1.6597375792422233, "grad_norm": 2.752696393485717, "learning_rate": 3.70754463411058e-06, "loss": 1.2466, "step": 56290 }, { "epoch": 1.6598850066342328, "grad_norm": 2.9333539210399295, "learning_rate": 3.706901399103629e-06, "loss": 1.3323, "step": 56295 }, { "epoch": 1.6600324340262422, "grad_norm": 2.7845634611502432, "learning_rate": 3.706258165364905e-06, "loss": 1.2747, "step": 56300 }, { "epoch": 1.6601798614182515, "grad_norm": 2.7997117730113574, "learning_rate": 3.705614932913332e-06, "loss": 1.3329, "step": 56305 }, { "epoch": 1.660327288810261, "grad_norm": 2.9971421605439716, "learning_rate": 3.7049717017678424e-06, "loss": 1.2991, "step": 56310 }, { "epoch": 1.6604747162022704, "grad_norm": 2.7488808510259193, "learning_rate": 3.704328471947361e-06, "loss": 1.2999, "step": 56315 }, { "epoch": 1.6606221435942798, "grad_norm": 3.2541347329480637, "learning_rate": 3.703685243470817e-06, "loss": 1.2814, "step": 56320 }, { "epoch": 1.6607695709862893, "grad_norm": 2.786655030442175, "learning_rate": 3.703042016357136e-06, "loss": 1.2827, "step": 56325 }, { "epoch": 1.6609169983782985, "grad_norm": 2.8334785111200573, "learning_rate": 3.702398790625249e-06, "loss": 1.3186, "step": 56330 }, { "epoch": 1.661064425770308, "grad_norm": 2.8208392477506057, "learning_rate": 3.7017555662940816e-06, "loss": 1.3167, "step": 56335 }, { "epoch": 1.6612118531623175, "grad_norm": 2.8980096856571635, "learning_rate": 3.701112343382562e-06, "loss": 1.3468, "step": 56340 }, { "epoch": 1.661359280554327, "grad_norm": 2.8184887407338017, "learning_rate": 3.700469121909616e-06, "loss": 1.3407, "step": 56345 }, { "epoch": 1.6615067079463364, "grad_norm": 2.8712812328234505, "learning_rate": 3.6998259018941745e-06, "loss": 1.3293, "step": 56350 }, { "epoch": 1.6616541353383458, "grad_norm": 2.6582368796015716, "learning_rate": 3.699182683355163e-06, "loss": 1.2706, "step": 56355 }, { "epoch": 1.6618015627303553, "grad_norm": 2.9158948337333137, "learning_rate": 3.698539466311507e-06, "loss": 1.2246, "step": 56360 }, { "epoch": 1.6619489901223647, "grad_norm": 2.79186382694316, "learning_rate": 3.6978962507821383e-06, "loss": 1.3045, "step": 56365 }, { "epoch": 1.6620964175143742, "grad_norm": 2.776960380912986, "learning_rate": 3.6972530367859806e-06, "loss": 1.2414, "step": 56370 }, { "epoch": 1.6622438449063837, "grad_norm": 2.8769756375757938, "learning_rate": 3.6966098243419637e-06, "loss": 1.2791, "step": 56375 }, { "epoch": 1.6623912722983931, "grad_norm": 2.7872695829396323, "learning_rate": 3.695966613469012e-06, "loss": 1.3117, "step": 56380 }, { "epoch": 1.6625386996904026, "grad_norm": 2.8973198718710327, "learning_rate": 3.695323404186056e-06, "loss": 1.2613, "step": 56385 }, { "epoch": 1.662686127082412, "grad_norm": 2.846382954267174, "learning_rate": 3.69468019651202e-06, "loss": 1.3323, "step": 56390 }, { "epoch": 1.6628335544744215, "grad_norm": 2.769607474212406, "learning_rate": 3.6940369904658337e-06, "loss": 1.3222, "step": 56395 }, { "epoch": 1.6629809818664307, "grad_norm": 2.8912203716174334, "learning_rate": 3.693393786066421e-06, "loss": 1.3772, "step": 56400 }, { "epoch": 1.6631284092584402, "grad_norm": 2.8195937560129547, "learning_rate": 3.692750583332713e-06, "loss": 1.3179, "step": 56405 }, { "epoch": 1.6632758366504496, "grad_norm": 2.6545691978450368, "learning_rate": 3.692107382283632e-06, "loss": 1.3147, "step": 56410 }, { "epoch": 1.663423264042459, "grad_norm": 2.8382250045290145, "learning_rate": 3.6914641829381093e-06, "loss": 1.2772, "step": 56415 }, { "epoch": 1.6635706914344686, "grad_norm": 2.743032643774511, "learning_rate": 3.69082098531507e-06, "loss": 1.3278, "step": 56420 }, { "epoch": 1.6637181188264778, "grad_norm": 2.8499682531366974, "learning_rate": 3.69017778943344e-06, "loss": 1.265, "step": 56425 }, { "epoch": 1.6638655462184873, "grad_norm": 2.779472685368537, "learning_rate": 3.6895345953121475e-06, "loss": 1.2669, "step": 56430 }, { "epoch": 1.6640129736104967, "grad_norm": 2.8551945476752736, "learning_rate": 3.688891402970117e-06, "loss": 1.2338, "step": 56435 }, { "epoch": 1.6641604010025062, "grad_norm": 2.8737728029608864, "learning_rate": 3.6882482124262783e-06, "loss": 1.3182, "step": 56440 }, { "epoch": 1.6643078283945156, "grad_norm": 2.8438664595103718, "learning_rate": 3.687605023699556e-06, "loss": 1.2598, "step": 56445 }, { "epoch": 1.664455255786525, "grad_norm": 2.80957376584951, "learning_rate": 3.686961836808877e-06, "loss": 1.2563, "step": 56450 }, { "epoch": 1.6646026831785345, "grad_norm": 2.8422991272468296, "learning_rate": 3.686318651773167e-06, "loss": 1.2291, "step": 56455 }, { "epoch": 1.664750110570544, "grad_norm": 3.006776887330189, "learning_rate": 3.685675468611355e-06, "loss": 1.302, "step": 56460 }, { "epoch": 1.6648975379625535, "grad_norm": 2.823030623700009, "learning_rate": 3.6850322873423654e-06, "loss": 1.3082, "step": 56465 }, { "epoch": 1.665044965354563, "grad_norm": 2.838053075079035, "learning_rate": 3.684389107985125e-06, "loss": 1.3412, "step": 56470 }, { "epoch": 1.6651923927465724, "grad_norm": 2.8898446054747686, "learning_rate": 3.683745930558559e-06, "loss": 1.2654, "step": 56475 }, { "epoch": 1.6653398201385818, "grad_norm": 2.869306082468109, "learning_rate": 3.683102755081596e-06, "loss": 1.2468, "step": 56480 }, { "epoch": 1.6654872475305913, "grad_norm": 2.8945514217536727, "learning_rate": 3.6824595815731602e-06, "loss": 1.283, "step": 56485 }, { "epoch": 1.6656346749226008, "grad_norm": 2.9011675422108256, "learning_rate": 3.6818164100521777e-06, "loss": 1.3027, "step": 56490 }, { "epoch": 1.6657821023146102, "grad_norm": 2.8973598642042084, "learning_rate": 3.6811732405375766e-06, "loss": 1.3612, "step": 56495 }, { "epoch": 1.6659295297066194, "grad_norm": 2.8361256751836046, "learning_rate": 3.6805300730482803e-06, "loss": 1.2672, "step": 56500 }, { "epoch": 1.6659295297066194, "eval_loss": 1.0836387872695923, "eval_runtime": 4.1391, "eval_samples_per_second": 95.673, "eval_steps_per_second": 3.141, "step": 56500 }, { "epoch": 1.666076957098629, "grad_norm": 2.914802939974163, "learning_rate": 3.6798869076032172e-06, "loss": 1.3088, "step": 56505 }, { "epoch": 1.6662243844906384, "grad_norm": 2.823560538721418, "learning_rate": 3.6792437442213095e-06, "loss": 1.2801, "step": 56510 }, { "epoch": 1.6663718118826478, "grad_norm": 2.7610269914888197, "learning_rate": 3.6786005829214876e-06, "loss": 1.293, "step": 56515 }, { "epoch": 1.6665192392746573, "grad_norm": 2.735929467929306, "learning_rate": 3.677957423722674e-06, "loss": 1.253, "step": 56520 }, { "epoch": 1.6666666666666665, "grad_norm": 2.8439226781090214, "learning_rate": 3.677314266643797e-06, "loss": 1.2736, "step": 56525 }, { "epoch": 1.666814094058676, "grad_norm": 2.827920090689535, "learning_rate": 3.6766711117037783e-06, "loss": 1.3113, "step": 56530 }, { "epoch": 1.6669615214506854, "grad_norm": 2.831397004458227, "learning_rate": 3.6760279589215477e-06, "loss": 1.3253, "step": 56535 }, { "epoch": 1.667108948842695, "grad_norm": 2.8760065071193988, "learning_rate": 3.6753848083160275e-06, "loss": 1.3276, "step": 56540 }, { "epoch": 1.6672563762347044, "grad_norm": 2.845380839645744, "learning_rate": 3.6747416599061463e-06, "loss": 1.3255, "step": 56545 }, { "epoch": 1.6674038036267138, "grad_norm": 2.8103296595761846, "learning_rate": 3.6740985137108268e-06, "loss": 1.2783, "step": 56550 }, { "epoch": 1.6675512310187233, "grad_norm": 2.8651559161465907, "learning_rate": 3.6734553697489955e-06, "loss": 1.3477, "step": 56555 }, { "epoch": 1.6676986584107327, "grad_norm": 2.8729242473809142, "learning_rate": 3.6728122280395773e-06, "loss": 1.2651, "step": 56560 }, { "epoch": 1.6678460858027422, "grad_norm": 2.8265129507516975, "learning_rate": 3.672169088601496e-06, "loss": 1.3005, "step": 56565 }, { "epoch": 1.6679935131947516, "grad_norm": 2.8683666198611752, "learning_rate": 3.6715259514536803e-06, "loss": 1.3233, "step": 56570 }, { "epoch": 1.668140940586761, "grad_norm": 2.7527615771894163, "learning_rate": 3.6708828166150516e-06, "loss": 1.2836, "step": 56575 }, { "epoch": 1.6682883679787706, "grad_norm": 2.915129977664997, "learning_rate": 3.6702396841045374e-06, "loss": 1.3087, "step": 56580 }, { "epoch": 1.66843579537078, "grad_norm": 2.6582504938519853, "learning_rate": 3.66959655394106e-06, "loss": 1.2847, "step": 56585 }, { "epoch": 1.6685832227627895, "grad_norm": 2.8280601045017133, "learning_rate": 3.6689534261435477e-06, "loss": 1.3277, "step": 56590 }, { "epoch": 1.6687306501547987, "grad_norm": 2.9390203892792157, "learning_rate": 3.668310300730922e-06, "loss": 1.3088, "step": 56595 }, { "epoch": 1.6688780775468082, "grad_norm": 2.861546144176007, "learning_rate": 3.66766717772211e-06, "loss": 1.3268, "step": 56600 }, { "epoch": 1.6690255049388176, "grad_norm": 3.0711331337642434, "learning_rate": 3.6670240571360336e-06, "loss": 1.244, "step": 56605 }, { "epoch": 1.669172932330827, "grad_norm": 2.8242779101642173, "learning_rate": 3.6663809389916213e-06, "loss": 1.3177, "step": 56610 }, { "epoch": 1.6693203597228365, "grad_norm": 2.839010198761764, "learning_rate": 3.665737823307795e-06, "loss": 1.3272, "step": 56615 }, { "epoch": 1.6694677871148458, "grad_norm": 2.791504313895208, "learning_rate": 3.6650947101034777e-06, "loss": 1.3024, "step": 56620 }, { "epoch": 1.6696152145068552, "grad_norm": 2.842893843999529, "learning_rate": 3.664451599397597e-06, "loss": 1.3031, "step": 56625 }, { "epoch": 1.6697626418988647, "grad_norm": 2.9200903563351224, "learning_rate": 3.663808491209075e-06, "loss": 1.3258, "step": 56630 }, { "epoch": 1.6699100692908742, "grad_norm": 2.789541638035604, "learning_rate": 3.6631653855568374e-06, "loss": 1.3606, "step": 56635 }, { "epoch": 1.6700574966828836, "grad_norm": 2.8023484947729003, "learning_rate": 3.6625222824598057e-06, "loss": 1.3244, "step": 56640 }, { "epoch": 1.670204924074893, "grad_norm": 2.8678128148059674, "learning_rate": 3.661879181936908e-06, "loss": 1.2853, "step": 56645 }, { "epoch": 1.6703523514669025, "grad_norm": 2.8879043626786536, "learning_rate": 3.6612360840070645e-06, "loss": 1.2734, "step": 56650 }, { "epoch": 1.670499778858912, "grad_norm": 2.7941482999457787, "learning_rate": 3.6605929886892016e-06, "loss": 1.2698, "step": 56655 }, { "epoch": 1.6706472062509214, "grad_norm": 2.8551647698914406, "learning_rate": 3.6599498960022404e-06, "loss": 1.2873, "step": 56660 }, { "epoch": 1.670794633642931, "grad_norm": 2.8398241096882546, "learning_rate": 3.6593068059651084e-06, "loss": 1.32, "step": 56665 }, { "epoch": 1.6709420610349404, "grad_norm": 2.8837201306792344, "learning_rate": 3.6586637185967247e-06, "loss": 1.2674, "step": 56670 }, { "epoch": 1.6710894884269498, "grad_norm": 2.820282814992505, "learning_rate": 3.6580206339160176e-06, "loss": 1.2899, "step": 56675 }, { "epoch": 1.6712369158189593, "grad_norm": 2.8454761478338937, "learning_rate": 3.6573775519419086e-06, "loss": 1.3002, "step": 56680 }, { "epoch": 1.6713843432109687, "grad_norm": 2.8237243296894086, "learning_rate": 3.6567344726933195e-06, "loss": 1.3197, "step": 56685 }, { "epoch": 1.6715317706029782, "grad_norm": 2.9179043559127233, "learning_rate": 3.656091396189176e-06, "loss": 1.3316, "step": 56690 }, { "epoch": 1.6716791979949874, "grad_norm": 2.9420232184881767, "learning_rate": 3.6554483224483993e-06, "loss": 1.3082, "step": 56695 }, { "epoch": 1.671826625386997, "grad_norm": 2.76947636872741, "learning_rate": 3.6548052514899148e-06, "loss": 1.2565, "step": 56700 }, { "epoch": 1.6719740527790063, "grad_norm": 2.7967897345990247, "learning_rate": 3.6541621833326436e-06, "loss": 1.2999, "step": 56705 }, { "epoch": 1.6721214801710158, "grad_norm": 2.789389319327171, "learning_rate": 3.6535191179955107e-06, "loss": 1.246, "step": 56710 }, { "epoch": 1.6722689075630253, "grad_norm": 2.850412481598601, "learning_rate": 3.652876055497436e-06, "loss": 1.3344, "step": 56715 }, { "epoch": 1.6724163349550345, "grad_norm": 3.0237168012621853, "learning_rate": 3.6522329958573463e-06, "loss": 1.3392, "step": 56720 }, { "epoch": 1.672563762347044, "grad_norm": 2.8175624529329477, "learning_rate": 3.651589939094161e-06, "loss": 1.3282, "step": 56725 }, { "epoch": 1.6727111897390534, "grad_norm": 2.8037104805864894, "learning_rate": 3.6509468852268056e-06, "loss": 1.2902, "step": 56730 }, { "epoch": 1.6728586171310629, "grad_norm": 2.7981510688726283, "learning_rate": 3.650303834274199e-06, "loss": 1.3106, "step": 56735 }, { "epoch": 1.6730060445230723, "grad_norm": 2.7686050784286036, "learning_rate": 3.6496607862552676e-06, "loss": 1.2871, "step": 56740 }, { "epoch": 1.6731534719150818, "grad_norm": 2.7802505227103858, "learning_rate": 3.6490177411889324e-06, "loss": 1.2799, "step": 56745 }, { "epoch": 1.6733008993070912, "grad_norm": 2.7685381710606367, "learning_rate": 3.648374699094114e-06, "loss": 1.3485, "step": 56750 }, { "epoch": 1.6734483266991007, "grad_norm": 2.7874188181547455, "learning_rate": 3.6477316599897377e-06, "loss": 1.254, "step": 56755 }, { "epoch": 1.6735957540911102, "grad_norm": 2.7236186053517724, "learning_rate": 3.6470886238947227e-06, "loss": 1.2858, "step": 56760 }, { "epoch": 1.6737431814831196, "grad_norm": 2.8427893757033096, "learning_rate": 3.6464455908279937e-06, "loss": 1.2745, "step": 56765 }, { "epoch": 1.673890608875129, "grad_norm": 2.844426670020713, "learning_rate": 3.6458025608084695e-06, "loss": 1.3355, "step": 56770 }, { "epoch": 1.6740380362671385, "grad_norm": 2.867418344982146, "learning_rate": 3.645159533855076e-06, "loss": 1.2521, "step": 56775 }, { "epoch": 1.674185463659148, "grad_norm": 2.8685784405096397, "learning_rate": 3.644516509986732e-06, "loss": 1.3031, "step": 56780 }, { "epoch": 1.6743328910511575, "grad_norm": 2.734470989675652, "learning_rate": 3.643873489222361e-06, "loss": 1.3245, "step": 56785 }, { "epoch": 1.6744803184431667, "grad_norm": 2.7927293170315814, "learning_rate": 3.643230471580882e-06, "loss": 1.3426, "step": 56790 }, { "epoch": 1.6746277458351762, "grad_norm": 2.870325117016414, "learning_rate": 3.6425874570812197e-06, "loss": 1.345, "step": 56795 }, { "epoch": 1.6747751732271856, "grad_norm": 2.799753482554971, "learning_rate": 3.6419444457422927e-06, "loss": 1.3405, "step": 56800 }, { "epoch": 1.674922600619195, "grad_norm": 2.784986678710191, "learning_rate": 3.6413014375830253e-06, "loss": 1.2793, "step": 56805 }, { "epoch": 1.6750700280112045, "grad_norm": 2.902202662453079, "learning_rate": 3.6406584326223373e-06, "loss": 1.3059, "step": 56810 }, { "epoch": 1.6752174554032138, "grad_norm": 2.777131674184737, "learning_rate": 3.640015430879149e-06, "loss": 1.3643, "step": 56815 }, { "epoch": 1.6753648827952232, "grad_norm": 2.8347291948027062, "learning_rate": 3.6393724323723834e-06, "loss": 1.3549, "step": 56820 }, { "epoch": 1.6755123101872327, "grad_norm": 2.762011037884118, "learning_rate": 3.6387294371209584e-06, "loss": 1.2829, "step": 56825 }, { "epoch": 1.6756597375792421, "grad_norm": 2.8045227528564185, "learning_rate": 3.638086445143798e-06, "loss": 1.3325, "step": 56830 }, { "epoch": 1.6758071649712516, "grad_norm": 2.8085982588287988, "learning_rate": 3.6374434564598213e-06, "loss": 1.3074, "step": 56835 }, { "epoch": 1.675954592363261, "grad_norm": 2.7550151325364354, "learning_rate": 3.63680047108795e-06, "loss": 1.3533, "step": 56840 }, { "epoch": 1.6761020197552705, "grad_norm": 2.8285519483142036, "learning_rate": 3.636157489047103e-06, "loss": 1.2967, "step": 56845 }, { "epoch": 1.67624944714728, "grad_norm": 2.9086819851795442, "learning_rate": 3.635514510356203e-06, "loss": 1.3391, "step": 56850 }, { "epoch": 1.6763968745392894, "grad_norm": 2.807469358989118, "learning_rate": 3.6348715350341684e-06, "loss": 1.3523, "step": 56855 }, { "epoch": 1.6765443019312989, "grad_norm": 2.8424658585164386, "learning_rate": 3.6342285630999218e-06, "loss": 1.2317, "step": 56860 }, { "epoch": 1.6766917293233083, "grad_norm": 3.1649745758893264, "learning_rate": 3.6335855945723794e-06, "loss": 1.2717, "step": 56865 }, { "epoch": 1.6768391567153178, "grad_norm": 2.7279291148552582, "learning_rate": 3.632942629470466e-06, "loss": 1.3038, "step": 56870 }, { "epoch": 1.6769865841073273, "grad_norm": 2.82372542721199, "learning_rate": 3.6322996678130996e-06, "loss": 1.312, "step": 56875 }, { "epoch": 1.6771340114993367, "grad_norm": 2.87352666661813, "learning_rate": 3.631656709619198e-06, "loss": 1.3509, "step": 56880 }, { "epoch": 1.6772814388913462, "grad_norm": 2.7318231882889057, "learning_rate": 3.6310137549076845e-06, "loss": 1.2779, "step": 56885 }, { "epoch": 1.6774288662833554, "grad_norm": 2.7110694928442953, "learning_rate": 3.6303708036974762e-06, "loss": 1.2854, "step": 56890 }, { "epoch": 1.6775762936753649, "grad_norm": 2.797560104441706, "learning_rate": 3.629727856007494e-06, "loss": 1.3165, "step": 56895 }, { "epoch": 1.6777237210673743, "grad_norm": 2.7701613659239737, "learning_rate": 3.629084911856656e-06, "loss": 1.2747, "step": 56900 }, { "epoch": 1.6778711484593838, "grad_norm": 2.72718056380723, "learning_rate": 3.628441971263884e-06, "loss": 1.2973, "step": 56905 }, { "epoch": 1.678018575851393, "grad_norm": 2.8360111306897995, "learning_rate": 3.627799034248095e-06, "loss": 1.2966, "step": 56910 }, { "epoch": 1.6781660032434025, "grad_norm": 2.885747672778935, "learning_rate": 3.62715610082821e-06, "loss": 1.2866, "step": 56915 }, { "epoch": 1.678313430635412, "grad_norm": 2.896174453218463, "learning_rate": 3.626513171023145e-06, "loss": 1.3128, "step": 56920 }, { "epoch": 1.6784608580274214, "grad_norm": 2.8470129248431393, "learning_rate": 3.625870244851823e-06, "loss": 1.3457, "step": 56925 }, { "epoch": 1.6786082854194309, "grad_norm": 2.860026234276432, "learning_rate": 3.6252273223331582e-06, "loss": 1.3047, "step": 56930 }, { "epoch": 1.6787557128114403, "grad_norm": 2.780510285931988, "learning_rate": 3.6245844034860744e-06, "loss": 1.247, "step": 56935 }, { "epoch": 1.6789031402034498, "grad_norm": 2.920076597876364, "learning_rate": 3.6239414883294872e-06, "loss": 1.3253, "step": 56940 }, { "epoch": 1.6790505675954592, "grad_norm": 2.8018552605746705, "learning_rate": 3.6232985768823153e-06, "loss": 1.3254, "step": 56945 }, { "epoch": 1.6791979949874687, "grad_norm": 2.9340009193530885, "learning_rate": 3.6226556691634783e-06, "loss": 1.314, "step": 56950 }, { "epoch": 1.6793454223794781, "grad_norm": 2.9798450232764586, "learning_rate": 3.6220127651918918e-06, "loss": 1.3459, "step": 56955 }, { "epoch": 1.6794928497714876, "grad_norm": 2.9141626203777338, "learning_rate": 3.6213698649864776e-06, "loss": 1.3643, "step": 56960 }, { "epoch": 1.679640277163497, "grad_norm": 2.767792970239195, "learning_rate": 3.620726968566151e-06, "loss": 1.324, "step": 56965 }, { "epoch": 1.6797877045555065, "grad_norm": 2.9778598960220846, "learning_rate": 3.620084075949832e-06, "loss": 1.3416, "step": 56970 }, { "epoch": 1.679935131947516, "grad_norm": 2.7925389148636763, "learning_rate": 3.6194411871564356e-06, "loss": 1.2508, "step": 56975 }, { "epoch": 1.6800825593395254, "grad_norm": 2.7511513822388056, "learning_rate": 3.618798302204883e-06, "loss": 1.2661, "step": 56980 }, { "epoch": 1.6802299867315347, "grad_norm": 2.7368664348156764, "learning_rate": 3.6181554211140893e-06, "loss": 1.2524, "step": 56985 }, { "epoch": 1.6803774141235441, "grad_norm": 3.090265293113655, "learning_rate": 3.617512543902974e-06, "loss": 1.3116, "step": 56990 }, { "epoch": 1.6805248415155536, "grad_norm": 2.7534433103846943, "learning_rate": 3.6168696705904513e-06, "loss": 1.3181, "step": 56995 }, { "epoch": 1.680672268907563, "grad_norm": 2.852536266492231, "learning_rate": 3.6162268011954426e-06, "loss": 1.3044, "step": 57000 }, { "epoch": 1.680672268907563, "eval_loss": 1.0823062658309937, "eval_runtime": 4.3059, "eval_samples_per_second": 91.967, "eval_steps_per_second": 3.019, "step": 57000 }, { "epoch": 1.6808196962995725, "grad_norm": 2.9143005711745653, "learning_rate": 3.615583935736861e-06, "loss": 1.3145, "step": 57005 }, { "epoch": 1.6809671236915817, "grad_norm": 2.9295836771082935, "learning_rate": 3.614941074233628e-06, "loss": 1.3079, "step": 57010 }, { "epoch": 1.6811145510835912, "grad_norm": 2.8811508956358938, "learning_rate": 3.6142982167046575e-06, "loss": 1.3164, "step": 57015 }, { "epoch": 1.6812619784756007, "grad_norm": 2.6701942812309345, "learning_rate": 3.613655363168866e-06, "loss": 1.2585, "step": 57020 }, { "epoch": 1.6814094058676101, "grad_norm": 2.9268618896580767, "learning_rate": 3.613012513645172e-06, "loss": 1.293, "step": 57025 }, { "epoch": 1.6815568332596196, "grad_norm": 2.860619482160934, "learning_rate": 3.6123696681524897e-06, "loss": 1.3006, "step": 57030 }, { "epoch": 1.681704260651629, "grad_norm": 2.7356511164924866, "learning_rate": 3.611726826709739e-06, "loss": 1.2708, "step": 57035 }, { "epoch": 1.6818516880436385, "grad_norm": 2.817385080054286, "learning_rate": 3.6110839893358328e-06, "loss": 1.2943, "step": 57040 }, { "epoch": 1.681999115435648, "grad_norm": 2.8149394387160718, "learning_rate": 3.6104411560496903e-06, "loss": 1.3317, "step": 57045 }, { "epoch": 1.6821465428276574, "grad_norm": 2.6563192241458755, "learning_rate": 3.609798326870224e-06, "loss": 1.2463, "step": 57050 }, { "epoch": 1.6822939702196669, "grad_norm": 2.8413976704321855, "learning_rate": 3.609155501816354e-06, "loss": 1.2839, "step": 57055 }, { "epoch": 1.6824413976116763, "grad_norm": 2.783910810450863, "learning_rate": 3.608512680906993e-06, "loss": 1.2395, "step": 57060 }, { "epoch": 1.6825888250036858, "grad_norm": 2.860538559116513, "learning_rate": 3.6078698641610584e-06, "loss": 1.2741, "step": 57065 }, { "epoch": 1.6827362523956952, "grad_norm": 2.707226148738812, "learning_rate": 3.607227051597465e-06, "loss": 1.3183, "step": 57070 }, { "epoch": 1.6828836797877047, "grad_norm": 2.8338176460865405, "learning_rate": 3.60658424323513e-06, "loss": 1.2961, "step": 57075 }, { "epoch": 1.683031107179714, "grad_norm": 2.8175956742146773, "learning_rate": 3.605941439092967e-06, "loss": 1.3275, "step": 57080 }, { "epoch": 1.6831785345717234, "grad_norm": 2.8344185329146527, "learning_rate": 3.6052986391898902e-06, "loss": 1.286, "step": 57085 }, { "epoch": 1.6833259619637329, "grad_norm": 2.8787919205222505, "learning_rate": 3.604655843544817e-06, "loss": 1.2856, "step": 57090 }, { "epoch": 1.6834733893557423, "grad_norm": 2.8085078667255647, "learning_rate": 3.604013052176662e-06, "loss": 1.3267, "step": 57095 }, { "epoch": 1.6836208167477518, "grad_norm": 2.8360458416800216, "learning_rate": 3.6033702651043393e-06, "loss": 1.3213, "step": 57100 }, { "epoch": 1.683768244139761, "grad_norm": 2.812905645445266, "learning_rate": 3.602727482346763e-06, "loss": 1.3262, "step": 57105 }, { "epoch": 1.6839156715317705, "grad_norm": 2.858584062563181, "learning_rate": 3.6020847039228503e-06, "loss": 1.3481, "step": 57110 }, { "epoch": 1.68406309892378, "grad_norm": 2.9017316620958953, "learning_rate": 3.6014419298515132e-06, "loss": 1.3327, "step": 57115 }, { "epoch": 1.6842105263157894, "grad_norm": 2.9073882321734215, "learning_rate": 3.6007991601516683e-06, "loss": 1.3334, "step": 57120 }, { "epoch": 1.6843579537077988, "grad_norm": 2.936934668733007, "learning_rate": 3.600156394842226e-06, "loss": 1.2935, "step": 57125 }, { "epoch": 1.6845053810998083, "grad_norm": 2.6842893743989826, "learning_rate": 3.599513633942105e-06, "loss": 1.3408, "step": 57130 }, { "epoch": 1.6846528084918178, "grad_norm": 2.9452559456110943, "learning_rate": 3.598870877470215e-06, "loss": 1.3461, "step": 57135 }, { "epoch": 1.6848002358838272, "grad_norm": 2.7744041567793074, "learning_rate": 3.598228125445474e-06, "loss": 1.243, "step": 57140 }, { "epoch": 1.6849476632758367, "grad_norm": 2.8444218663273277, "learning_rate": 3.597585377886794e-06, "loss": 1.3056, "step": 57145 }, { "epoch": 1.6850950906678461, "grad_norm": 2.7176329023805996, "learning_rate": 3.596942634813087e-06, "loss": 1.3546, "step": 57150 }, { "epoch": 1.6852425180598556, "grad_norm": 2.79922454658256, "learning_rate": 3.596299896243269e-06, "loss": 1.2978, "step": 57155 }, { "epoch": 1.685389945451865, "grad_norm": 2.82171375302639, "learning_rate": 3.59565716219625e-06, "loss": 1.2697, "step": 57160 }, { "epoch": 1.6855373728438745, "grad_norm": 2.8858677616463377, "learning_rate": 3.595014432690947e-06, "loss": 1.2848, "step": 57165 }, { "epoch": 1.685684800235884, "grad_norm": 2.9228778327450713, "learning_rate": 3.5943717077462706e-06, "loss": 1.326, "step": 57170 }, { "epoch": 1.6858322276278934, "grad_norm": 2.866235942672691, "learning_rate": 3.5937289873811354e-06, "loss": 1.3093, "step": 57175 }, { "epoch": 1.6859796550199027, "grad_norm": 2.765997104610701, "learning_rate": 3.593086271614451e-06, "loss": 1.309, "step": 57180 }, { "epoch": 1.6861270824119121, "grad_norm": 2.69636294021264, "learning_rate": 3.5924435604651343e-06, "loss": 1.2592, "step": 57185 }, { "epoch": 1.6862745098039216, "grad_norm": 2.9439302724308303, "learning_rate": 3.591800853952094e-06, "loss": 1.3248, "step": 57190 }, { "epoch": 1.686421937195931, "grad_norm": 2.793705093863409, "learning_rate": 3.591158152094246e-06, "loss": 1.2979, "step": 57195 }, { "epoch": 1.6865693645879405, "grad_norm": 2.8767753327975107, "learning_rate": 3.5905154549104996e-06, "loss": 1.2849, "step": 57200 }, { "epoch": 1.6867167919799497, "grad_norm": 2.6579417118315787, "learning_rate": 3.589872762419769e-06, "loss": 1.2253, "step": 57205 }, { "epoch": 1.6868642193719592, "grad_norm": 2.8422746215368426, "learning_rate": 3.589230074640965e-06, "loss": 1.307, "step": 57210 }, { "epoch": 1.6870116467639686, "grad_norm": 2.800595618182163, "learning_rate": 3.5885873915929984e-06, "loss": 1.2696, "step": 57215 }, { "epoch": 1.687159074155978, "grad_norm": 2.832287631297388, "learning_rate": 3.5879447132947834e-06, "loss": 1.3143, "step": 57220 }, { "epoch": 1.6873065015479876, "grad_norm": 2.68331002976512, "learning_rate": 3.58730203976523e-06, "loss": 1.2669, "step": 57225 }, { "epoch": 1.687453928939997, "grad_norm": 2.7803205937947966, "learning_rate": 3.5866593710232502e-06, "loss": 1.3107, "step": 57230 }, { "epoch": 1.6876013563320065, "grad_norm": 2.8166684557423123, "learning_rate": 3.5860167070877537e-06, "loss": 1.2979, "step": 57235 }, { "epoch": 1.687748783724016, "grad_norm": 2.837769224055164, "learning_rate": 3.585374047977654e-06, "loss": 1.2705, "step": 57240 }, { "epoch": 1.6878962111160254, "grad_norm": 2.951511289771479, "learning_rate": 3.58473139371186e-06, "loss": 1.3174, "step": 57245 }, { "epoch": 1.6880436385080348, "grad_norm": 2.8248452201899887, "learning_rate": 3.584088744309285e-06, "loss": 1.3204, "step": 57250 }, { "epoch": 1.6881910659000443, "grad_norm": 2.6853388277312673, "learning_rate": 3.5834460997888358e-06, "loss": 1.2523, "step": 57255 }, { "epoch": 1.6883384932920538, "grad_norm": 2.8474194873505, "learning_rate": 3.5828034601694274e-06, "loss": 1.3037, "step": 57260 }, { "epoch": 1.6884859206840632, "grad_norm": 2.753541024056482, "learning_rate": 3.5821608254699663e-06, "loss": 1.3037, "step": 57265 }, { "epoch": 1.6886333480760727, "grad_norm": 2.8102982132016403, "learning_rate": 3.5815181957093663e-06, "loss": 1.3107, "step": 57270 }, { "epoch": 1.688780775468082, "grad_norm": 2.8967119850984284, "learning_rate": 3.5808755709065353e-06, "loss": 1.3029, "step": 57275 }, { "epoch": 1.6889282028600914, "grad_norm": 2.764721314470492, "learning_rate": 3.5802329510803837e-06, "loss": 1.2981, "step": 57280 }, { "epoch": 1.6890756302521008, "grad_norm": 2.723719938015296, "learning_rate": 3.5795903362498215e-06, "loss": 1.3047, "step": 57285 }, { "epoch": 1.6892230576441103, "grad_norm": 2.800580224380625, "learning_rate": 3.578947726433757e-06, "loss": 1.3377, "step": 57290 }, { "epoch": 1.6893704850361198, "grad_norm": 2.8662394603788885, "learning_rate": 3.5783051216511027e-06, "loss": 1.2991, "step": 57295 }, { "epoch": 1.689517912428129, "grad_norm": 2.7197414781035674, "learning_rate": 3.577662521920765e-06, "loss": 1.3106, "step": 57300 }, { "epoch": 1.6896653398201384, "grad_norm": 2.753214435570557, "learning_rate": 3.5770199272616555e-06, "loss": 1.3014, "step": 57305 }, { "epoch": 1.689812767212148, "grad_norm": 2.911432354881098, "learning_rate": 3.5763773376926803e-06, "loss": 1.3212, "step": 57310 }, { "epoch": 1.6899601946041574, "grad_norm": 2.8429932242873632, "learning_rate": 3.575734753232752e-06, "loss": 1.2727, "step": 57315 }, { "epoch": 1.6901076219961668, "grad_norm": 2.8108414544327074, "learning_rate": 3.575092173900776e-06, "loss": 1.3166, "step": 57320 }, { "epoch": 1.6902550493881763, "grad_norm": 2.841275399216115, "learning_rate": 3.574449599715664e-06, "loss": 1.2881, "step": 57325 }, { "epoch": 1.6904024767801857, "grad_norm": 2.7478367565280286, "learning_rate": 3.5738070306963224e-06, "loss": 1.3537, "step": 57330 }, { "epoch": 1.6905499041721952, "grad_norm": 2.790743629705465, "learning_rate": 3.5731644668616604e-06, "loss": 1.2913, "step": 57335 }, { "epoch": 1.6906973315642047, "grad_norm": 2.77371568362723, "learning_rate": 3.5725219082305865e-06, "loss": 1.2605, "step": 57340 }, { "epoch": 1.6908447589562141, "grad_norm": 2.8790081382591475, "learning_rate": 3.571879354822006e-06, "loss": 1.3112, "step": 57345 }, { "epoch": 1.6909921863482236, "grad_norm": 2.8264852358055377, "learning_rate": 3.571236806654831e-06, "loss": 1.3102, "step": 57350 }, { "epoch": 1.691139613740233, "grad_norm": 2.8859176962238497, "learning_rate": 3.570594263747966e-06, "loss": 1.3559, "step": 57355 }, { "epoch": 1.6912870411322425, "grad_norm": 2.8246063880075876, "learning_rate": 3.5699517261203203e-06, "loss": 1.3227, "step": 57360 }, { "epoch": 1.691434468524252, "grad_norm": 2.9025735715368612, "learning_rate": 3.5693091937907993e-06, "loss": 1.2642, "step": 57365 }, { "epoch": 1.6915818959162614, "grad_norm": 2.7937251449415372, "learning_rate": 3.568666666778313e-06, "loss": 1.2909, "step": 57370 }, { "epoch": 1.6917293233082706, "grad_norm": 2.76242472373724, "learning_rate": 3.5680241451017666e-06, "loss": 1.2789, "step": 57375 }, { "epoch": 1.69187675070028, "grad_norm": 2.9095168166743783, "learning_rate": 3.567381628780068e-06, "loss": 1.3376, "step": 57380 }, { "epoch": 1.6920241780922896, "grad_norm": 2.857781699498376, "learning_rate": 3.566739117832122e-06, "loss": 1.3546, "step": 57385 }, { "epoch": 1.692171605484299, "grad_norm": 2.8047959315901596, "learning_rate": 3.5660966122768388e-06, "loss": 1.274, "step": 57390 }, { "epoch": 1.6923190328763085, "grad_norm": 2.857947586825322, "learning_rate": 3.5654541121331205e-06, "loss": 1.3158, "step": 57395 }, { "epoch": 1.6924664602683177, "grad_norm": 2.826885359943478, "learning_rate": 3.5648116174198775e-06, "loss": 1.2801, "step": 57400 }, { "epoch": 1.6926138876603272, "grad_norm": 2.81052652389845, "learning_rate": 3.564169128156014e-06, "loss": 1.2648, "step": 57405 }, { "epoch": 1.6927613150523366, "grad_norm": 2.8200236947088597, "learning_rate": 3.5635266443604353e-06, "loss": 1.2932, "step": 57410 }, { "epoch": 1.692908742444346, "grad_norm": 2.8291214361201447, "learning_rate": 3.5628841660520495e-06, "loss": 1.311, "step": 57415 }, { "epoch": 1.6930561698363555, "grad_norm": 2.842595215724373, "learning_rate": 3.562241693249758e-06, "loss": 1.2333, "step": 57420 }, { "epoch": 1.693203597228365, "grad_norm": 2.798052613312174, "learning_rate": 3.5615992259724717e-06, "loss": 1.2555, "step": 57425 }, { "epoch": 1.6933510246203745, "grad_norm": 2.8038393064425424, "learning_rate": 3.560956764239092e-06, "loss": 1.2716, "step": 57430 }, { "epoch": 1.693498452012384, "grad_norm": 2.8049188574215145, "learning_rate": 3.5603143080685265e-06, "loss": 1.282, "step": 57435 }, { "epoch": 1.6936458794043934, "grad_norm": 2.793378797029457, "learning_rate": 3.559671857479677e-06, "loss": 1.3329, "step": 57440 }, { "epoch": 1.6937933067964028, "grad_norm": 2.766477055741486, "learning_rate": 3.5590294124914524e-06, "loss": 1.3043, "step": 57445 }, { "epoch": 1.6939407341884123, "grad_norm": 2.7962722862488345, "learning_rate": 3.5583869731227535e-06, "loss": 1.2824, "step": 57450 }, { "epoch": 1.6940881615804217, "grad_norm": 2.816416690529999, "learning_rate": 3.557744539392489e-06, "loss": 1.2812, "step": 57455 }, { "epoch": 1.6942355889724312, "grad_norm": 2.9489618752443207, "learning_rate": 3.5571021113195596e-06, "loss": 1.3235, "step": 57460 }, { "epoch": 1.6943830163644407, "grad_norm": 2.8245031757932764, "learning_rate": 3.5564596889228712e-06, "loss": 1.2718, "step": 57465 }, { "epoch": 1.69453044375645, "grad_norm": 2.8487818490009365, "learning_rate": 3.5558172722213283e-06, "loss": 1.3057, "step": 57470 }, { "epoch": 1.6946778711484594, "grad_norm": 2.796681312197314, "learning_rate": 3.555174861233832e-06, "loss": 1.3192, "step": 57475 }, { "epoch": 1.6948252985404688, "grad_norm": 2.745668293267448, "learning_rate": 3.5545324559792896e-06, "loss": 1.2793, "step": 57480 }, { "epoch": 1.6949727259324783, "grad_norm": 2.7542073810749064, "learning_rate": 3.5538900564766016e-06, "loss": 1.3011, "step": 57485 }, { "epoch": 1.6951201533244877, "grad_norm": 2.7421099364201513, "learning_rate": 3.5532476627446737e-06, "loss": 1.3318, "step": 57490 }, { "epoch": 1.695267580716497, "grad_norm": 2.823710498385027, "learning_rate": 3.5526052748024065e-06, "loss": 1.2989, "step": 57495 }, { "epoch": 1.6954150081085064, "grad_norm": 2.787610747145795, "learning_rate": 3.5519628926687064e-06, "loss": 1.2694, "step": 57500 }, { "epoch": 1.6954150081085064, "eval_loss": 1.0815422534942627, "eval_runtime": 4.1831, "eval_samples_per_second": 94.666, "eval_steps_per_second": 3.108, "step": 57500 }, { "epoch": 1.6955624355005159, "grad_norm": 2.870925685067824, "learning_rate": 3.551320516362473e-06, "loss": 1.318, "step": 57505 }, { "epoch": 1.6957098628925253, "grad_norm": 2.6982843253704596, "learning_rate": 3.5506781459026114e-06, "loss": 1.2908, "step": 57510 }, { "epoch": 1.6958572902845348, "grad_norm": 2.743307854412444, "learning_rate": 3.5500357813080206e-06, "loss": 1.3244, "step": 57515 }, { "epoch": 1.6960047176765443, "grad_norm": 2.8647026898399393, "learning_rate": 3.5493934225976075e-06, "loss": 1.2496, "step": 57520 }, { "epoch": 1.6961521450685537, "grad_norm": 2.836132227408544, "learning_rate": 3.548751069790271e-06, "loss": 1.3096, "step": 57525 }, { "epoch": 1.6962995724605632, "grad_norm": 2.8550398366352496, "learning_rate": 3.5481087229049146e-06, "loss": 1.278, "step": 57530 }, { "epoch": 1.6964469998525726, "grad_norm": 2.8026868840804027, "learning_rate": 3.54746638196044e-06, "loss": 1.2934, "step": 57535 }, { "epoch": 1.696594427244582, "grad_norm": 2.857174583421712, "learning_rate": 3.5468240469757475e-06, "loss": 1.2737, "step": 57540 }, { "epoch": 1.6967418546365916, "grad_norm": 2.8131446559385442, "learning_rate": 3.5461817179697406e-06, "loss": 1.3303, "step": 57545 }, { "epoch": 1.696889282028601, "grad_norm": 2.7011033833349076, "learning_rate": 3.5455393949613173e-06, "loss": 1.2534, "step": 57550 }, { "epoch": 1.6970367094206105, "grad_norm": 2.713926479840991, "learning_rate": 3.5448970779693826e-06, "loss": 1.3, "step": 57555 }, { "epoch": 1.69718413681262, "grad_norm": 2.767161338128999, "learning_rate": 3.5442547670128347e-06, "loss": 1.3367, "step": 57560 }, { "epoch": 1.6973315642046294, "grad_norm": 2.619247508497955, "learning_rate": 3.5436124621105763e-06, "loss": 1.2977, "step": 57565 }, { "epoch": 1.6974789915966386, "grad_norm": 2.8785759027458564, "learning_rate": 3.5429701632815047e-06, "loss": 1.3005, "step": 57570 }, { "epoch": 1.697626418988648, "grad_norm": 2.8633865796139175, "learning_rate": 3.5423278705445245e-06, "loss": 1.3307, "step": 57575 }, { "epoch": 1.6977738463806575, "grad_norm": 2.7729718603240583, "learning_rate": 3.541685583918532e-06, "loss": 1.2597, "step": 57580 }, { "epoch": 1.697921273772667, "grad_norm": 2.804373882157449, "learning_rate": 3.541043303422431e-06, "loss": 1.2563, "step": 57585 }, { "epoch": 1.6980687011646762, "grad_norm": 2.854816372098361, "learning_rate": 3.540401029075118e-06, "loss": 1.2861, "step": 57590 }, { "epoch": 1.6982161285566857, "grad_norm": 2.743577501056839, "learning_rate": 3.539758760895496e-06, "loss": 1.29, "step": 57595 }, { "epoch": 1.6983635559486951, "grad_norm": 2.8354429871448903, "learning_rate": 3.539116498902461e-06, "loss": 1.2661, "step": 57600 }, { "epoch": 1.6985109833407046, "grad_norm": 2.946149636037394, "learning_rate": 3.538474243114913e-06, "loss": 1.2807, "step": 57605 }, { "epoch": 1.698658410732714, "grad_norm": 2.8588919546560088, "learning_rate": 3.5378319935517536e-06, "loss": 1.3266, "step": 57610 }, { "epoch": 1.6988058381247235, "grad_norm": 2.82089156848326, "learning_rate": 3.5371897502318796e-06, "loss": 1.318, "step": 57615 }, { "epoch": 1.698953265516733, "grad_norm": 2.8800149315346624, "learning_rate": 3.5365475131741904e-06, "loss": 1.212, "step": 57620 }, { "epoch": 1.6991006929087424, "grad_norm": 2.7486102041500953, "learning_rate": 3.5359052823975826e-06, "loss": 1.315, "step": 57625 }, { "epoch": 1.699248120300752, "grad_norm": 2.7899891321583787, "learning_rate": 3.5352630579209586e-06, "loss": 1.2936, "step": 57630 }, { "epoch": 1.6993955476927614, "grad_norm": 2.8255125205401876, "learning_rate": 3.5346208397632134e-06, "loss": 1.3515, "step": 57635 }, { "epoch": 1.6995429750847708, "grad_norm": 2.8309092360580665, "learning_rate": 3.533978627943246e-06, "loss": 1.2898, "step": 57640 }, { "epoch": 1.6996904024767803, "grad_norm": 2.7691643793205647, "learning_rate": 3.5333364224799533e-06, "loss": 1.294, "step": 57645 }, { "epoch": 1.6998378298687897, "grad_norm": 2.7675491443128033, "learning_rate": 3.5326942233922355e-06, "loss": 1.3179, "step": 57650 }, { "epoch": 1.6999852572607992, "grad_norm": 2.818769599371006, "learning_rate": 3.5320520306989863e-06, "loss": 1.3176, "step": 57655 }, { "epoch": 1.7001326846528086, "grad_norm": 2.838856902765387, "learning_rate": 3.5314098444191067e-06, "loss": 1.2739, "step": 57660 }, { "epoch": 1.7002801120448179, "grad_norm": 2.720633776319307, "learning_rate": 3.530767664571492e-06, "loss": 1.2722, "step": 57665 }, { "epoch": 1.7004275394368273, "grad_norm": 2.967778252869602, "learning_rate": 3.5301254911750382e-06, "loss": 1.3149, "step": 57670 }, { "epoch": 1.7005749668288368, "grad_norm": 2.881853990760465, "learning_rate": 3.529483324248644e-06, "loss": 1.2871, "step": 57675 }, { "epoch": 1.7007223942208463, "grad_norm": 2.8089579314738775, "learning_rate": 3.528841163811203e-06, "loss": 1.3347, "step": 57680 }, { "epoch": 1.7008698216128557, "grad_norm": 2.8036233723061152, "learning_rate": 3.5281990098816144e-06, "loss": 1.2934, "step": 57685 }, { "epoch": 1.701017249004865, "grad_norm": 2.83796119672723, "learning_rate": 3.527556862478773e-06, "loss": 1.3376, "step": 57690 }, { "epoch": 1.7011646763968744, "grad_norm": 2.7117739089367823, "learning_rate": 3.5269147216215752e-06, "loss": 1.341, "step": 57695 }, { "epoch": 1.7013121037888839, "grad_norm": 2.8308179528316284, "learning_rate": 3.5262725873289154e-06, "loss": 1.313, "step": 57700 }, { "epoch": 1.7014595311808933, "grad_norm": 2.9395602122889692, "learning_rate": 3.525630459619692e-06, "loss": 1.31, "step": 57705 }, { "epoch": 1.7016069585729028, "grad_norm": 2.7885492092060606, "learning_rate": 3.5249883385127967e-06, "loss": 1.268, "step": 57710 }, { "epoch": 1.7017543859649122, "grad_norm": 2.867384817985183, "learning_rate": 3.5243462240271273e-06, "loss": 1.3016, "step": 57715 }, { "epoch": 1.7019018133569217, "grad_norm": 2.8970988493564107, "learning_rate": 3.523704116181578e-06, "loss": 1.3167, "step": 57720 }, { "epoch": 1.7020492407489312, "grad_norm": 2.825597690586022, "learning_rate": 3.5230620149950437e-06, "loss": 1.3181, "step": 57725 }, { "epoch": 1.7021966681409406, "grad_norm": 2.9174526522313524, "learning_rate": 3.5224199204864195e-06, "loss": 1.3092, "step": 57730 }, { "epoch": 1.70234409553295, "grad_norm": 2.9127973275060612, "learning_rate": 3.521777832674597e-06, "loss": 1.2964, "step": 57735 }, { "epoch": 1.7024915229249595, "grad_norm": 2.857356025685101, "learning_rate": 3.521135751578474e-06, "loss": 1.3092, "step": 57740 }, { "epoch": 1.702638950316969, "grad_norm": 3.0073469046992667, "learning_rate": 3.520493677216942e-06, "loss": 1.3503, "step": 57745 }, { "epoch": 1.7027863777089784, "grad_norm": 2.8266788108116114, "learning_rate": 3.5198516096088965e-06, "loss": 1.2996, "step": 57750 }, { "epoch": 1.702933805100988, "grad_norm": 2.746289952524026, "learning_rate": 3.519209548773228e-06, "loss": 1.3054, "step": 57755 }, { "epoch": 1.7030812324929971, "grad_norm": 2.785387302226401, "learning_rate": 3.5185674947288343e-06, "loss": 1.264, "step": 57760 }, { "epoch": 1.7032286598850066, "grad_norm": 2.790534929273623, "learning_rate": 3.5179254474946057e-06, "loss": 1.3307, "step": 57765 }, { "epoch": 1.703376087277016, "grad_norm": 3.035600510944872, "learning_rate": 3.5172834070894363e-06, "loss": 1.3172, "step": 57770 }, { "epoch": 1.7035235146690255, "grad_norm": 2.7974033885156806, "learning_rate": 3.5166413735322166e-06, "loss": 1.3138, "step": 57775 }, { "epoch": 1.703670942061035, "grad_norm": 2.8083402468896046, "learning_rate": 3.515999346841843e-06, "loss": 1.314, "step": 57780 }, { "epoch": 1.7038183694530442, "grad_norm": 2.752310823470103, "learning_rate": 3.515357327037204e-06, "loss": 1.2724, "step": 57785 }, { "epoch": 1.7039657968450537, "grad_norm": 2.9618259788316923, "learning_rate": 3.514715314137195e-06, "loss": 1.3384, "step": 57790 }, { "epoch": 1.7041132242370631, "grad_norm": 2.880932334161403, "learning_rate": 3.514073308160707e-06, "loss": 1.2821, "step": 57795 }, { "epoch": 1.7042606516290726, "grad_norm": 2.8378784999645554, "learning_rate": 3.5134313091266302e-06, "loss": 1.3189, "step": 57800 }, { "epoch": 1.704408079021082, "grad_norm": 2.737982789424893, "learning_rate": 3.512789317053858e-06, "loss": 1.3118, "step": 57805 }, { "epoch": 1.7045555064130915, "grad_norm": 2.9538185775776022, "learning_rate": 3.5121473319612796e-06, "loss": 1.2989, "step": 57810 }, { "epoch": 1.704702933805101, "grad_norm": 2.8374454440809416, "learning_rate": 3.5115053538677886e-06, "loss": 1.2719, "step": 57815 }, { "epoch": 1.7048503611971104, "grad_norm": 2.7778163735586987, "learning_rate": 3.510863382792275e-06, "loss": 1.3484, "step": 57820 }, { "epoch": 1.7049977885891199, "grad_norm": 2.8503543953297332, "learning_rate": 3.510221418753629e-06, "loss": 1.2938, "step": 57825 }, { "epoch": 1.7051452159811293, "grad_norm": 2.9210319523193498, "learning_rate": 3.5095794617707413e-06, "loss": 1.3158, "step": 57830 }, { "epoch": 1.7052926433731388, "grad_norm": 2.73909223563205, "learning_rate": 3.5089375118625034e-06, "loss": 1.3155, "step": 57835 }, { "epoch": 1.7054400707651483, "grad_norm": 2.8688048231244654, "learning_rate": 3.508295569047803e-06, "loss": 1.345, "step": 57840 }, { "epoch": 1.7055874981571577, "grad_norm": 2.8891711041012527, "learning_rate": 3.5076536333455333e-06, "loss": 1.2453, "step": 57845 }, { "epoch": 1.7057349255491672, "grad_norm": 2.8801477170082848, "learning_rate": 3.507011704774581e-06, "loss": 1.2294, "step": 57850 }, { "epoch": 1.7058823529411766, "grad_norm": 2.864613156116145, "learning_rate": 3.5063697833538376e-06, "loss": 1.296, "step": 57855 }, { "epoch": 1.7060297803331859, "grad_norm": 2.7327767662572255, "learning_rate": 3.505727869102192e-06, "loss": 1.2631, "step": 57860 }, { "epoch": 1.7061772077251953, "grad_norm": 2.792064318632416, "learning_rate": 3.5050859620385305e-06, "loss": 1.3253, "step": 57865 }, { "epoch": 1.7063246351172048, "grad_norm": 2.7554028210694264, "learning_rate": 3.504444062181746e-06, "loss": 1.3301, "step": 57870 }, { "epoch": 1.7064720625092142, "grad_norm": 2.87466373064823, "learning_rate": 3.5038021695507247e-06, "loss": 1.2565, "step": 57875 }, { "epoch": 1.7066194899012237, "grad_norm": 2.829952395235379, "learning_rate": 3.5031602841643563e-06, "loss": 1.2497, "step": 57880 }, { "epoch": 1.706766917293233, "grad_norm": 2.7661248534109775, "learning_rate": 3.502518406041527e-06, "loss": 1.298, "step": 57885 }, { "epoch": 1.7069143446852424, "grad_norm": 3.072315032411548, "learning_rate": 3.501876535201128e-06, "loss": 1.3173, "step": 57890 }, { "epoch": 1.7070617720772518, "grad_norm": 2.9165001677734845, "learning_rate": 3.501234671662044e-06, "loss": 1.2442, "step": 57895 }, { "epoch": 1.7072091994692613, "grad_norm": 2.7958164765073135, "learning_rate": 3.500592815443165e-06, "loss": 1.2769, "step": 57900 }, { "epoch": 1.7073566268612708, "grad_norm": 2.861805382166967, "learning_rate": 3.4999509665633753e-06, "loss": 1.2705, "step": 57905 }, { "epoch": 1.7075040542532802, "grad_norm": 2.917189750085802, "learning_rate": 3.4993091250415657e-06, "loss": 1.3253, "step": 57910 }, { "epoch": 1.7076514816452897, "grad_norm": 2.816760667320238, "learning_rate": 3.49866729089662e-06, "loss": 1.3, "step": 57915 }, { "epoch": 1.7077989090372991, "grad_norm": 2.844446402837887, "learning_rate": 3.4980254641474276e-06, "loss": 1.3264, "step": 57920 }, { "epoch": 1.7079463364293086, "grad_norm": 2.905285949707857, "learning_rate": 3.4973836448128735e-06, "loss": 1.3116, "step": 57925 }, { "epoch": 1.708093763821318, "grad_norm": 2.7628102515934003, "learning_rate": 3.4967418329118433e-06, "loss": 1.2845, "step": 57930 }, { "epoch": 1.7082411912133275, "grad_norm": 2.799342711136395, "learning_rate": 3.496100028463225e-06, "loss": 1.3047, "step": 57935 }, { "epoch": 1.708388618605337, "grad_norm": 2.7454920402588106, "learning_rate": 3.4954582314859017e-06, "loss": 1.319, "step": 57940 }, { "epoch": 1.7085360459973464, "grad_norm": 2.915807279066314, "learning_rate": 3.4948164419987618e-06, "loss": 1.2706, "step": 57945 }, { "epoch": 1.708683473389356, "grad_norm": 3.7485599627431476, "learning_rate": 3.494174660020689e-06, "loss": 1.3136, "step": 57950 }, { "epoch": 1.7088309007813651, "grad_norm": 2.8908912984272446, "learning_rate": 3.4935328855705697e-06, "loss": 1.3845, "step": 57955 }, { "epoch": 1.7089783281733746, "grad_norm": 2.781978876479711, "learning_rate": 3.4928911186672864e-06, "loss": 1.3105, "step": 57960 }, { "epoch": 1.709125755565384, "grad_norm": 2.770219221646631, "learning_rate": 3.4922493593297273e-06, "loss": 1.2947, "step": 57965 }, { "epoch": 1.7092731829573935, "grad_norm": 2.888472207880248, "learning_rate": 3.491607607576773e-06, "loss": 1.235, "step": 57970 }, { "epoch": 1.709420610349403, "grad_norm": 2.821633390290198, "learning_rate": 3.490965863427312e-06, "loss": 1.3143, "step": 57975 }, { "epoch": 1.7095680377414122, "grad_norm": 2.7879167567559016, "learning_rate": 3.4903241269002257e-06, "loss": 1.327, "step": 57980 }, { "epoch": 1.7097154651334217, "grad_norm": 2.6828468199353344, "learning_rate": 3.4896823980143984e-06, "loss": 1.2491, "step": 57985 }, { "epoch": 1.709862892525431, "grad_norm": 2.80640334983822, "learning_rate": 3.4890406767887144e-06, "loss": 1.307, "step": 57990 }, { "epoch": 1.7100103199174406, "grad_norm": 2.757957980701326, "learning_rate": 3.488398963242055e-06, "loss": 1.3184, "step": 57995 }, { "epoch": 1.71015774730945, "grad_norm": 2.763725978976806, "learning_rate": 3.4877572573933063e-06, "loss": 1.2786, "step": 58000 }, { "epoch": 1.71015774730945, "eval_loss": 1.0809130668640137, "eval_runtime": 4.2394, "eval_samples_per_second": 93.41, "eval_steps_per_second": 3.067, "step": 58000 }, { "epoch": 1.7103051747014595, "grad_norm": 2.6756921739391553, "learning_rate": 3.4871155592613485e-06, "loss": 1.2942, "step": 58005 }, { "epoch": 1.710452602093469, "grad_norm": 2.7472168393651555, "learning_rate": 3.486473868865067e-06, "loss": 1.3337, "step": 58010 }, { "epoch": 1.7106000294854784, "grad_norm": 2.910453287301519, "learning_rate": 3.485832186223341e-06, "loss": 1.311, "step": 58015 }, { "epoch": 1.7107474568774879, "grad_norm": 2.820980571462123, "learning_rate": 3.485190511355056e-06, "loss": 1.3157, "step": 58020 }, { "epoch": 1.7108948842694973, "grad_norm": 2.7504229925964054, "learning_rate": 3.484548844279092e-06, "loss": 1.2539, "step": 58025 }, { "epoch": 1.7110423116615068, "grad_norm": 2.784295728653331, "learning_rate": 3.4839071850143317e-06, "loss": 1.3066, "step": 58030 }, { "epoch": 1.7111897390535162, "grad_norm": 2.7610157021223767, "learning_rate": 3.4832655335796555e-06, "loss": 1.2794, "step": 58035 }, { "epoch": 1.7113371664455257, "grad_norm": 2.903840338108573, "learning_rate": 3.4826238899939466e-06, "loss": 1.3119, "step": 58040 }, { "epoch": 1.7114845938375352, "grad_norm": 2.8888076658302, "learning_rate": 3.4819822542760833e-06, "loss": 1.3074, "step": 58045 }, { "epoch": 1.7116320212295446, "grad_norm": 2.745310800521748, "learning_rate": 3.4813406264449504e-06, "loss": 1.2526, "step": 58050 }, { "epoch": 1.7117794486215538, "grad_norm": 2.7855842989171715, "learning_rate": 3.480699006519426e-06, "loss": 1.2657, "step": 58055 }, { "epoch": 1.7119268760135633, "grad_norm": 2.8951200653762617, "learning_rate": 3.4800573945183903e-06, "loss": 1.2529, "step": 58060 }, { "epoch": 1.7120743034055728, "grad_norm": 2.8976784993447513, "learning_rate": 3.4794157904607245e-06, "loss": 1.3222, "step": 58065 }, { "epoch": 1.7122217307975822, "grad_norm": 2.7891891967931013, "learning_rate": 3.478774194365306e-06, "loss": 1.2612, "step": 58070 }, { "epoch": 1.7123691581895917, "grad_norm": 2.7927443682125372, "learning_rate": 3.478132606251019e-06, "loss": 1.2535, "step": 58075 }, { "epoch": 1.712516585581601, "grad_norm": 2.884690922333757, "learning_rate": 3.4774910261367395e-06, "loss": 1.3587, "step": 58080 }, { "epoch": 1.7126640129736104, "grad_norm": 2.9380444824675434, "learning_rate": 3.4768494540413483e-06, "loss": 1.3184, "step": 58085 }, { "epoch": 1.7128114403656198, "grad_norm": 2.7228952346927704, "learning_rate": 3.476207889983722e-06, "loss": 1.3138, "step": 58090 }, { "epoch": 1.7129588677576293, "grad_norm": 2.805461810225563, "learning_rate": 3.4755663339827433e-06, "loss": 1.3281, "step": 58095 }, { "epoch": 1.7131062951496387, "grad_norm": 2.699582726832402, "learning_rate": 3.474924786057287e-06, "loss": 1.2621, "step": 58100 }, { "epoch": 1.7132537225416482, "grad_norm": 2.6348321462549995, "learning_rate": 3.4742832462262344e-06, "loss": 1.2544, "step": 58105 }, { "epoch": 1.7134011499336577, "grad_norm": 2.8223219528762846, "learning_rate": 3.473641714508461e-06, "loss": 1.2566, "step": 58110 }, { "epoch": 1.7135485773256671, "grad_norm": 2.820204332910986, "learning_rate": 3.4730001909228467e-06, "loss": 1.2865, "step": 58115 }, { "epoch": 1.7136960047176766, "grad_norm": 2.766106096148054, "learning_rate": 3.4723586754882684e-06, "loss": 1.3177, "step": 58120 }, { "epoch": 1.713843432109686, "grad_norm": 2.777164858508151, "learning_rate": 3.4717171682236016e-06, "loss": 1.3059, "step": 58125 }, { "epoch": 1.7139908595016955, "grad_norm": 2.683459001872919, "learning_rate": 3.471075669147726e-06, "loss": 1.2922, "step": 58130 }, { "epoch": 1.714138286893705, "grad_norm": 2.810662002763367, "learning_rate": 3.470434178279517e-06, "loss": 1.3015, "step": 58135 }, { "epoch": 1.7142857142857144, "grad_norm": 2.808285305972749, "learning_rate": 3.4697926956378515e-06, "loss": 1.3027, "step": 58140 }, { "epoch": 1.7144331416777239, "grad_norm": 2.815963153063468, "learning_rate": 3.469151221241605e-06, "loss": 1.3024, "step": 58145 }, { "epoch": 1.714580569069733, "grad_norm": 2.7680230052227843, "learning_rate": 3.4685097551096563e-06, "loss": 1.3162, "step": 58150 }, { "epoch": 1.7147279964617426, "grad_norm": 2.8874935740003376, "learning_rate": 3.467868297260878e-06, "loss": 1.3009, "step": 58155 }, { "epoch": 1.714875423853752, "grad_norm": 2.8139728337158827, "learning_rate": 3.467226847714148e-06, "loss": 1.2992, "step": 58160 }, { "epoch": 1.7150228512457615, "grad_norm": 2.8184838375639414, "learning_rate": 3.46658540648834e-06, "loss": 1.2866, "step": 58165 }, { "epoch": 1.715170278637771, "grad_norm": 2.7465633541857057, "learning_rate": 3.4659439736023314e-06, "loss": 1.2847, "step": 58170 }, { "epoch": 1.7153177060297802, "grad_norm": 2.78033552043611, "learning_rate": 3.4653025490749937e-06, "loss": 1.3064, "step": 58175 }, { "epoch": 1.7154651334217896, "grad_norm": 2.865551114902711, "learning_rate": 3.4646611329252057e-06, "loss": 1.3023, "step": 58180 }, { "epoch": 1.715612560813799, "grad_norm": 2.9115836497062224, "learning_rate": 3.4640197251718398e-06, "loss": 1.2774, "step": 58185 }, { "epoch": 1.7157599882058086, "grad_norm": 2.823197083999473, "learning_rate": 3.463378325833768e-06, "loss": 1.2518, "step": 58190 }, { "epoch": 1.715907415597818, "grad_norm": 2.7843781733464765, "learning_rate": 3.4627369349298684e-06, "loss": 1.2806, "step": 58195 }, { "epoch": 1.7160548429898275, "grad_norm": 2.7107297906622927, "learning_rate": 3.4620955524790105e-06, "loss": 1.2791, "step": 58200 }, { "epoch": 1.716202270381837, "grad_norm": 2.8382940822628293, "learning_rate": 3.4614541785000716e-06, "loss": 1.2937, "step": 58205 }, { "epoch": 1.7163496977738464, "grad_norm": 2.7005231804026772, "learning_rate": 3.4608128130119217e-06, "loss": 1.3211, "step": 58210 }, { "epoch": 1.7164971251658558, "grad_norm": 2.8206299600158373, "learning_rate": 3.4601714560334363e-06, "loss": 1.2615, "step": 58215 }, { "epoch": 1.7166445525578653, "grad_norm": 2.823223935327022, "learning_rate": 3.4595301075834847e-06, "loss": 1.3279, "step": 58220 }, { "epoch": 1.7167919799498748, "grad_norm": 2.7620758135953816, "learning_rate": 3.458888767680943e-06, "loss": 1.2752, "step": 58225 }, { "epoch": 1.7169394073418842, "grad_norm": 2.7899220486930245, "learning_rate": 3.45824743634468e-06, "loss": 1.2971, "step": 58230 }, { "epoch": 1.7170868347338937, "grad_norm": 2.8423820075475104, "learning_rate": 3.4576061135935717e-06, "loss": 1.3291, "step": 58235 }, { "epoch": 1.7172342621259031, "grad_norm": 2.8157640411901403, "learning_rate": 3.456964799446486e-06, "loss": 1.252, "step": 58240 }, { "epoch": 1.7173816895179126, "grad_norm": 2.9064895714420707, "learning_rate": 3.4563234939222963e-06, "loss": 1.3152, "step": 58245 }, { "epoch": 1.7175291169099218, "grad_norm": 2.7799595766821072, "learning_rate": 3.4556821970398736e-06, "loss": 1.2758, "step": 58250 }, { "epoch": 1.7176765443019313, "grad_norm": 2.8233216413093656, "learning_rate": 3.4550409088180866e-06, "loss": 1.2856, "step": 58255 }, { "epoch": 1.7178239716939407, "grad_norm": 2.7816794839457697, "learning_rate": 3.454399629275809e-06, "loss": 1.3324, "step": 58260 }, { "epoch": 1.7179713990859502, "grad_norm": 2.716260576831411, "learning_rate": 3.4537583584319095e-06, "loss": 1.2367, "step": 58265 }, { "epoch": 1.7181188264779597, "grad_norm": 2.813625264593961, "learning_rate": 3.4531170963052586e-06, "loss": 1.3105, "step": 58270 }, { "epoch": 1.718266253869969, "grad_norm": 2.9230054904666, "learning_rate": 3.4524758429147255e-06, "loss": 1.3042, "step": 58275 }, { "epoch": 1.7184136812619784, "grad_norm": 3.1462408656492897, "learning_rate": 3.4518345982791813e-06, "loss": 1.3411, "step": 58280 }, { "epoch": 1.7185611086539878, "grad_norm": 2.8212637526556064, "learning_rate": 3.451193362417494e-06, "loss": 1.2662, "step": 58285 }, { "epoch": 1.7187085360459973, "grad_norm": 2.78500217789372, "learning_rate": 3.450552135348534e-06, "loss": 1.3145, "step": 58290 }, { "epoch": 1.7188559634380067, "grad_norm": 2.6883325820502413, "learning_rate": 3.4499109170911677e-06, "loss": 1.2837, "step": 58295 }, { "epoch": 1.7190033908300162, "grad_norm": 2.955384296847862, "learning_rate": 3.449269707664267e-06, "loss": 1.3035, "step": 58300 }, { "epoch": 1.7191508182220256, "grad_norm": 2.7300231118472134, "learning_rate": 3.448628507086697e-06, "loss": 1.3095, "step": 58305 }, { "epoch": 1.719298245614035, "grad_norm": 2.900223937047954, "learning_rate": 3.447987315377329e-06, "loss": 1.3555, "step": 58310 }, { "epoch": 1.7194456730060446, "grad_norm": 2.854744537216724, "learning_rate": 3.447346132555029e-06, "loss": 1.3025, "step": 58315 }, { "epoch": 1.719593100398054, "grad_norm": 2.7949822146494876, "learning_rate": 3.4467049586386636e-06, "loss": 1.3748, "step": 58320 }, { "epoch": 1.7197405277900635, "grad_norm": 2.684007302442107, "learning_rate": 3.446063793647102e-06, "loss": 1.3068, "step": 58325 }, { "epoch": 1.719887955182073, "grad_norm": 2.834841711957621, "learning_rate": 3.445422637599209e-06, "loss": 1.2739, "step": 58330 }, { "epoch": 1.7200353825740824, "grad_norm": 2.7066477401205087, "learning_rate": 3.4447814905138542e-06, "loss": 1.2767, "step": 58335 }, { "epoch": 1.7201828099660919, "grad_norm": 2.748308708103654, "learning_rate": 3.4441403524099017e-06, "loss": 1.2388, "step": 58340 }, { "epoch": 1.720330237358101, "grad_norm": 3.049677543553836, "learning_rate": 3.4434992233062198e-06, "loss": 1.3282, "step": 58345 }, { "epoch": 1.7204776647501105, "grad_norm": 2.8438605931082277, "learning_rate": 3.442858103221671e-06, "loss": 1.3231, "step": 58350 }, { "epoch": 1.72062509214212, "grad_norm": 2.8946271869797005, "learning_rate": 3.442216992175125e-06, "loss": 1.2641, "step": 58355 }, { "epoch": 1.7207725195341295, "grad_norm": 2.7700064506163438, "learning_rate": 3.441575890185444e-06, "loss": 1.2791, "step": 58360 }, { "epoch": 1.720919946926139, "grad_norm": 2.826009337781615, "learning_rate": 3.440934797271496e-06, "loss": 1.2746, "step": 58365 }, { "epoch": 1.7210673743181482, "grad_norm": 2.7994202501176306, "learning_rate": 3.440293713452144e-06, "loss": 1.2929, "step": 58370 }, { "epoch": 1.7212148017101576, "grad_norm": 2.864184369969965, "learning_rate": 3.439652638746254e-06, "loss": 1.2912, "step": 58375 }, { "epoch": 1.721362229102167, "grad_norm": 2.8249818165552494, "learning_rate": 3.4390115731726892e-06, "loss": 1.3233, "step": 58380 }, { "epoch": 1.7215096564941765, "grad_norm": 2.840288362586, "learning_rate": 3.438370516750312e-06, "loss": 1.2347, "step": 58385 }, { "epoch": 1.721657083886186, "grad_norm": 2.850977836048926, "learning_rate": 3.4377294694979904e-06, "loss": 1.2922, "step": 58390 }, { "epoch": 1.7218045112781954, "grad_norm": 2.882807835222795, "learning_rate": 3.437088431434585e-06, "loss": 1.2893, "step": 58395 }, { "epoch": 1.721951938670205, "grad_norm": 2.888926585220734, "learning_rate": 3.43644740257896e-06, "loss": 1.3054, "step": 58400 }, { "epoch": 1.7220993660622144, "grad_norm": 2.920365492556753, "learning_rate": 3.435806382949977e-06, "loss": 1.2988, "step": 58405 }, { "epoch": 1.7222467934542238, "grad_norm": 2.7277613233415003, "learning_rate": 3.435165372566502e-06, "loss": 1.2528, "step": 58410 }, { "epoch": 1.7223942208462333, "grad_norm": 3.0148292105602357, "learning_rate": 3.434524371447394e-06, "loss": 1.3327, "step": 58415 }, { "epoch": 1.7225416482382427, "grad_norm": 2.7331111967918504, "learning_rate": 3.4338833796115174e-06, "loss": 1.2776, "step": 58420 }, { "epoch": 1.7226890756302522, "grad_norm": 2.8563509852414515, "learning_rate": 3.433242397077732e-06, "loss": 1.2776, "step": 58425 }, { "epoch": 1.7228365030222617, "grad_norm": 2.862290296564033, "learning_rate": 3.432601423864902e-06, "loss": 1.2652, "step": 58430 }, { "epoch": 1.7229839304142711, "grad_norm": 2.8588967125811546, "learning_rate": 3.4319604599918863e-06, "loss": 1.3136, "step": 58435 }, { "epoch": 1.7231313578062806, "grad_norm": 2.854360971386282, "learning_rate": 3.4313195054775487e-06, "loss": 1.315, "step": 58440 }, { "epoch": 1.7232787851982898, "grad_norm": 2.966202855004208, "learning_rate": 3.430678560340749e-06, "loss": 1.3295, "step": 58445 }, { "epoch": 1.7234262125902993, "grad_norm": 2.805931709136045, "learning_rate": 3.4300376246003455e-06, "loss": 1.2706, "step": 58450 }, { "epoch": 1.7235736399823087, "grad_norm": 2.7097511176731306, "learning_rate": 3.4293966982752017e-06, "loss": 1.2604, "step": 58455 }, { "epoch": 1.7237210673743182, "grad_norm": 2.887942157796892, "learning_rate": 3.4287557813841744e-06, "loss": 1.3021, "step": 58460 }, { "epoch": 1.7238684947663274, "grad_norm": 2.8037023886130448, "learning_rate": 3.428114873946127e-06, "loss": 1.2692, "step": 58465 }, { "epoch": 1.7240159221583369, "grad_norm": 2.8448415774453037, "learning_rate": 3.4274739759799154e-06, "loss": 1.3071, "step": 58470 }, { "epoch": 1.7241633495503463, "grad_norm": 2.7399817789489127, "learning_rate": 3.426833087504402e-06, "loss": 1.3001, "step": 58475 }, { "epoch": 1.7243107769423558, "grad_norm": 2.888948431810226, "learning_rate": 3.426192208538442e-06, "loss": 1.2851, "step": 58480 }, { "epoch": 1.7244582043343653, "grad_norm": 2.8726697642944856, "learning_rate": 3.4255513391008978e-06, "loss": 1.2807, "step": 58485 }, { "epoch": 1.7246056317263747, "grad_norm": 2.909758918022535, "learning_rate": 3.424910479210624e-06, "loss": 1.2914, "step": 58490 }, { "epoch": 1.7247530591183842, "grad_norm": 2.8900705715097845, "learning_rate": 3.424269628886482e-06, "loss": 1.3093, "step": 58495 }, { "epoch": 1.7249004865103936, "grad_norm": 2.746429797382395, "learning_rate": 3.423628788147328e-06, "loss": 1.2908, "step": 58500 }, { "epoch": 1.7249004865103936, "eval_loss": 1.078961730003357, "eval_runtime": 4.1917, "eval_samples_per_second": 94.473, "eval_steps_per_second": 3.101, "step": 58500 }, { "epoch": 1.725047913902403, "grad_norm": 2.8089205221737834, "learning_rate": 3.42298795701202e-06, "loss": 1.2443, "step": 58505 }, { "epoch": 1.7251953412944125, "grad_norm": 2.903722593387863, "learning_rate": 3.422347135499415e-06, "loss": 1.3335, "step": 58510 }, { "epoch": 1.725342768686422, "grad_norm": 2.863370045054774, "learning_rate": 3.4217063236283675e-06, "loss": 1.3214, "step": 58515 }, { "epoch": 1.7254901960784315, "grad_norm": 2.7800951050782405, "learning_rate": 3.4210655214177388e-06, "loss": 1.3074, "step": 58520 }, { "epoch": 1.725637623470441, "grad_norm": 2.886149401596964, "learning_rate": 3.4204247288863813e-06, "loss": 1.3219, "step": 58525 }, { "epoch": 1.7257850508624504, "grad_norm": 2.8665862492574985, "learning_rate": 3.4197839460531533e-06, "loss": 1.3407, "step": 58530 }, { "epoch": 1.7259324782544598, "grad_norm": 2.7729555080337596, "learning_rate": 3.4191431729369086e-06, "loss": 1.3247, "step": 58535 }, { "epoch": 1.726079905646469, "grad_norm": 2.638733445252503, "learning_rate": 3.418502409556505e-06, "loss": 1.2802, "step": 58540 }, { "epoch": 1.7262273330384785, "grad_norm": 2.8354059779842236, "learning_rate": 3.417861655930796e-06, "loss": 1.34, "step": 58545 }, { "epoch": 1.726374760430488, "grad_norm": 2.9214628711074293, "learning_rate": 3.417220912078638e-06, "loss": 1.2885, "step": 58550 }, { "epoch": 1.7265221878224974, "grad_norm": 3.046642494712558, "learning_rate": 3.4165801780188824e-06, "loss": 1.2848, "step": 58555 }, { "epoch": 1.726669615214507, "grad_norm": 2.797936130308164, "learning_rate": 3.4159394537703875e-06, "loss": 1.2983, "step": 58560 }, { "epoch": 1.7268170426065161, "grad_norm": 2.863771627534315, "learning_rate": 3.4152987393520044e-06, "loss": 1.2987, "step": 58565 }, { "epoch": 1.7269644699985256, "grad_norm": 2.7189010115892325, "learning_rate": 3.4146580347825895e-06, "loss": 1.2762, "step": 58570 }, { "epoch": 1.727111897390535, "grad_norm": 2.899873096471726, "learning_rate": 3.414017340080995e-06, "loss": 1.3382, "step": 58575 }, { "epoch": 1.7272593247825445, "grad_norm": 2.8521640673774207, "learning_rate": 3.4133766552660724e-06, "loss": 1.2878, "step": 58580 }, { "epoch": 1.727406752174554, "grad_norm": 2.671493889369959, "learning_rate": 3.412735980356678e-06, "loss": 1.2711, "step": 58585 }, { "epoch": 1.7275541795665634, "grad_norm": 2.924760726031164, "learning_rate": 3.4120953153716602e-06, "loss": 1.2959, "step": 58590 }, { "epoch": 1.7277016069585729, "grad_norm": 2.809057220253427, "learning_rate": 3.411454660329876e-06, "loss": 1.3066, "step": 58595 }, { "epoch": 1.7278490343505823, "grad_norm": 2.735439391836995, "learning_rate": 3.410814015250173e-06, "loss": 1.256, "step": 58600 }, { "epoch": 1.7279964617425918, "grad_norm": 2.8791296711890006, "learning_rate": 3.4101733801514063e-06, "loss": 1.3125, "step": 58605 }, { "epoch": 1.7281438891346013, "grad_norm": 2.790567459322821, "learning_rate": 3.409532755052424e-06, "loss": 1.2811, "step": 58610 }, { "epoch": 1.7282913165266107, "grad_norm": 2.7704317876328077, "learning_rate": 3.4088921399720813e-06, "loss": 1.2853, "step": 58615 }, { "epoch": 1.7284387439186202, "grad_norm": 2.7945223336353466, "learning_rate": 3.4082515349292254e-06, "loss": 1.2408, "step": 58620 }, { "epoch": 1.7285861713106296, "grad_norm": 2.7611354685048823, "learning_rate": 3.4076109399427093e-06, "loss": 1.2796, "step": 58625 }, { "epoch": 1.728733598702639, "grad_norm": 2.884253329902995, "learning_rate": 3.406970355031382e-06, "loss": 1.3286, "step": 58630 }, { "epoch": 1.7288810260946483, "grad_norm": 2.785479052717298, "learning_rate": 3.4063297802140943e-06, "loss": 1.3106, "step": 58635 }, { "epoch": 1.7290284534866578, "grad_norm": 2.780060890711108, "learning_rate": 3.405689215509695e-06, "loss": 1.2522, "step": 58640 }, { "epoch": 1.7291758808786672, "grad_norm": 2.792168478708427, "learning_rate": 3.4050486609370327e-06, "loss": 1.3232, "step": 58645 }, { "epoch": 1.7293233082706767, "grad_norm": 2.785438428353153, "learning_rate": 3.4044081165149585e-06, "loss": 1.2607, "step": 58650 }, { "epoch": 1.7294707356626862, "grad_norm": 2.881524480558052, "learning_rate": 3.403767582262319e-06, "loss": 1.3106, "step": 58655 }, { "epoch": 1.7296181630546954, "grad_norm": 2.7999786578286328, "learning_rate": 3.4031270581979653e-06, "loss": 1.2858, "step": 58660 }, { "epoch": 1.7297655904467049, "grad_norm": 3.023933334531832, "learning_rate": 3.402486544340742e-06, "loss": 1.2734, "step": 58665 }, { "epoch": 1.7299130178387143, "grad_norm": 2.738245702988845, "learning_rate": 3.4018460407095005e-06, "loss": 1.33, "step": 58670 }, { "epoch": 1.7300604452307238, "grad_norm": 2.8188188312905686, "learning_rate": 3.401205547323087e-06, "loss": 1.3408, "step": 58675 }, { "epoch": 1.7302078726227332, "grad_norm": 2.750078915074914, "learning_rate": 3.400565064200348e-06, "loss": 1.2671, "step": 58680 }, { "epoch": 1.7303553000147427, "grad_norm": 2.909319226605712, "learning_rate": 3.3999245913601294e-06, "loss": 1.3016, "step": 58685 }, { "epoch": 1.7305027274067522, "grad_norm": 2.7999838102471135, "learning_rate": 3.399284128821282e-06, "loss": 1.2804, "step": 58690 }, { "epoch": 1.7306501547987616, "grad_norm": 2.7233052297209666, "learning_rate": 3.3986436766026474e-06, "loss": 1.3016, "step": 58695 }, { "epoch": 1.730797582190771, "grad_norm": 2.8073363953306747, "learning_rate": 3.3980032347230756e-06, "loss": 1.3122, "step": 58700 }, { "epoch": 1.7309450095827805, "grad_norm": 2.7485708361069343, "learning_rate": 3.39736280320141e-06, "loss": 1.2488, "step": 58705 }, { "epoch": 1.73109243697479, "grad_norm": 2.733095669235034, "learning_rate": 3.3967223820564968e-06, "loss": 1.316, "step": 58710 }, { "epoch": 1.7312398643667994, "grad_norm": 2.8348605811902607, "learning_rate": 3.396081971307181e-06, "loss": 1.3381, "step": 58715 }, { "epoch": 1.731387291758809, "grad_norm": 2.7805457439839563, "learning_rate": 3.395441570972306e-06, "loss": 1.283, "step": 58720 }, { "epoch": 1.7315347191508184, "grad_norm": 2.72599606496752, "learning_rate": 3.3948011810707192e-06, "loss": 1.2721, "step": 58725 }, { "epoch": 1.7316821465428278, "grad_norm": 2.730046264663698, "learning_rate": 3.3941608016212623e-06, "loss": 1.2598, "step": 58730 }, { "epoch": 1.731829573934837, "grad_norm": 2.8553603106417706, "learning_rate": 3.3935204326427813e-06, "loss": 1.3363, "step": 58735 }, { "epoch": 1.7319770013268465, "grad_norm": 2.8365922007445095, "learning_rate": 3.392880074154117e-06, "loss": 1.2347, "step": 58740 }, { "epoch": 1.732124428718856, "grad_norm": 2.9610086627275813, "learning_rate": 3.3922397261741155e-06, "loss": 1.3193, "step": 58745 }, { "epoch": 1.7322718561108654, "grad_norm": 2.867900423637253, "learning_rate": 3.3915993887216168e-06, "loss": 1.2628, "step": 58750 }, { "epoch": 1.7324192835028749, "grad_norm": 2.829478000350267, "learning_rate": 3.3909590618154673e-06, "loss": 1.2914, "step": 58755 }, { "epoch": 1.7325667108948841, "grad_norm": 2.944242396771092, "learning_rate": 3.3903187454745067e-06, "loss": 1.3285, "step": 58760 }, { "epoch": 1.7327141382868936, "grad_norm": 2.9307210403161856, "learning_rate": 3.3896784397175785e-06, "loss": 1.2816, "step": 58765 }, { "epoch": 1.732861565678903, "grad_norm": 2.8297939997687513, "learning_rate": 3.3890381445635234e-06, "loss": 1.3465, "step": 58770 }, { "epoch": 1.7330089930709125, "grad_norm": 2.910559751130217, "learning_rate": 3.3883978600311817e-06, "loss": 1.329, "step": 58775 }, { "epoch": 1.733156420462922, "grad_norm": 2.739973100012431, "learning_rate": 3.3877575861393976e-06, "loss": 1.2186, "step": 58780 }, { "epoch": 1.7333038478549314, "grad_norm": 2.731024623605062, "learning_rate": 3.3871173229070093e-06, "loss": 1.287, "step": 58785 }, { "epoch": 1.7334512752469409, "grad_norm": 2.768003149822127, "learning_rate": 3.386477070352859e-06, "loss": 1.2646, "step": 58790 }, { "epoch": 1.7335987026389503, "grad_norm": 2.8840516571672574, "learning_rate": 3.3858368284957837e-06, "loss": 1.2967, "step": 58795 }, { "epoch": 1.7337461300309598, "grad_norm": 2.8848784757021915, "learning_rate": 3.3851965973546278e-06, "loss": 1.2955, "step": 58800 }, { "epoch": 1.7338935574229692, "grad_norm": 2.9511406819399033, "learning_rate": 3.384556376948228e-06, "loss": 1.2978, "step": 58805 }, { "epoch": 1.7340409848149787, "grad_norm": 2.79340377255906, "learning_rate": 3.383916167295424e-06, "loss": 1.2606, "step": 58810 }, { "epoch": 1.7341884122069882, "grad_norm": 2.924363698078162, "learning_rate": 3.3832759684150537e-06, "loss": 1.3375, "step": 58815 }, { "epoch": 1.7343358395989976, "grad_norm": 2.83889691425537, "learning_rate": 3.382635780325959e-06, "loss": 1.2984, "step": 58820 }, { "epoch": 1.734483266991007, "grad_norm": 2.918161435561538, "learning_rate": 3.3819956030469733e-06, "loss": 1.3153, "step": 58825 }, { "epoch": 1.7346306943830163, "grad_norm": 2.8144469791072777, "learning_rate": 3.3813554365969393e-06, "loss": 1.3133, "step": 58830 }, { "epoch": 1.7347781217750258, "grad_norm": 2.8674966886164692, "learning_rate": 3.3807152809946924e-06, "loss": 1.3321, "step": 58835 }, { "epoch": 1.7349255491670352, "grad_norm": 2.8254451083530743, "learning_rate": 3.3800751362590697e-06, "loss": 1.3366, "step": 58840 }, { "epoch": 1.7350729765590447, "grad_norm": 2.766055221914109, "learning_rate": 3.3794350024089085e-06, "loss": 1.303, "step": 58845 }, { "epoch": 1.7352204039510541, "grad_norm": 2.6778975918533283, "learning_rate": 3.378794879463045e-06, "loss": 1.318, "step": 58850 }, { "epoch": 1.7353678313430634, "grad_norm": 2.8691352229424587, "learning_rate": 3.3781547674403166e-06, "loss": 1.3162, "step": 58855 }, { "epoch": 1.7355152587350728, "grad_norm": 2.90166518747077, "learning_rate": 3.377514666359558e-06, "loss": 1.3276, "step": 58860 }, { "epoch": 1.7356626861270823, "grad_norm": 2.7575519747449144, "learning_rate": 3.3768745762396073e-06, "loss": 1.288, "step": 58865 }, { "epoch": 1.7358101135190918, "grad_norm": 2.7979658554566385, "learning_rate": 3.3762344970992965e-06, "loss": 1.3054, "step": 58870 }, { "epoch": 1.7359575409111012, "grad_norm": 2.6908751673413307, "learning_rate": 3.3755944289574636e-06, "loss": 1.2623, "step": 58875 }, { "epoch": 1.7361049683031107, "grad_norm": 2.763123962947397, "learning_rate": 3.3749543718329405e-06, "loss": 1.3468, "step": 58880 }, { "epoch": 1.7362523956951201, "grad_norm": 2.8040746722242087, "learning_rate": 3.374314325744565e-06, "loss": 1.3103, "step": 58885 }, { "epoch": 1.7363998230871296, "grad_norm": 2.6319631760067925, "learning_rate": 3.373674290711169e-06, "loss": 1.2806, "step": 58890 }, { "epoch": 1.736547250479139, "grad_norm": 2.7731277442929554, "learning_rate": 3.373034266751587e-06, "loss": 1.3354, "step": 58895 }, { "epoch": 1.7366946778711485, "grad_norm": 2.8683871249673465, "learning_rate": 3.3723942538846525e-06, "loss": 1.2379, "step": 58900 }, { "epoch": 1.736842105263158, "grad_norm": 2.8670613740029007, "learning_rate": 3.3717542521291964e-06, "loss": 1.302, "step": 58905 }, { "epoch": 1.7369895326551674, "grad_norm": 2.9051664443628065, "learning_rate": 3.371114261504055e-06, "loss": 1.3397, "step": 58910 }, { "epoch": 1.7371369600471769, "grad_norm": 2.893518362125944, "learning_rate": 3.3704742820280587e-06, "loss": 1.2988, "step": 58915 }, { "epoch": 1.7372843874391863, "grad_norm": 2.785414419257914, "learning_rate": 3.36983431372004e-06, "loss": 1.2828, "step": 58920 }, { "epoch": 1.7374318148311958, "grad_norm": 2.8791399556258934, "learning_rate": 3.36919435659883e-06, "loss": 1.3346, "step": 58925 }, { "epoch": 1.737579242223205, "grad_norm": 2.877711882461012, "learning_rate": 3.368554410683262e-06, "loss": 1.3219, "step": 58930 }, { "epoch": 1.7377266696152145, "grad_norm": 2.8813955326905516, "learning_rate": 3.3679144759921653e-06, "loss": 1.3234, "step": 58935 }, { "epoch": 1.737874097007224, "grad_norm": 2.9120797378002132, "learning_rate": 3.3672745525443723e-06, "loss": 1.3693, "step": 58940 }, { "epoch": 1.7380215243992334, "grad_norm": 3.4042772246792103, "learning_rate": 3.3666346403587112e-06, "loss": 1.2999, "step": 58945 }, { "epoch": 1.7381689517912429, "grad_norm": 2.8252735211680973, "learning_rate": 3.365994739454015e-06, "loss": 1.3358, "step": 58950 }, { "epoch": 1.738316379183252, "grad_norm": 2.879094124711309, "learning_rate": 3.3653548498491103e-06, "loss": 1.3495, "step": 58955 }, { "epoch": 1.7384638065752616, "grad_norm": 2.924494850698705, "learning_rate": 3.36471497156283e-06, "loss": 1.2971, "step": 58960 }, { "epoch": 1.738611233967271, "grad_norm": 2.9171322529760366, "learning_rate": 3.364075104614002e-06, "loss": 1.2736, "step": 58965 }, { "epoch": 1.7387586613592805, "grad_norm": 2.8205920450221544, "learning_rate": 3.3634352490214534e-06, "loss": 1.306, "step": 58970 }, { "epoch": 1.73890608875129, "grad_norm": 2.761838336362567, "learning_rate": 3.362795404804015e-06, "loss": 1.3243, "step": 58975 }, { "epoch": 1.7390535161432994, "grad_norm": 2.9165054305815215, "learning_rate": 3.3621555719805127e-06, "loss": 1.3122, "step": 58980 }, { "epoch": 1.7392009435353089, "grad_norm": 2.9029454301650697, "learning_rate": 3.3615157505697767e-06, "loss": 1.2442, "step": 58985 }, { "epoch": 1.7393483709273183, "grad_norm": 2.735092555914454, "learning_rate": 3.3608759405906325e-06, "loss": 1.3205, "step": 58990 }, { "epoch": 1.7394957983193278, "grad_norm": 2.857839030193117, "learning_rate": 3.3602361420619093e-06, "loss": 1.2583, "step": 58995 }, { "epoch": 1.7396432257113372, "grad_norm": 2.7899376253368806, "learning_rate": 3.359596355002431e-06, "loss": 1.3049, "step": 59000 }, { "epoch": 1.7396432257113372, "eval_loss": 1.0784550905227661, "eval_runtime": 4.2624, "eval_samples_per_second": 92.906, "eval_steps_per_second": 3.05, "step": 59000 }, { "epoch": 1.7397906531033467, "grad_norm": 2.783027287482679, "learning_rate": 3.358956579431027e-06, "loss": 1.3512, "step": 59005 }, { "epoch": 1.7399380804953561, "grad_norm": 2.8101450676398905, "learning_rate": 3.3583168153665207e-06, "loss": 1.2534, "step": 59010 }, { "epoch": 1.7400855078873656, "grad_norm": 2.900192316404377, "learning_rate": 3.3576770628277414e-06, "loss": 1.3281, "step": 59015 }, { "epoch": 1.740232935279375, "grad_norm": 2.6794908936409176, "learning_rate": 3.3570373218335113e-06, "loss": 1.3113, "step": 59020 }, { "epoch": 1.7403803626713843, "grad_norm": 2.8070700207085033, "learning_rate": 3.3563975924026578e-06, "loss": 1.3126, "step": 59025 }, { "epoch": 1.7405277900633938, "grad_norm": 2.7578951867496104, "learning_rate": 3.355757874554004e-06, "loss": 1.3355, "step": 59030 }, { "epoch": 1.7406752174554032, "grad_norm": 2.738085706863714, "learning_rate": 3.3551181683063736e-06, "loss": 1.2601, "step": 59035 }, { "epoch": 1.7408226448474127, "grad_norm": 2.8243397975745284, "learning_rate": 3.354478473678594e-06, "loss": 1.2789, "step": 59040 }, { "epoch": 1.7409700722394221, "grad_norm": 2.7077086160292216, "learning_rate": 3.353838790689486e-06, "loss": 1.3152, "step": 59045 }, { "epoch": 1.7411174996314314, "grad_norm": 2.832847948136436, "learning_rate": 3.353199119357874e-06, "loss": 1.2644, "step": 59050 }, { "epoch": 1.7412649270234408, "grad_norm": 2.805808591759521, "learning_rate": 3.3525594597025807e-06, "loss": 1.2447, "step": 59055 }, { "epoch": 1.7414123544154503, "grad_norm": 2.8163618867080733, "learning_rate": 3.35191981174243e-06, "loss": 1.2998, "step": 59060 }, { "epoch": 1.7415597818074597, "grad_norm": 2.6965729613182434, "learning_rate": 3.3512801754962425e-06, "loss": 1.2907, "step": 59065 }, { "epoch": 1.7417072091994692, "grad_norm": 2.854514451649652, "learning_rate": 3.350640550982842e-06, "loss": 1.295, "step": 59070 }, { "epoch": 1.7418546365914787, "grad_norm": 2.9271066485870647, "learning_rate": 3.350000938221048e-06, "loss": 1.2937, "step": 59075 }, { "epoch": 1.7420020639834881, "grad_norm": 2.8639356081752876, "learning_rate": 3.3493613372296847e-06, "loss": 1.353, "step": 59080 }, { "epoch": 1.7421494913754976, "grad_norm": 2.781372361066597, "learning_rate": 3.3487217480275696e-06, "loss": 1.2797, "step": 59085 }, { "epoch": 1.742296918767507, "grad_norm": 2.971704300965789, "learning_rate": 3.348082170633527e-06, "loss": 1.3233, "step": 59090 }, { "epoch": 1.7424443461595165, "grad_norm": 2.892534033300894, "learning_rate": 3.3474426050663746e-06, "loss": 1.3024, "step": 59095 }, { "epoch": 1.742591773551526, "grad_norm": 2.7647004551499705, "learning_rate": 3.346803051344934e-06, "loss": 1.2817, "step": 59100 }, { "epoch": 1.7427392009435354, "grad_norm": 2.7730969539032957, "learning_rate": 3.3461635094880243e-06, "loss": 1.3029, "step": 59105 }, { "epoch": 1.7428866283355449, "grad_norm": 2.8646396136343526, "learning_rate": 3.3455239795144627e-06, "loss": 1.2737, "step": 59110 }, { "epoch": 1.7430340557275543, "grad_norm": 2.7118328003009298, "learning_rate": 3.3448844614430717e-06, "loss": 1.2851, "step": 59115 }, { "epoch": 1.7431814831195638, "grad_norm": 2.7308346486929564, "learning_rate": 3.3442449552926666e-06, "loss": 1.2856, "step": 59120 }, { "epoch": 1.743328910511573, "grad_norm": 3.036638104722776, "learning_rate": 3.3436054610820685e-06, "loss": 1.3725, "step": 59125 }, { "epoch": 1.7434763379035825, "grad_norm": 2.824377362473933, "learning_rate": 3.3429659788300915e-06, "loss": 1.3445, "step": 59130 }, { "epoch": 1.743623765295592, "grad_norm": 2.825091554454169, "learning_rate": 3.3423265085555577e-06, "loss": 1.2996, "step": 59135 }, { "epoch": 1.7437711926876014, "grad_norm": 2.8457958070076774, "learning_rate": 3.3416870502772796e-06, "loss": 1.267, "step": 59140 }, { "epoch": 1.7439186200796106, "grad_norm": 2.7511303711775215, "learning_rate": 3.3410476040140783e-06, "loss": 1.2799, "step": 59145 }, { "epoch": 1.74406604747162, "grad_norm": 2.8520102625184145, "learning_rate": 3.3404081697847673e-06, "loss": 1.3514, "step": 59150 }, { "epoch": 1.7442134748636295, "grad_norm": 2.8619003270971155, "learning_rate": 3.3397687476081646e-06, "loss": 1.3277, "step": 59155 }, { "epoch": 1.744360902255639, "grad_norm": 2.7770430753881308, "learning_rate": 3.3391293375030833e-06, "loss": 1.2463, "step": 59160 }, { "epoch": 1.7445083296476485, "grad_norm": 2.8095074139543637, "learning_rate": 3.3384899394883415e-06, "loss": 1.326, "step": 59165 }, { "epoch": 1.744655757039658, "grad_norm": 2.8459099318503083, "learning_rate": 3.337850553582754e-06, "loss": 1.3149, "step": 59170 }, { "epoch": 1.7448031844316674, "grad_norm": 2.812256690361144, "learning_rate": 3.337211179805133e-06, "loss": 1.2716, "step": 59175 }, { "epoch": 1.7449506118236768, "grad_norm": 2.731262884661028, "learning_rate": 3.3365718181742957e-06, "loss": 1.2907, "step": 59180 }, { "epoch": 1.7450980392156863, "grad_norm": 2.9574399355608807, "learning_rate": 3.335932468709053e-06, "loss": 1.2869, "step": 59185 }, { "epoch": 1.7452454666076958, "grad_norm": 2.8532845652340937, "learning_rate": 3.3352931314282216e-06, "loss": 1.2684, "step": 59190 }, { "epoch": 1.7453928939997052, "grad_norm": 2.848609204430684, "learning_rate": 3.3346538063506127e-06, "loss": 1.3971, "step": 59195 }, { "epoch": 1.7455403213917147, "grad_norm": 2.9578917678244006, "learning_rate": 3.334014493495041e-06, "loss": 1.3865, "step": 59200 }, { "epoch": 1.7456877487837241, "grad_norm": 2.7797661278277497, "learning_rate": 3.3333751928803158e-06, "loss": 1.2991, "step": 59205 }, { "epoch": 1.7458351761757336, "grad_norm": 3.009191464565064, "learning_rate": 3.332735904525253e-06, "loss": 1.3205, "step": 59210 }, { "epoch": 1.745982603567743, "grad_norm": 2.7449459385654746, "learning_rate": 3.332096628448661e-06, "loss": 1.2666, "step": 59215 }, { "epoch": 1.7461300309597523, "grad_norm": 2.8206030891877574, "learning_rate": 3.3314573646693544e-06, "loss": 1.3282, "step": 59220 }, { "epoch": 1.7462774583517617, "grad_norm": 2.9113753691699964, "learning_rate": 3.330818113206142e-06, "loss": 1.317, "step": 59225 }, { "epoch": 1.7464248857437712, "grad_norm": 2.721982414888675, "learning_rate": 3.330178874077836e-06, "loss": 1.2644, "step": 59230 }, { "epoch": 1.7465723131357807, "grad_norm": 2.88859893011472, "learning_rate": 3.3295396473032467e-06, "loss": 1.321, "step": 59235 }, { "epoch": 1.74671974052779, "grad_norm": 2.921932879779936, "learning_rate": 3.328900432901181e-06, "loss": 1.339, "step": 59240 }, { "epoch": 1.7468671679197993, "grad_norm": 2.844635798463832, "learning_rate": 3.328261230890453e-06, "loss": 1.27, "step": 59245 }, { "epoch": 1.7470145953118088, "grad_norm": 2.6667466299176557, "learning_rate": 3.3276220412898686e-06, "loss": 1.2959, "step": 59250 }, { "epoch": 1.7471620227038183, "grad_norm": 2.8837303419434637, "learning_rate": 3.326982864118239e-06, "loss": 1.2792, "step": 59255 }, { "epoch": 1.7473094500958277, "grad_norm": 2.8443668566109377, "learning_rate": 3.32634369939437e-06, "loss": 1.3562, "step": 59260 }, { "epoch": 1.7474568774878372, "grad_norm": 2.7227631701334247, "learning_rate": 3.3257045471370735e-06, "loss": 1.3243, "step": 59265 }, { "epoch": 1.7476043048798466, "grad_norm": 2.8000193081537312, "learning_rate": 3.3250654073651526e-06, "loss": 1.2748, "step": 59270 }, { "epoch": 1.747751732271856, "grad_norm": 2.6950647622681867, "learning_rate": 3.3244262800974197e-06, "loss": 1.3234, "step": 59275 }, { "epoch": 1.7478991596638656, "grad_norm": 2.9185054446022005, "learning_rate": 3.3237871653526784e-06, "loss": 1.2765, "step": 59280 }, { "epoch": 1.748046587055875, "grad_norm": 2.7961841604545095, "learning_rate": 3.323148063149737e-06, "loss": 1.3316, "step": 59285 }, { "epoch": 1.7481940144478845, "grad_norm": 2.6457689750564364, "learning_rate": 3.3225089735074e-06, "loss": 1.3007, "step": 59290 }, { "epoch": 1.748341441839894, "grad_norm": 2.8364027422557934, "learning_rate": 3.3218698964444767e-06, "loss": 1.2772, "step": 59295 }, { "epoch": 1.7484888692319034, "grad_norm": 2.6729768542005576, "learning_rate": 3.3212308319797696e-06, "loss": 1.3552, "step": 59300 }, { "epoch": 1.7486362966239128, "grad_norm": 4.318577611430343, "learning_rate": 3.3205917801320845e-06, "loss": 1.3076, "step": 59305 }, { "epoch": 1.7487837240159223, "grad_norm": 2.8910744329795577, "learning_rate": 3.3199527409202277e-06, "loss": 1.3155, "step": 59310 }, { "epoch": 1.7489311514079315, "grad_norm": 2.960164955936572, "learning_rate": 3.3193137143630003e-06, "loss": 1.2793, "step": 59315 }, { "epoch": 1.749078578799941, "grad_norm": 2.764970302690837, "learning_rate": 3.318674700479211e-06, "loss": 1.2913, "step": 59320 }, { "epoch": 1.7492260061919505, "grad_norm": 2.7070294320302124, "learning_rate": 3.3180356992876594e-06, "loss": 1.2851, "step": 59325 }, { "epoch": 1.74937343358396, "grad_norm": 2.894808719562947, "learning_rate": 3.3173967108071524e-06, "loss": 1.3344, "step": 59330 }, { "epoch": 1.7495208609759694, "grad_norm": 2.704396651112799, "learning_rate": 3.316757735056489e-06, "loss": 1.2781, "step": 59335 }, { "epoch": 1.7496682883679786, "grad_norm": 2.9227792871906453, "learning_rate": 3.3161187720544755e-06, "loss": 1.2897, "step": 59340 }, { "epoch": 1.749815715759988, "grad_norm": 2.7676769714345903, "learning_rate": 3.315479821819911e-06, "loss": 1.2917, "step": 59345 }, { "epoch": 1.7499631431519975, "grad_norm": 2.7816755959216284, "learning_rate": 3.3148408843716006e-06, "loss": 1.2726, "step": 59350 }, { "epoch": 1.750110570544007, "grad_norm": 2.6945256509241537, "learning_rate": 3.3142019597283437e-06, "loss": 1.2541, "step": 59355 }, { "epoch": 1.7502579979360164, "grad_norm": 2.9067462741770056, "learning_rate": 3.313563047908942e-06, "loss": 1.2817, "step": 59360 }, { "epoch": 1.750405425328026, "grad_norm": 2.82800629249333, "learning_rate": 3.312924148932196e-06, "loss": 1.3259, "step": 59365 }, { "epoch": 1.7505528527200354, "grad_norm": 2.8055597886293886, "learning_rate": 3.312285262816905e-06, "loss": 1.3112, "step": 59370 }, { "epoch": 1.7507002801120448, "grad_norm": 2.7136177521819724, "learning_rate": 3.3116463895818708e-06, "loss": 1.3117, "step": 59375 }, { "epoch": 1.7508477075040543, "grad_norm": 3.0410530415503296, "learning_rate": 3.311007529245892e-06, "loss": 1.371, "step": 59380 }, { "epoch": 1.7509951348960637, "grad_norm": 2.7727458353049537, "learning_rate": 3.3103686818277683e-06, "loss": 1.241, "step": 59385 }, { "epoch": 1.7511425622880732, "grad_norm": 2.747696821616918, "learning_rate": 3.3097298473462965e-06, "loss": 1.2811, "step": 59390 }, { "epoch": 1.7512899896800826, "grad_norm": 2.845888605890471, "learning_rate": 3.3090910258202783e-06, "loss": 1.2794, "step": 59395 }, { "epoch": 1.751437417072092, "grad_norm": 2.9468334573179424, "learning_rate": 3.308452217268509e-06, "loss": 1.2923, "step": 59400 }, { "epoch": 1.7515848444641016, "grad_norm": 2.7176370781622325, "learning_rate": 3.3078134217097884e-06, "loss": 1.2434, "step": 59405 }, { "epoch": 1.751732271856111, "grad_norm": 2.8887630732001086, "learning_rate": 3.3071746391629123e-06, "loss": 1.2588, "step": 59410 }, { "epoch": 1.7518796992481203, "grad_norm": 2.75624407754728, "learning_rate": 3.3065358696466783e-06, "loss": 1.3091, "step": 59415 }, { "epoch": 1.7520271266401297, "grad_norm": 2.8384901913584843, "learning_rate": 3.3058971131798823e-06, "loss": 1.3138, "step": 59420 }, { "epoch": 1.7521745540321392, "grad_norm": 2.7275952272821335, "learning_rate": 3.305258369781322e-06, "loss": 1.3097, "step": 59425 }, { "epoch": 1.7523219814241486, "grad_norm": 2.758507023402655, "learning_rate": 3.3046196394697914e-06, "loss": 1.3237, "step": 59430 }, { "epoch": 1.752469408816158, "grad_norm": 2.8616599853062463, "learning_rate": 3.3039809222640863e-06, "loss": 1.2649, "step": 59435 }, { "epoch": 1.7526168362081673, "grad_norm": 2.822651920869141, "learning_rate": 3.3033422181830026e-06, "loss": 1.2794, "step": 59440 }, { "epoch": 1.7527642636001768, "grad_norm": 2.835940048169921, "learning_rate": 3.302703527245332e-06, "loss": 1.3092, "step": 59445 }, { "epoch": 1.7529116909921862, "grad_norm": 2.754388986566085, "learning_rate": 3.3020648494698733e-06, "loss": 1.2738, "step": 59450 }, { "epoch": 1.7530591183841957, "grad_norm": 2.7894212295944905, "learning_rate": 3.3014261848754166e-06, "loss": 1.2835, "step": 59455 }, { "epoch": 1.7532065457762052, "grad_norm": 2.802844660862277, "learning_rate": 3.300787533480757e-06, "loss": 1.2622, "step": 59460 }, { "epoch": 1.7533539731682146, "grad_norm": 2.710173526198384, "learning_rate": 3.300148895304686e-06, "loss": 1.2503, "step": 59465 }, { "epoch": 1.753501400560224, "grad_norm": 2.8237418861471144, "learning_rate": 3.2995102703659997e-06, "loss": 1.2624, "step": 59470 }, { "epoch": 1.7536488279522335, "grad_norm": 2.7497834984216505, "learning_rate": 3.2988716586834855e-06, "loss": 1.3163, "step": 59475 }, { "epoch": 1.753796255344243, "grad_norm": 2.8249088890069793, "learning_rate": 3.29823306027594e-06, "loss": 1.2645, "step": 59480 }, { "epoch": 1.7539436827362525, "grad_norm": 2.8628971221264727, "learning_rate": 3.2975944751621517e-06, "loss": 1.2723, "step": 59485 }, { "epoch": 1.754091110128262, "grad_norm": 2.84419753678633, "learning_rate": 3.2969559033609137e-06, "loss": 1.3255, "step": 59490 }, { "epoch": 1.7542385375202714, "grad_norm": 2.7619344251933797, "learning_rate": 3.2963173448910146e-06, "loss": 1.3013, "step": 59495 }, { "epoch": 1.7543859649122808, "grad_norm": 2.7507441166543005, "learning_rate": 3.295678799771245e-06, "loss": 1.2632, "step": 59500 }, { "epoch": 1.7543859649122808, "eval_loss": 1.0771989822387695, "eval_runtime": 4.2076, "eval_samples_per_second": 94.115, "eval_steps_per_second": 3.09, "step": 59500 }, { "epoch": 1.7545333923042903, "grad_norm": 3.0767128345762345, "learning_rate": 3.2950402680203967e-06, "loss": 1.3083, "step": 59505 }, { "epoch": 1.7546808196962995, "grad_norm": 2.8496954409947417, "learning_rate": 3.2944017496572574e-06, "loss": 1.3744, "step": 59510 }, { "epoch": 1.754828247088309, "grad_norm": 2.7444135166529744, "learning_rate": 3.2937632447006176e-06, "loss": 1.2859, "step": 59515 }, { "epoch": 1.7549756744803184, "grad_norm": 2.8054839364708517, "learning_rate": 3.293124753169263e-06, "loss": 1.2918, "step": 59520 }, { "epoch": 1.755123101872328, "grad_norm": 2.845377298458192, "learning_rate": 3.2924862750819865e-06, "loss": 1.2987, "step": 59525 }, { "epoch": 1.7552705292643374, "grad_norm": 2.7215614084756585, "learning_rate": 3.2918478104575727e-06, "loss": 1.3039, "step": 59530 }, { "epoch": 1.7554179566563466, "grad_norm": 2.7742185262168353, "learning_rate": 3.291209359314811e-06, "loss": 1.2633, "step": 59535 }, { "epoch": 1.755565384048356, "grad_norm": 2.7225695714467713, "learning_rate": 3.2905709216724876e-06, "loss": 1.3317, "step": 59540 }, { "epoch": 1.7557128114403655, "grad_norm": 2.7547118738991734, "learning_rate": 3.28993249754939e-06, "loss": 1.3079, "step": 59545 }, { "epoch": 1.755860238832375, "grad_norm": 2.851361672418525, "learning_rate": 3.289294086964302e-06, "loss": 1.3101, "step": 59550 }, { "epoch": 1.7560076662243844, "grad_norm": 2.853372559139733, "learning_rate": 3.2886556899360134e-06, "loss": 1.3471, "step": 59555 }, { "epoch": 1.7561550936163939, "grad_norm": 2.8747137051616645, "learning_rate": 3.288017306483308e-06, "loss": 1.2516, "step": 59560 }, { "epoch": 1.7563025210084033, "grad_norm": 2.984570760950003, "learning_rate": 3.2873789366249704e-06, "loss": 1.2874, "step": 59565 }, { "epoch": 1.7564499484004128, "grad_norm": 2.7674427460018607, "learning_rate": 3.286740580379786e-06, "loss": 1.3223, "step": 59570 }, { "epoch": 1.7565973757924223, "grad_norm": 2.8745054459009296, "learning_rate": 3.286102237766538e-06, "loss": 1.3168, "step": 59575 }, { "epoch": 1.7567448031844317, "grad_norm": 2.8094610788664407, "learning_rate": 3.2854639088040126e-06, "loss": 1.3264, "step": 59580 }, { "epoch": 1.7568922305764412, "grad_norm": 2.8809985467586667, "learning_rate": 3.2848255935109915e-06, "loss": 1.332, "step": 59585 }, { "epoch": 1.7570396579684506, "grad_norm": 2.8325178793753834, "learning_rate": 3.284187291906259e-06, "loss": 1.3351, "step": 59590 }, { "epoch": 1.75718708536046, "grad_norm": 2.9011942347156796, "learning_rate": 3.2835490040085964e-06, "loss": 1.3272, "step": 59595 }, { "epoch": 1.7573345127524695, "grad_norm": 2.8858292265000434, "learning_rate": 3.282910729836788e-06, "loss": 1.2561, "step": 59600 }, { "epoch": 1.757481940144479, "grad_norm": 2.9974072779395122, "learning_rate": 3.2822724694096127e-06, "loss": 1.3335, "step": 59605 }, { "epoch": 1.7576293675364882, "grad_norm": 2.8934315043808185, "learning_rate": 3.2816342227458565e-06, "loss": 1.2948, "step": 59610 }, { "epoch": 1.7577767949284977, "grad_norm": 2.9021664916172702, "learning_rate": 3.2809959898642974e-06, "loss": 1.3007, "step": 59615 }, { "epoch": 1.7579242223205072, "grad_norm": 2.8466067125745322, "learning_rate": 3.2803577707837175e-06, "loss": 1.2886, "step": 59620 }, { "epoch": 1.7580716497125166, "grad_norm": 2.7928724315708275, "learning_rate": 3.2797195655228957e-06, "loss": 1.2863, "step": 59625 }, { "epoch": 1.758219077104526, "grad_norm": 2.772230013550754, "learning_rate": 3.279081374100612e-06, "loss": 1.2537, "step": 59630 }, { "epoch": 1.7583665044965353, "grad_norm": 2.895531485846368, "learning_rate": 3.2784431965356478e-06, "loss": 1.2908, "step": 59635 }, { "epoch": 1.7585139318885448, "grad_norm": 2.853009989591648, "learning_rate": 3.27780503284678e-06, "loss": 1.3107, "step": 59640 }, { "epoch": 1.7586613592805542, "grad_norm": 2.9052001969404393, "learning_rate": 3.27716688305279e-06, "loss": 1.2811, "step": 59645 }, { "epoch": 1.7588087866725637, "grad_norm": 2.8522081298335316, "learning_rate": 3.2765287471724518e-06, "loss": 1.2677, "step": 59650 }, { "epoch": 1.7589562140645731, "grad_norm": 2.945542683682973, "learning_rate": 3.2758906252245476e-06, "loss": 1.3313, "step": 59655 }, { "epoch": 1.7591036414565826, "grad_norm": 2.7227216769925837, "learning_rate": 3.275252517227852e-06, "loss": 1.2963, "step": 59660 }, { "epoch": 1.759251068848592, "grad_norm": 2.9219966730523725, "learning_rate": 3.2746144232011444e-06, "loss": 1.2839, "step": 59665 }, { "epoch": 1.7593984962406015, "grad_norm": 2.638965809195526, "learning_rate": 3.2739763431631997e-06, "loss": 1.2763, "step": 59670 }, { "epoch": 1.759545923632611, "grad_norm": 2.8539848728594293, "learning_rate": 3.2733382771327956e-06, "loss": 1.2209, "step": 59675 }, { "epoch": 1.7596933510246204, "grad_norm": 2.834597387228119, "learning_rate": 3.272700225128705e-06, "loss": 1.2904, "step": 59680 }, { "epoch": 1.75984077841663, "grad_norm": 2.7619214695464875, "learning_rate": 3.272062187169707e-06, "loss": 1.2851, "step": 59685 }, { "epoch": 1.7599882058086393, "grad_norm": 2.7581113843282488, "learning_rate": 3.271424163274575e-06, "loss": 1.2774, "step": 59690 }, { "epoch": 1.7601356332006488, "grad_norm": 2.917444324688451, "learning_rate": 3.2707861534620825e-06, "loss": 1.3115, "step": 59695 }, { "epoch": 1.7602830605926583, "grad_norm": 2.851076799179697, "learning_rate": 3.2701481577510056e-06, "loss": 1.2826, "step": 59700 }, { "epoch": 1.7604304879846675, "grad_norm": 3.080948125411113, "learning_rate": 3.269510176160115e-06, "loss": 1.2832, "step": 59705 }, { "epoch": 1.760577915376677, "grad_norm": 2.8667470149076255, "learning_rate": 3.2688722087081883e-06, "loss": 1.2645, "step": 59710 }, { "epoch": 1.7607253427686864, "grad_norm": 2.922294294184245, "learning_rate": 3.268234255413995e-06, "loss": 1.281, "step": 59715 }, { "epoch": 1.7608727701606959, "grad_norm": 2.678555769508818, "learning_rate": 3.2675963162963104e-06, "loss": 1.3433, "step": 59720 }, { "epoch": 1.7610201975527053, "grad_norm": 2.85867573336119, "learning_rate": 3.266958391373903e-06, "loss": 1.3233, "step": 59725 }, { "epoch": 1.7611676249447146, "grad_norm": 3.2548851239226715, "learning_rate": 3.2663204806655482e-06, "loss": 1.2954, "step": 59730 }, { "epoch": 1.761315052336724, "grad_norm": 3.0466532881838213, "learning_rate": 3.2656825841900135e-06, "loss": 1.3726, "step": 59735 }, { "epoch": 1.7614624797287335, "grad_norm": 2.76011238077159, "learning_rate": 3.265044701966074e-06, "loss": 1.3252, "step": 59740 }, { "epoch": 1.761609907120743, "grad_norm": 2.73608752549893, "learning_rate": 3.2644068340124967e-06, "loss": 1.3191, "step": 59745 }, { "epoch": 1.7617573345127524, "grad_norm": 2.8072376157534613, "learning_rate": 3.2637689803480536e-06, "loss": 1.2797, "step": 59750 }, { "epoch": 1.7619047619047619, "grad_norm": 2.9095960276336092, "learning_rate": 3.2631311409915135e-06, "loss": 1.3276, "step": 59755 }, { "epoch": 1.7620521892967713, "grad_norm": 2.97472971552703, "learning_rate": 3.2624933159616442e-06, "loss": 1.2731, "step": 59760 }, { "epoch": 1.7621996166887808, "grad_norm": 2.660139853946428, "learning_rate": 3.261855505277217e-06, "loss": 1.2514, "step": 59765 }, { "epoch": 1.7623470440807902, "grad_norm": 2.8562833112995407, "learning_rate": 3.2612177089569987e-06, "loss": 1.3485, "step": 59770 }, { "epoch": 1.7624944714727997, "grad_norm": 2.889840043550667, "learning_rate": 3.2605799270197574e-06, "loss": 1.3442, "step": 59775 }, { "epoch": 1.7626418988648092, "grad_norm": 2.8329364768635594, "learning_rate": 3.25994215948426e-06, "loss": 1.2934, "step": 59780 }, { "epoch": 1.7627893262568186, "grad_norm": 2.9116496510863303, "learning_rate": 3.2593044063692754e-06, "loss": 1.3098, "step": 59785 }, { "epoch": 1.762936753648828, "grad_norm": 2.8377113021851628, "learning_rate": 3.258666667693567e-06, "loss": 1.2889, "step": 59790 }, { "epoch": 1.7630841810408375, "grad_norm": 2.8817568772205666, "learning_rate": 3.2580289434759047e-06, "loss": 1.3177, "step": 59795 }, { "epoch": 1.763231608432847, "grad_norm": 2.8218595102692965, "learning_rate": 3.257391233735052e-06, "loss": 1.2974, "step": 59800 }, { "epoch": 1.7633790358248562, "grad_norm": 2.7853254147353588, "learning_rate": 3.256753538489775e-06, "loss": 1.3072, "step": 59805 }, { "epoch": 1.7635264632168657, "grad_norm": 2.8373936136131834, "learning_rate": 3.256115857758837e-06, "loss": 1.2931, "step": 59810 }, { "epoch": 1.7636738906088751, "grad_norm": 2.779039058812747, "learning_rate": 3.2554781915610058e-06, "loss": 1.3204, "step": 59815 }, { "epoch": 1.7638213180008846, "grad_norm": 2.76960657351732, "learning_rate": 3.2548405399150433e-06, "loss": 1.243, "step": 59820 }, { "epoch": 1.7639687453928938, "grad_norm": 2.8305580049406203, "learning_rate": 3.2542029028397123e-06, "loss": 1.2557, "step": 59825 }, { "epoch": 1.7641161727849033, "grad_norm": 2.7049075280143486, "learning_rate": 3.253565280353778e-06, "loss": 1.2806, "step": 59830 }, { "epoch": 1.7642636001769127, "grad_norm": 2.7365300021326675, "learning_rate": 3.252927672476e-06, "loss": 1.3205, "step": 59835 }, { "epoch": 1.7644110275689222, "grad_norm": 2.7890223723260266, "learning_rate": 3.2522900792251455e-06, "loss": 1.2528, "step": 59840 }, { "epoch": 1.7645584549609317, "grad_norm": 2.899562312230704, "learning_rate": 3.251652500619972e-06, "loss": 1.281, "step": 59845 }, { "epoch": 1.7647058823529411, "grad_norm": 3.0032905147685147, "learning_rate": 3.2510149366792445e-06, "loss": 1.327, "step": 59850 }, { "epoch": 1.7648533097449506, "grad_norm": 2.738679670905689, "learning_rate": 3.2503773874217196e-06, "loss": 1.3241, "step": 59855 }, { "epoch": 1.76500073713696, "grad_norm": 2.7475303735769594, "learning_rate": 3.249739852866163e-06, "loss": 1.2952, "step": 59860 }, { "epoch": 1.7651481645289695, "grad_norm": 2.7116482439610006, "learning_rate": 3.2491023330313303e-06, "loss": 1.2792, "step": 59865 }, { "epoch": 1.765295591920979, "grad_norm": 2.8586893943626315, "learning_rate": 3.2484648279359853e-06, "loss": 1.3135, "step": 59870 }, { "epoch": 1.7654430193129884, "grad_norm": 2.714126980239425, "learning_rate": 3.2478273375988845e-06, "loss": 1.2813, "step": 59875 }, { "epoch": 1.7655904467049979, "grad_norm": 2.799135917329248, "learning_rate": 3.247189862038789e-06, "loss": 1.2796, "step": 59880 }, { "epoch": 1.7657378740970073, "grad_norm": 2.776334699141347, "learning_rate": 3.2465524012744556e-06, "loss": 1.3061, "step": 59885 }, { "epoch": 1.7658853014890168, "grad_norm": 2.782613762819845, "learning_rate": 3.2459149553246413e-06, "loss": 1.238, "step": 59890 }, { "epoch": 1.7660327288810262, "grad_norm": 2.8477974301282036, "learning_rate": 3.245277524208106e-06, "loss": 1.2487, "step": 59895 }, { "epoch": 1.7661801562730355, "grad_norm": 2.960654261969316, "learning_rate": 3.244640107943606e-06, "loss": 1.3135, "step": 59900 }, { "epoch": 1.766327583665045, "grad_norm": 2.771774092737828, "learning_rate": 3.2440027065498977e-06, "loss": 1.2612, "step": 59905 }, { "epoch": 1.7664750110570544, "grad_norm": 2.87505960849863, "learning_rate": 3.2433653200457365e-06, "loss": 1.3444, "step": 59910 }, { "epoch": 1.7666224384490639, "grad_norm": 2.9173820848732177, "learning_rate": 3.242727948449881e-06, "loss": 1.3021, "step": 59915 }, { "epoch": 1.7667698658410733, "grad_norm": 2.7146464586083243, "learning_rate": 3.2420905917810827e-06, "loss": 1.2849, "step": 59920 }, { "epoch": 1.7669172932330826, "grad_norm": 2.9482477050167395, "learning_rate": 3.2414532500581008e-06, "loss": 1.2731, "step": 59925 }, { "epoch": 1.767064720625092, "grad_norm": 2.7240416476919704, "learning_rate": 3.2408159232996864e-06, "loss": 1.2882, "step": 59930 }, { "epoch": 1.7672121480171015, "grad_norm": 2.729166682249024, "learning_rate": 3.2401786115245962e-06, "loss": 1.3044, "step": 59935 }, { "epoch": 1.767359575409111, "grad_norm": 2.8324000094664106, "learning_rate": 3.239541314751581e-06, "loss": 1.2892, "step": 59940 }, { "epoch": 1.7675070028011204, "grad_norm": 2.9376756367472856, "learning_rate": 3.238904032999397e-06, "loss": 1.2887, "step": 59945 }, { "epoch": 1.7676544301931298, "grad_norm": 2.8815527488130988, "learning_rate": 3.2382667662867955e-06, "loss": 1.316, "step": 59950 }, { "epoch": 1.7678018575851393, "grad_norm": 2.8247426338556836, "learning_rate": 3.237629514632527e-06, "loss": 1.3064, "step": 59955 }, { "epoch": 1.7679492849771488, "grad_norm": 2.801459411852956, "learning_rate": 3.236992278055347e-06, "loss": 1.2835, "step": 59960 }, { "epoch": 1.7680967123691582, "grad_norm": 2.858979566648295, "learning_rate": 3.236355056574003e-06, "loss": 1.2561, "step": 59965 }, { "epoch": 1.7682441397611677, "grad_norm": 2.8393530901831117, "learning_rate": 3.2357178502072494e-06, "loss": 1.3053, "step": 59970 }, { "epoch": 1.7683915671531771, "grad_norm": 2.8466911619609965, "learning_rate": 3.235080658973835e-06, "loss": 1.2848, "step": 59975 }, { "epoch": 1.7685389945451866, "grad_norm": 2.81213382711166, "learning_rate": 3.234443482892511e-06, "loss": 1.3087, "step": 59980 }, { "epoch": 1.768686421937196, "grad_norm": 2.9449388380717636, "learning_rate": 3.2338063219820244e-06, "loss": 1.3834, "step": 59985 }, { "epoch": 1.7688338493292055, "grad_norm": 2.8662542974743315, "learning_rate": 3.2331691762611274e-06, "loss": 1.3611, "step": 59990 }, { "epoch": 1.768981276721215, "grad_norm": 2.82610948965911, "learning_rate": 3.2325320457485663e-06, "loss": 1.2825, "step": 59995 }, { "epoch": 1.7691287041132242, "grad_norm": 2.7494297500758567, "learning_rate": 3.2318949304630923e-06, "loss": 1.2836, "step": 60000 }, { "epoch": 1.7691287041132242, "eval_loss": 1.0754992961883545, "eval_runtime": 4.2711, "eval_samples_per_second": 92.717, "eval_steps_per_second": 3.044, "step": 60000 }, { "epoch": 1.7692761315052337, "grad_norm": 2.9065630892297873, "learning_rate": 3.231257830423451e-06, "loss": 1.331, "step": 60005 }, { "epoch": 1.7694235588972431, "grad_norm": 2.8227605495927977, "learning_rate": 3.2306207456483904e-06, "loss": 1.2894, "step": 60010 }, { "epoch": 1.7695709862892526, "grad_norm": 2.7722348127439638, "learning_rate": 3.2299836761566584e-06, "loss": 1.281, "step": 60015 }, { "epoch": 1.7697184136812618, "grad_norm": 2.9522411285284953, "learning_rate": 3.229346621966998e-06, "loss": 1.271, "step": 60020 }, { "epoch": 1.7698658410732713, "grad_norm": 2.846921221751313, "learning_rate": 3.22870958309816e-06, "loss": 1.3452, "step": 60025 }, { "epoch": 1.7700132684652807, "grad_norm": 2.6810110802338163, "learning_rate": 3.228072559568887e-06, "loss": 1.3139, "step": 60030 }, { "epoch": 1.7701606958572902, "grad_norm": 2.8731991397558323, "learning_rate": 3.2274355513979256e-06, "loss": 1.2343, "step": 60035 }, { "epoch": 1.7703081232492996, "grad_norm": 2.7101474621807458, "learning_rate": 3.226798558604018e-06, "loss": 1.3239, "step": 60040 }, { "epoch": 1.770455550641309, "grad_norm": 2.7684897740904564, "learning_rate": 3.226161581205912e-06, "loss": 1.3144, "step": 60045 }, { "epoch": 1.7706029780333186, "grad_norm": 2.8626335949325052, "learning_rate": 3.2255246192223478e-06, "loss": 1.2818, "step": 60050 }, { "epoch": 1.770750405425328, "grad_norm": 2.8824911339563632, "learning_rate": 3.2248876726720728e-06, "loss": 1.3182, "step": 60055 }, { "epoch": 1.7708978328173375, "grad_norm": 2.8698365709405373, "learning_rate": 3.2242507415738264e-06, "loss": 1.2922, "step": 60060 }, { "epoch": 1.771045260209347, "grad_norm": 2.794381632640741, "learning_rate": 3.2236138259463526e-06, "loss": 1.3049, "step": 60065 }, { "epoch": 1.7711926876013564, "grad_norm": 2.8983611483781098, "learning_rate": 3.2229769258083925e-06, "loss": 1.2634, "step": 60070 }, { "epoch": 1.7713401149933659, "grad_norm": 2.7659451034895177, "learning_rate": 3.2223400411786887e-06, "loss": 1.2954, "step": 60075 }, { "epoch": 1.7714875423853753, "grad_norm": 2.750153529025775, "learning_rate": 3.2217031720759823e-06, "loss": 1.3321, "step": 60080 }, { "epoch": 1.7716349697773848, "grad_norm": 2.9573874068354673, "learning_rate": 3.221066318519012e-06, "loss": 1.2971, "step": 60085 }, { "epoch": 1.7717823971693942, "grad_norm": 2.899688295201498, "learning_rate": 3.220429480526521e-06, "loss": 1.2346, "step": 60090 }, { "epoch": 1.7719298245614035, "grad_norm": 2.874795265769977, "learning_rate": 3.219792658117245e-06, "loss": 1.2734, "step": 60095 }, { "epoch": 1.772077251953413, "grad_norm": 2.70736819473976, "learning_rate": 3.2191558513099273e-06, "loss": 1.3069, "step": 60100 }, { "epoch": 1.7722246793454224, "grad_norm": 2.621017718840063, "learning_rate": 3.218519060123304e-06, "loss": 1.2353, "step": 60105 }, { "epoch": 1.7723721067374318, "grad_norm": 2.795208837641612, "learning_rate": 3.217882284576115e-06, "loss": 1.2744, "step": 60110 }, { "epoch": 1.7725195341294413, "grad_norm": 2.892970439204914, "learning_rate": 3.2172455246870956e-06, "loss": 1.2595, "step": 60115 }, { "epoch": 1.7726669615214505, "grad_norm": 2.83545555128128, "learning_rate": 3.2166087804749868e-06, "loss": 1.3063, "step": 60120 }, { "epoch": 1.77281438891346, "grad_norm": 2.8477036433076424, "learning_rate": 3.215972051958522e-06, "loss": 1.3051, "step": 60125 }, { "epoch": 1.7729618163054695, "grad_norm": 2.7083047179940465, "learning_rate": 3.215335339156441e-06, "loss": 1.2371, "step": 60130 }, { "epoch": 1.773109243697479, "grad_norm": 2.960457905350582, "learning_rate": 3.214698642087477e-06, "loss": 1.3165, "step": 60135 }, { "epoch": 1.7732566710894884, "grad_norm": 2.8391881105475743, "learning_rate": 3.2140619607703684e-06, "loss": 1.2571, "step": 60140 }, { "epoch": 1.7734040984814978, "grad_norm": 2.87804898292376, "learning_rate": 3.213425295223848e-06, "loss": 1.2302, "step": 60145 }, { "epoch": 1.7735515258735073, "grad_norm": 2.7719359753156447, "learning_rate": 3.2127886454666493e-06, "loss": 1.318, "step": 60150 }, { "epoch": 1.7736989532655167, "grad_norm": 2.7761799725354, "learning_rate": 3.21215201151751e-06, "loss": 1.2433, "step": 60155 }, { "epoch": 1.7738463806575262, "grad_norm": 2.745021678649681, "learning_rate": 3.2115153933951607e-06, "loss": 1.2868, "step": 60160 }, { "epoch": 1.7739938080495357, "grad_norm": 2.79331062815789, "learning_rate": 3.210878791118337e-06, "loss": 1.2914, "step": 60165 }, { "epoch": 1.7741412354415451, "grad_norm": 2.705397555674579, "learning_rate": 3.210242204705769e-06, "loss": 1.3187, "step": 60170 }, { "epoch": 1.7742886628335546, "grad_norm": 2.700270515680725, "learning_rate": 3.209605634176192e-06, "loss": 1.3027, "step": 60175 }, { "epoch": 1.774436090225564, "grad_norm": 2.828538809131435, "learning_rate": 3.2089690795483343e-06, "loss": 1.3048, "step": 60180 }, { "epoch": 1.7745835176175735, "grad_norm": 2.9114169897663316, "learning_rate": 3.208332540840931e-06, "loss": 1.2902, "step": 60185 }, { "epoch": 1.7747309450095827, "grad_norm": 2.8731816780106247, "learning_rate": 3.207696018072711e-06, "loss": 1.2937, "step": 60190 }, { "epoch": 1.7748783724015922, "grad_norm": 2.7018524731771705, "learning_rate": 3.207059511262405e-06, "loss": 1.2893, "step": 60195 }, { "epoch": 1.7750257997936016, "grad_norm": 2.912484960232828, "learning_rate": 3.2064230204287414e-06, "loss": 1.2596, "step": 60200 }, { "epoch": 1.775173227185611, "grad_norm": 2.842230255811964, "learning_rate": 3.2057865455904528e-06, "loss": 1.3187, "step": 60205 }, { "epoch": 1.7753206545776206, "grad_norm": 2.8026074030630226, "learning_rate": 3.2051500867662665e-06, "loss": 1.2811, "step": 60210 }, { "epoch": 1.7754680819696298, "grad_norm": 2.7913513030697117, "learning_rate": 3.20451364397491e-06, "loss": 1.3168, "step": 60215 }, { "epoch": 1.7756155093616393, "grad_norm": 2.8362535151625687, "learning_rate": 3.2038772172351137e-06, "loss": 1.3139, "step": 60220 }, { "epoch": 1.7757629367536487, "grad_norm": 2.883173595595197, "learning_rate": 3.203240806565602e-06, "loss": 1.3007, "step": 60225 }, { "epoch": 1.7759103641456582, "grad_norm": 2.94829346647818, "learning_rate": 3.2026044119851057e-06, "loss": 1.3217, "step": 60230 }, { "epoch": 1.7760577915376676, "grad_norm": 2.769149939869898, "learning_rate": 3.2019680335123486e-06, "loss": 1.3046, "step": 60235 }, { "epoch": 1.776205218929677, "grad_norm": 2.9233844118014956, "learning_rate": 3.201331671166059e-06, "loss": 1.3065, "step": 60240 }, { "epoch": 1.7763526463216865, "grad_norm": 2.8714202908439024, "learning_rate": 3.2006953249649595e-06, "loss": 1.3151, "step": 60245 }, { "epoch": 1.776500073713696, "grad_norm": 2.8975580331905486, "learning_rate": 3.200058994927779e-06, "loss": 1.3197, "step": 60250 }, { "epoch": 1.7766475011057055, "grad_norm": 2.9127198594456774, "learning_rate": 3.1994226810732394e-06, "loss": 1.2982, "step": 60255 }, { "epoch": 1.776794928497715, "grad_norm": 2.8358111688494363, "learning_rate": 3.198786383420067e-06, "loss": 1.2985, "step": 60260 }, { "epoch": 1.7769423558897244, "grad_norm": 2.7366477377456673, "learning_rate": 3.1981501019869843e-06, "loss": 1.3441, "step": 60265 }, { "epoch": 1.7770897832817338, "grad_norm": 2.7268455624484864, "learning_rate": 3.1975138367927158e-06, "loss": 1.3129, "step": 60270 }, { "epoch": 1.7772372106737433, "grad_norm": 2.7905314180096554, "learning_rate": 3.1968775878559835e-06, "loss": 1.2368, "step": 60275 }, { "epoch": 1.7773846380657528, "grad_norm": 2.8371503033539067, "learning_rate": 3.196241355195508e-06, "loss": 1.3063, "step": 60280 }, { "epoch": 1.7775320654577622, "grad_norm": 2.9539044499923506, "learning_rate": 3.1956051388300145e-06, "loss": 1.2786, "step": 60285 }, { "epoch": 1.7776794928497714, "grad_norm": 2.912440592114576, "learning_rate": 3.1949689387782226e-06, "loss": 1.2973, "step": 60290 }, { "epoch": 1.777826920241781, "grad_norm": 2.8373326264417047, "learning_rate": 3.1943327550588535e-06, "loss": 1.3007, "step": 60295 }, { "epoch": 1.7779743476337904, "grad_norm": 2.8485670881073535, "learning_rate": 3.1936965876906262e-06, "loss": 1.2603, "step": 60300 }, { "epoch": 1.7781217750257998, "grad_norm": 2.8166938675412547, "learning_rate": 3.193060436692264e-06, "loss": 1.2977, "step": 60305 }, { "epoch": 1.7782692024178093, "grad_norm": 2.894044684998454, "learning_rate": 3.1924243020824828e-06, "loss": 1.3003, "step": 60310 }, { "epoch": 1.7784166298098185, "grad_norm": 2.9098088204694577, "learning_rate": 3.1917881838800047e-06, "loss": 1.2724, "step": 60315 }, { "epoch": 1.778564057201828, "grad_norm": 2.835981803775011, "learning_rate": 3.1911520821035456e-06, "loss": 1.3051, "step": 60320 }, { "epoch": 1.7787114845938374, "grad_norm": 2.8403194375108898, "learning_rate": 3.1905159967718255e-06, "loss": 1.271, "step": 60325 }, { "epoch": 1.778858911985847, "grad_norm": 2.7742936175146373, "learning_rate": 3.1898799279035596e-06, "loss": 1.235, "step": 60330 }, { "epoch": 1.7790063393778563, "grad_norm": 2.811020304448214, "learning_rate": 3.1892438755174684e-06, "loss": 1.3111, "step": 60335 }, { "epoch": 1.7791537667698658, "grad_norm": 2.724769196932589, "learning_rate": 3.188607839632266e-06, "loss": 1.268, "step": 60340 }, { "epoch": 1.7793011941618753, "grad_norm": 2.9363492123965638, "learning_rate": 3.187971820266668e-06, "loss": 1.3172, "step": 60345 }, { "epoch": 1.7794486215538847, "grad_norm": 2.7135994986052037, "learning_rate": 3.1873358174393917e-06, "loss": 1.2259, "step": 60350 }, { "epoch": 1.7795960489458942, "grad_norm": 2.789769893047906, "learning_rate": 3.1866998311691506e-06, "loss": 1.2821, "step": 60355 }, { "epoch": 1.7797434763379036, "grad_norm": 2.897858315372551, "learning_rate": 3.186063861474661e-06, "loss": 1.3059, "step": 60360 }, { "epoch": 1.779890903729913, "grad_norm": 2.9077529423590645, "learning_rate": 3.185427908374636e-06, "loss": 1.2881, "step": 60365 }, { "epoch": 1.7800383311219226, "grad_norm": 2.9012505133467754, "learning_rate": 3.1847919718877902e-06, "loss": 1.2844, "step": 60370 }, { "epoch": 1.780185758513932, "grad_norm": 2.8345598296283074, "learning_rate": 3.184156052032834e-06, "loss": 1.3231, "step": 60375 }, { "epoch": 1.7803331859059415, "grad_norm": 2.7679133498773494, "learning_rate": 3.183520148828484e-06, "loss": 1.3354, "step": 60380 }, { "epoch": 1.7804806132979507, "grad_norm": 3.168447362027034, "learning_rate": 3.182884262293449e-06, "loss": 1.2834, "step": 60385 }, { "epoch": 1.7806280406899602, "grad_norm": 2.647018903998518, "learning_rate": 3.182248392446443e-06, "loss": 1.2837, "step": 60390 }, { "epoch": 1.7807754680819696, "grad_norm": 2.8090472309918026, "learning_rate": 3.1816125393061766e-06, "loss": 1.3214, "step": 60395 }, { "epoch": 1.780922895473979, "grad_norm": 2.7911049954239466, "learning_rate": 3.1809767028913604e-06, "loss": 1.2945, "step": 60400 }, { "epoch": 1.7810703228659885, "grad_norm": 2.705074197259929, "learning_rate": 3.180340883220705e-06, "loss": 1.2902, "step": 60405 }, { "epoch": 1.7812177502579978, "grad_norm": 2.8229829642238333, "learning_rate": 3.1797050803129177e-06, "loss": 1.3031, "step": 60410 }, { "epoch": 1.7813651776500072, "grad_norm": 2.885595822796245, "learning_rate": 3.1790692941867115e-06, "loss": 1.3021, "step": 60415 }, { "epoch": 1.7815126050420167, "grad_norm": 2.837687666609553, "learning_rate": 3.1784335248607917e-06, "loss": 1.3024, "step": 60420 }, { "epoch": 1.7816600324340262, "grad_norm": 2.7851144554099574, "learning_rate": 3.1777977723538695e-06, "loss": 1.2783, "step": 60425 }, { "epoch": 1.7818074598260356, "grad_norm": 2.813422252248157, "learning_rate": 3.1771620366846493e-06, "loss": 1.332, "step": 60430 }, { "epoch": 1.781954887218045, "grad_norm": 2.8247487683303167, "learning_rate": 3.176526317871842e-06, "loss": 1.2593, "step": 60435 }, { "epoch": 1.7821023146100545, "grad_norm": 2.778855506639651, "learning_rate": 3.175890615934151e-06, "loss": 1.2806, "step": 60440 }, { "epoch": 1.782249742002064, "grad_norm": 2.9135579796146542, "learning_rate": 3.1752549308902864e-06, "loss": 1.3407, "step": 60445 }, { "epoch": 1.7823971693940734, "grad_norm": 2.846402611373269, "learning_rate": 3.17461926275895e-06, "loss": 1.2749, "step": 60450 }, { "epoch": 1.782544596786083, "grad_norm": 2.82717157787564, "learning_rate": 3.1739836115588503e-06, "loss": 1.3256, "step": 60455 }, { "epoch": 1.7826920241780924, "grad_norm": 2.819679632462102, "learning_rate": 3.1733479773086885e-06, "loss": 1.2522, "step": 60460 }, { "epoch": 1.7828394515701018, "grad_norm": 2.923032142926125, "learning_rate": 3.1727123600271725e-06, "loss": 1.2571, "step": 60465 }, { "epoch": 1.7829868789621113, "grad_norm": 2.750173691914383, "learning_rate": 3.172076759733005e-06, "loss": 1.2893, "step": 60470 }, { "epoch": 1.7831343063541207, "grad_norm": 2.792047710220229, "learning_rate": 3.1714411764448877e-06, "loss": 1.2904, "step": 60475 }, { "epoch": 1.7832817337461302, "grad_norm": 2.7855619895097186, "learning_rate": 3.170805610181525e-06, "loss": 1.2701, "step": 60480 }, { "epoch": 1.7834291611381394, "grad_norm": 2.8022056931687693, "learning_rate": 3.170170060961618e-06, "loss": 1.2676, "step": 60485 }, { "epoch": 1.7835765885301489, "grad_norm": 2.7665518150203523, "learning_rate": 3.169534528803869e-06, "loss": 1.2781, "step": 60490 }, { "epoch": 1.7837240159221583, "grad_norm": 3.0660011669114753, "learning_rate": 3.16889901372698e-06, "loss": 1.3543, "step": 60495 }, { "epoch": 1.7838714433141678, "grad_norm": 2.804744560901205, "learning_rate": 3.1682635157496514e-06, "loss": 1.3261, "step": 60500 }, { "epoch": 1.7838714433141678, "eval_loss": 1.0740876197814941, "eval_runtime": 4.1508, "eval_samples_per_second": 95.402, "eval_steps_per_second": 3.132, "step": 60500 }, { "epoch": 1.7840188707061773, "grad_norm": 2.9104905804615018, "learning_rate": 3.167628034890581e-06, "loss": 1.3185, "step": 60505 }, { "epoch": 1.7841662980981865, "grad_norm": 2.9871524283118607, "learning_rate": 3.166992571168473e-06, "loss": 1.3357, "step": 60510 }, { "epoch": 1.784313725490196, "grad_norm": 2.903764614679074, "learning_rate": 3.166357124602022e-06, "loss": 1.3044, "step": 60515 }, { "epoch": 1.7844611528822054, "grad_norm": 2.735609819186151, "learning_rate": 3.1657216952099312e-06, "loss": 1.3022, "step": 60520 }, { "epoch": 1.7846085802742149, "grad_norm": 2.675246608267676, "learning_rate": 3.1650862830108957e-06, "loss": 1.2868, "step": 60525 }, { "epoch": 1.7847560076662243, "grad_norm": 2.8375738898420626, "learning_rate": 3.1644508880236154e-06, "loss": 1.2965, "step": 60530 }, { "epoch": 1.7849034350582338, "grad_norm": 2.9079722685240657, "learning_rate": 3.163815510266786e-06, "loss": 1.2982, "step": 60535 }, { "epoch": 1.7850508624502432, "grad_norm": 2.8810639856050426, "learning_rate": 3.1631801497591024e-06, "loss": 1.3142, "step": 60540 }, { "epoch": 1.7851982898422527, "grad_norm": 2.842538328971941, "learning_rate": 3.1625448065192654e-06, "loss": 1.3073, "step": 60545 }, { "epoch": 1.7853457172342622, "grad_norm": 2.7938159328815835, "learning_rate": 3.1619094805659673e-06, "loss": 1.3299, "step": 60550 }, { "epoch": 1.7854931446262716, "grad_norm": 2.945061811819894, "learning_rate": 3.1612741719179048e-06, "loss": 1.2997, "step": 60555 }, { "epoch": 1.785640572018281, "grad_norm": 2.8046410836940248, "learning_rate": 3.160638880593771e-06, "loss": 1.2498, "step": 60560 }, { "epoch": 1.7857879994102905, "grad_norm": 2.712209971460985, "learning_rate": 3.1600036066122625e-06, "loss": 1.3051, "step": 60565 }, { "epoch": 1.7859354268023, "grad_norm": 2.9619549058821812, "learning_rate": 3.159368349992069e-06, "loss": 1.3037, "step": 60570 }, { "epoch": 1.7860828541943095, "grad_norm": 2.894663950756175, "learning_rate": 3.158733110751889e-06, "loss": 1.3015, "step": 60575 }, { "epoch": 1.7862302815863187, "grad_norm": 2.81658779909792, "learning_rate": 3.158097888910411e-06, "loss": 1.2938, "step": 60580 }, { "epoch": 1.7863777089783281, "grad_norm": 2.7449739698918805, "learning_rate": 3.15746268448633e-06, "loss": 1.275, "step": 60585 }, { "epoch": 1.7865251363703376, "grad_norm": 2.890556561111734, "learning_rate": 3.1568274974983337e-06, "loss": 1.2726, "step": 60590 }, { "epoch": 1.786672563762347, "grad_norm": 2.836702254726731, "learning_rate": 3.156192327965118e-06, "loss": 1.3148, "step": 60595 }, { "epoch": 1.7868199911543565, "grad_norm": 2.8484758061369413, "learning_rate": 3.1555571759053703e-06, "loss": 1.2436, "step": 60600 }, { "epoch": 1.7869674185463658, "grad_norm": 3.5654209808331747, "learning_rate": 3.1549220413377814e-06, "loss": 1.3388, "step": 60605 }, { "epoch": 1.7871148459383752, "grad_norm": 2.800742818182922, "learning_rate": 3.154286924281041e-06, "loss": 1.2641, "step": 60610 }, { "epoch": 1.7872622733303847, "grad_norm": 2.7815355692976254, "learning_rate": 3.153651824753837e-06, "loss": 1.3195, "step": 60615 }, { "epoch": 1.7874097007223941, "grad_norm": 2.6499082979746253, "learning_rate": 3.1530167427748603e-06, "loss": 1.2856, "step": 60620 }, { "epoch": 1.7875571281144036, "grad_norm": 2.94058571795434, "learning_rate": 3.1523816783627972e-06, "loss": 1.3269, "step": 60625 }, { "epoch": 1.787704555506413, "grad_norm": 2.840274897440109, "learning_rate": 3.1517466315363363e-06, "loss": 1.3067, "step": 60630 }, { "epoch": 1.7878519828984225, "grad_norm": 2.908400229929431, "learning_rate": 3.151111602314162e-06, "loss": 1.3185, "step": 60635 }, { "epoch": 1.787999410290432, "grad_norm": 2.8152513912553503, "learning_rate": 3.1504765907149646e-06, "loss": 1.2634, "step": 60640 }, { "epoch": 1.7881468376824414, "grad_norm": 2.6917645239672425, "learning_rate": 3.149841596757426e-06, "loss": 1.2959, "step": 60645 }, { "epoch": 1.7882942650744509, "grad_norm": 2.866374609442744, "learning_rate": 3.149206620460236e-06, "loss": 1.3332, "step": 60650 }, { "epoch": 1.7884416924664603, "grad_norm": 2.9369321734620164, "learning_rate": 3.1485716618420763e-06, "loss": 1.3596, "step": 60655 }, { "epoch": 1.7885891198584698, "grad_norm": 2.935319244792833, "learning_rate": 3.147936720921633e-06, "loss": 1.3209, "step": 60660 }, { "epoch": 1.7887365472504793, "grad_norm": 2.8876640521885197, "learning_rate": 3.147301797717589e-06, "loss": 1.2729, "step": 60665 }, { "epoch": 1.7888839746424887, "grad_norm": 2.7737562712721195, "learning_rate": 3.1466668922486262e-06, "loss": 1.3509, "step": 60670 }, { "epoch": 1.7890314020344982, "grad_norm": 2.8750497570580045, "learning_rate": 3.1460320045334305e-06, "loss": 1.3146, "step": 60675 }, { "epoch": 1.7891788294265074, "grad_norm": 2.6947926340340844, "learning_rate": 3.145397134590682e-06, "loss": 1.2822, "step": 60680 }, { "epoch": 1.7893262568185169, "grad_norm": 2.893311520183216, "learning_rate": 3.1447622824390643e-06, "loss": 1.2912, "step": 60685 }, { "epoch": 1.7894736842105263, "grad_norm": 2.8772868345686096, "learning_rate": 3.144127448097256e-06, "loss": 1.2916, "step": 60690 }, { "epoch": 1.7896211116025358, "grad_norm": 2.7881574264380893, "learning_rate": 3.1434926315839404e-06, "loss": 1.3117, "step": 60695 }, { "epoch": 1.789768538994545, "grad_norm": 2.8109248832000846, "learning_rate": 3.1428578329177953e-06, "loss": 1.2607, "step": 60700 }, { "epoch": 1.7899159663865545, "grad_norm": 2.8687591803533277, "learning_rate": 3.1422230521175037e-06, "loss": 1.2821, "step": 60705 }, { "epoch": 1.790063393778564, "grad_norm": 2.814568020718142, "learning_rate": 3.1415882892017415e-06, "loss": 1.3582, "step": 60710 }, { "epoch": 1.7902108211705734, "grad_norm": 2.8520833405610384, "learning_rate": 3.14095354418919e-06, "loss": 1.26, "step": 60715 }, { "epoch": 1.7903582485625829, "grad_norm": 2.826841674895938, "learning_rate": 3.1403188170985237e-06, "loss": 1.2916, "step": 60720 }, { "epoch": 1.7905056759545923, "grad_norm": 2.8129657059278466, "learning_rate": 3.1396841079484243e-06, "loss": 1.2726, "step": 60725 }, { "epoch": 1.7906531033466018, "grad_norm": 2.7218457397596745, "learning_rate": 3.1390494167575672e-06, "loss": 1.274, "step": 60730 }, { "epoch": 1.7908005307386112, "grad_norm": 2.7595415560515466, "learning_rate": 3.1384147435446275e-06, "loss": 1.2746, "step": 60735 }, { "epoch": 1.7909479581306207, "grad_norm": 2.874433111449831, "learning_rate": 3.1377800883282833e-06, "loss": 1.2721, "step": 60740 }, { "epoch": 1.7910953855226301, "grad_norm": 2.824548458933401, "learning_rate": 3.137145451127207e-06, "loss": 1.2819, "step": 60745 }, { "epoch": 1.7912428129146396, "grad_norm": 2.7414183578915625, "learning_rate": 3.1365108319600775e-06, "loss": 1.3025, "step": 60750 }, { "epoch": 1.791390240306649, "grad_norm": 2.9055755985087135, "learning_rate": 3.1358762308455666e-06, "loss": 1.3282, "step": 60755 }, { "epoch": 1.7915376676986585, "grad_norm": 2.788689449297638, "learning_rate": 3.13524164780235e-06, "loss": 1.2416, "step": 60760 }, { "epoch": 1.791685095090668, "grad_norm": 2.73090261687559, "learning_rate": 3.1346070828490977e-06, "loss": 1.2573, "step": 60765 }, { "epoch": 1.7918325224826774, "grad_norm": 2.7836826301311803, "learning_rate": 3.1339725360044866e-06, "loss": 1.3338, "step": 60770 }, { "epoch": 1.7919799498746867, "grad_norm": 2.9038268444389166, "learning_rate": 3.133338007287185e-06, "loss": 1.3221, "step": 60775 }, { "epoch": 1.7921273772666961, "grad_norm": 2.850975162670276, "learning_rate": 3.1327034967158687e-06, "loss": 1.2415, "step": 60780 }, { "epoch": 1.7922748046587056, "grad_norm": 2.7099610642822167, "learning_rate": 3.1320690043092067e-06, "loss": 1.2944, "step": 60785 }, { "epoch": 1.792422232050715, "grad_norm": 2.830045757998327, "learning_rate": 3.13143453008587e-06, "loss": 1.2668, "step": 60790 }, { "epoch": 1.7925696594427245, "grad_norm": 2.906223437483613, "learning_rate": 3.130800074064528e-06, "loss": 1.3077, "step": 60795 }, { "epoch": 1.7927170868347337, "grad_norm": 2.766816656530484, "learning_rate": 3.1301656362638503e-06, "loss": 1.3091, "step": 60800 }, { "epoch": 1.7928645142267432, "grad_norm": 2.7327246394529365, "learning_rate": 3.129531216702507e-06, "loss": 1.3157, "step": 60805 }, { "epoch": 1.7930119416187527, "grad_norm": 2.8553600230463103, "learning_rate": 3.1288968153991666e-06, "loss": 1.2618, "step": 60810 }, { "epoch": 1.7931593690107621, "grad_norm": 2.9232704582116646, "learning_rate": 3.1282624323724966e-06, "loss": 1.3396, "step": 60815 }, { "epoch": 1.7933067964027716, "grad_norm": 2.8931618518630797, "learning_rate": 3.127628067641163e-06, "loss": 1.3147, "step": 60820 }, { "epoch": 1.793454223794781, "grad_norm": 2.631579392782339, "learning_rate": 3.1269937212238364e-06, "loss": 1.3274, "step": 60825 }, { "epoch": 1.7936016511867905, "grad_norm": 2.7990743416446704, "learning_rate": 3.1263593931391788e-06, "loss": 1.2873, "step": 60830 }, { "epoch": 1.7937490785788, "grad_norm": 2.7248151407581136, "learning_rate": 3.1257250834058597e-06, "loss": 1.3096, "step": 60835 }, { "epoch": 1.7938965059708094, "grad_norm": 2.8309369065434358, "learning_rate": 3.1250907920425422e-06, "loss": 1.2746, "step": 60840 }, { "epoch": 1.7940439333628189, "grad_norm": 2.8375959116458103, "learning_rate": 3.1244565190678925e-06, "loss": 1.2587, "step": 60845 }, { "epoch": 1.7941913607548283, "grad_norm": 2.7966821056567412, "learning_rate": 3.1238222645005727e-06, "loss": 1.2898, "step": 60850 }, { "epoch": 1.7943387881468378, "grad_norm": 2.76718241254255, "learning_rate": 3.1231880283592493e-06, "loss": 1.3004, "step": 60855 }, { "epoch": 1.7944862155388472, "grad_norm": 2.8706305126942846, "learning_rate": 3.1225538106625834e-06, "loss": 1.305, "step": 60860 }, { "epoch": 1.7946336429308567, "grad_norm": 2.8077887898371565, "learning_rate": 3.121919611429238e-06, "loss": 1.3347, "step": 60865 }, { "epoch": 1.794781070322866, "grad_norm": 2.933229427139599, "learning_rate": 3.1212854306778758e-06, "loss": 1.3105, "step": 60870 }, { "epoch": 1.7949284977148754, "grad_norm": 2.7520554790411462, "learning_rate": 3.120651268427156e-06, "loss": 1.2991, "step": 60875 }, { "epoch": 1.7950759251068849, "grad_norm": 2.915528289055127, "learning_rate": 3.1200171246957428e-06, "loss": 1.289, "step": 60880 }, { "epoch": 1.7952233524988943, "grad_norm": 2.9236308031095692, "learning_rate": 3.1193829995022943e-06, "loss": 1.2545, "step": 60885 }, { "epoch": 1.7953707798909038, "grad_norm": 2.8850869770763747, "learning_rate": 3.118748892865472e-06, "loss": 1.3537, "step": 60890 }, { "epoch": 1.795518207282913, "grad_norm": 2.817692530502732, "learning_rate": 3.1181148048039323e-06, "loss": 1.311, "step": 60895 }, { "epoch": 1.7956656346749225, "grad_norm": 2.8570570039936714, "learning_rate": 3.117480735336338e-06, "loss": 1.2768, "step": 60900 }, { "epoch": 1.795813062066932, "grad_norm": 2.8242336461167907, "learning_rate": 3.1168466844813437e-06, "loss": 1.238, "step": 60905 }, { "epoch": 1.7959604894589414, "grad_norm": 2.950795899887588, "learning_rate": 3.1162126522576103e-06, "loss": 1.2974, "step": 60910 }, { "epoch": 1.7961079168509508, "grad_norm": 2.7668306234539934, "learning_rate": 3.115578638683793e-06, "loss": 1.3286, "step": 60915 }, { "epoch": 1.7962553442429603, "grad_norm": 2.8059614345048476, "learning_rate": 3.114944643778549e-06, "loss": 1.2642, "step": 60920 }, { "epoch": 1.7964027716349698, "grad_norm": 2.78528101698952, "learning_rate": 3.1143106675605336e-06, "loss": 1.2565, "step": 60925 }, { "epoch": 1.7965501990269792, "grad_norm": 2.773359301668767, "learning_rate": 3.113676710048402e-06, "loss": 1.2984, "step": 60930 }, { "epoch": 1.7966976264189887, "grad_norm": 2.712532573586233, "learning_rate": 3.1130427712608107e-06, "loss": 1.3058, "step": 60935 }, { "epoch": 1.7968450538109981, "grad_norm": 2.7624574299502602, "learning_rate": 3.1124088512164128e-06, "loss": 1.2559, "step": 60940 }, { "epoch": 1.7969924812030076, "grad_norm": 2.8474934326758388, "learning_rate": 3.1117749499338632e-06, "loss": 1.3396, "step": 60945 }, { "epoch": 1.797139908595017, "grad_norm": 2.7740866021916335, "learning_rate": 3.1111410674318125e-06, "loss": 1.2383, "step": 60950 }, { "epoch": 1.7972873359870265, "grad_norm": 2.8179171991233454, "learning_rate": 3.1105072037289177e-06, "loss": 1.2941, "step": 60955 }, { "epoch": 1.797434763379036, "grad_norm": 2.779908417423555, "learning_rate": 3.1098733588438263e-06, "loss": 1.2871, "step": 60960 }, { "epoch": 1.7975821907710454, "grad_norm": 2.810252483631583, "learning_rate": 3.1092395327951946e-06, "loss": 1.2699, "step": 60965 }, { "epoch": 1.7977296181630547, "grad_norm": 2.75992944417235, "learning_rate": 3.10860572560167e-06, "loss": 1.2708, "step": 60970 }, { "epoch": 1.7978770455550641, "grad_norm": 2.969798337522005, "learning_rate": 3.107971937281905e-06, "loss": 1.3478, "step": 60975 }, { "epoch": 1.7980244729470736, "grad_norm": 2.7787958395164343, "learning_rate": 3.1073381678545478e-06, "loss": 1.2859, "step": 60980 }, { "epoch": 1.798171900339083, "grad_norm": 2.6957803013142456, "learning_rate": 3.10670441733825e-06, "loss": 1.2616, "step": 60985 }, { "epoch": 1.7983193277310925, "grad_norm": 2.8511598082101823, "learning_rate": 3.1060706857516596e-06, "loss": 1.352, "step": 60990 }, { "epoch": 1.7984667551231017, "grad_norm": 2.73830757699629, "learning_rate": 3.1054369731134238e-06, "loss": 1.3002, "step": 60995 }, { "epoch": 1.7986141825151112, "grad_norm": 2.93889617547866, "learning_rate": 3.104803279442192e-06, "loss": 1.3267, "step": 61000 }, { "epoch": 1.7986141825151112, "eval_loss": 1.0726953744888306, "eval_runtime": 4.3226, "eval_samples_per_second": 91.612, "eval_steps_per_second": 3.007, "step": 61000 }, { "epoch": 1.7987616099071206, "grad_norm": 2.983341861308713, "learning_rate": 3.104169604756608e-06, "loss": 1.3491, "step": 61005 }, { "epoch": 1.79890903729913, "grad_norm": 2.904337533605737, "learning_rate": 3.1035359490753227e-06, "loss": 1.3104, "step": 61010 }, { "epoch": 1.7990564646911396, "grad_norm": 2.928934675738364, "learning_rate": 3.1029023124169802e-06, "loss": 1.3305, "step": 61015 }, { "epoch": 1.799203892083149, "grad_norm": 2.807408598986689, "learning_rate": 3.1022686948002264e-06, "loss": 1.2997, "step": 61020 }, { "epoch": 1.7993513194751585, "grad_norm": 2.9496398570500078, "learning_rate": 3.1016350962437044e-06, "loss": 1.3222, "step": 61025 }, { "epoch": 1.799498746867168, "grad_norm": 2.7566521328614098, "learning_rate": 3.1010015167660613e-06, "loss": 1.3, "step": 61030 }, { "epoch": 1.7996461742591774, "grad_norm": 2.792640729081099, "learning_rate": 3.1003679563859385e-06, "loss": 1.3251, "step": 61035 }, { "epoch": 1.7997936016511868, "grad_norm": 2.7063944078596394, "learning_rate": 3.099734415121982e-06, "loss": 1.2747, "step": 61040 }, { "epoch": 1.7999410290431963, "grad_norm": 2.8035332350237185, "learning_rate": 3.099100892992832e-06, "loss": 1.2947, "step": 61045 }, { "epoch": 1.8000884564352058, "grad_norm": 2.673468392981562, "learning_rate": 3.0984673900171322e-06, "loss": 1.3337, "step": 61050 }, { "epoch": 1.8002358838272152, "grad_norm": 2.8291976403359365, "learning_rate": 3.0978339062135232e-06, "loss": 1.2748, "step": 61055 }, { "epoch": 1.8003833112192247, "grad_norm": 2.806972077280891, "learning_rate": 3.0972004416006446e-06, "loss": 1.2674, "step": 61060 }, { "epoch": 1.800530738611234, "grad_norm": 2.855884489469887, "learning_rate": 3.0965669961971404e-06, "loss": 1.3451, "step": 61065 }, { "epoch": 1.8006781660032434, "grad_norm": 2.8790941185868855, "learning_rate": 3.0959335700216475e-06, "loss": 1.2501, "step": 61070 }, { "epoch": 1.8008255933952528, "grad_norm": 2.7979765015740417, "learning_rate": 3.095300163092807e-06, "loss": 1.2614, "step": 61075 }, { "epoch": 1.8009730207872623, "grad_norm": 2.7804015899781453, "learning_rate": 3.0946667754292548e-06, "loss": 1.328, "step": 61080 }, { "epoch": 1.8011204481792717, "grad_norm": 2.6928774215658673, "learning_rate": 3.0940334070496333e-06, "loss": 1.301, "step": 61085 }, { "epoch": 1.801267875571281, "grad_norm": 2.8267934039706475, "learning_rate": 3.0934000579725755e-06, "loss": 1.3322, "step": 61090 }, { "epoch": 1.8014153029632904, "grad_norm": 2.6316107419054293, "learning_rate": 3.0927667282167226e-06, "loss": 1.2814, "step": 61095 }, { "epoch": 1.8015627303553, "grad_norm": 2.8245891417786817, "learning_rate": 3.092133417800709e-06, "loss": 1.2662, "step": 61100 }, { "epoch": 1.8017101577473094, "grad_norm": 2.857685995292322, "learning_rate": 3.0915001267431712e-06, "loss": 1.2957, "step": 61105 }, { "epoch": 1.8018575851393188, "grad_norm": 2.874209590298089, "learning_rate": 3.0908668550627423e-06, "loss": 1.2989, "step": 61110 }, { "epoch": 1.8020050125313283, "grad_norm": 2.882317370907071, "learning_rate": 3.0902336027780605e-06, "loss": 1.3308, "step": 61115 }, { "epoch": 1.8021524399233377, "grad_norm": 2.889524823288411, "learning_rate": 3.0896003699077584e-06, "loss": 1.2734, "step": 61120 }, { "epoch": 1.8022998673153472, "grad_norm": 2.836202427122682, "learning_rate": 3.0889671564704686e-06, "loss": 1.253, "step": 61125 }, { "epoch": 1.8024472947073567, "grad_norm": 2.6838051553326205, "learning_rate": 3.0883339624848263e-06, "loss": 1.3115, "step": 61130 }, { "epoch": 1.802594722099366, "grad_norm": 2.7798976942175866, "learning_rate": 3.0877007879694606e-06, "loss": 1.2718, "step": 61135 }, { "epoch": 1.8027421494913756, "grad_norm": 2.623161034571148, "learning_rate": 3.0870676329430072e-06, "loss": 1.2726, "step": 61140 }, { "epoch": 1.802889576883385, "grad_norm": 2.9325095442251805, "learning_rate": 3.086434497424095e-06, "loss": 1.3257, "step": 61145 }, { "epoch": 1.8030370042753945, "grad_norm": 2.7210178406323, "learning_rate": 3.085801381431356e-06, "loss": 1.2484, "step": 61150 }, { "epoch": 1.803184431667404, "grad_norm": 2.755874307497634, "learning_rate": 3.0851682849834182e-06, "loss": 1.2974, "step": 61155 }, { "epoch": 1.8033318590594134, "grad_norm": 2.6638979177096194, "learning_rate": 3.084535208098915e-06, "loss": 1.2224, "step": 61160 }, { "epoch": 1.8034792864514226, "grad_norm": 2.810640605346366, "learning_rate": 3.083902150796471e-06, "loss": 1.2962, "step": 61165 }, { "epoch": 1.803626713843432, "grad_norm": 2.8549510676199623, "learning_rate": 3.0832691130947185e-06, "loss": 1.2252, "step": 61170 }, { "epoch": 1.8037741412354416, "grad_norm": 2.8459175077511523, "learning_rate": 3.0826360950122825e-06, "loss": 1.299, "step": 61175 }, { "epoch": 1.803921568627451, "grad_norm": 2.8792217066316956, "learning_rate": 3.082003096567793e-06, "loss": 1.2303, "step": 61180 }, { "epoch": 1.8040689960194605, "grad_norm": 2.7491835681968766, "learning_rate": 3.081370117779873e-06, "loss": 1.325, "step": 61185 }, { "epoch": 1.8042164234114697, "grad_norm": 2.9717572615127303, "learning_rate": 3.0807371586671527e-06, "loss": 1.3507, "step": 61190 }, { "epoch": 1.8043638508034792, "grad_norm": 2.839586530697852, "learning_rate": 3.0801042192482553e-06, "loss": 1.3213, "step": 61195 }, { "epoch": 1.8045112781954886, "grad_norm": 2.934705146894725, "learning_rate": 3.079471299541806e-06, "loss": 1.2693, "step": 61200 }, { "epoch": 1.804658705587498, "grad_norm": 2.8097001888048627, "learning_rate": 3.0788383995664297e-06, "loss": 1.2812, "step": 61205 }, { "epoch": 1.8048061329795075, "grad_norm": 2.8084474920140585, "learning_rate": 3.0782055193407485e-06, "loss": 1.2846, "step": 61210 }, { "epoch": 1.804953560371517, "grad_norm": 2.9073895582063676, "learning_rate": 3.0775726588833886e-06, "loss": 1.334, "step": 61215 }, { "epoch": 1.8051009877635265, "grad_norm": 2.73361575953422, "learning_rate": 3.076939818212969e-06, "loss": 1.2868, "step": 61220 }, { "epoch": 1.805248415155536, "grad_norm": 2.67603522192156, "learning_rate": 3.076306997348116e-06, "loss": 1.2962, "step": 61225 }, { "epoch": 1.8053958425475454, "grad_norm": 2.757969675180692, "learning_rate": 3.075674196307448e-06, "loss": 1.2712, "step": 61230 }, { "epoch": 1.8055432699395548, "grad_norm": 2.922046361697154, "learning_rate": 3.0750414151095876e-06, "loss": 1.2802, "step": 61235 }, { "epoch": 1.8056906973315643, "grad_norm": 2.834674068736592, "learning_rate": 3.0744086537731523e-06, "loss": 1.3027, "step": 61240 }, { "epoch": 1.8058381247235737, "grad_norm": 2.7697731570948196, "learning_rate": 3.0737759123167657e-06, "loss": 1.284, "step": 61245 }, { "epoch": 1.8059855521155832, "grad_norm": 2.8278152678208253, "learning_rate": 3.073143190759044e-06, "loss": 1.2455, "step": 61250 }, { "epoch": 1.8061329795075927, "grad_norm": 2.913438637462346, "learning_rate": 3.072510489118608e-06, "loss": 1.33, "step": 61255 }, { "epoch": 1.806280406899602, "grad_norm": 2.753962854612552, "learning_rate": 3.0718778074140744e-06, "loss": 1.2816, "step": 61260 }, { "epoch": 1.8064278342916114, "grad_norm": 2.8103442816504973, "learning_rate": 3.0712451456640587e-06, "loss": 1.2792, "step": 61265 }, { "epoch": 1.8065752616836208, "grad_norm": 2.8744286690452396, "learning_rate": 3.0706125038871815e-06, "loss": 1.2637, "step": 61270 }, { "epoch": 1.8067226890756303, "grad_norm": 2.83453164351263, "learning_rate": 3.0699798821020564e-06, "loss": 1.2721, "step": 61275 }, { "epoch": 1.8068701164676397, "grad_norm": 2.7701665179642383, "learning_rate": 3.0693472803273003e-06, "loss": 1.2892, "step": 61280 }, { "epoch": 1.807017543859649, "grad_norm": 2.842922856011243, "learning_rate": 3.0687146985815263e-06, "loss": 1.2807, "step": 61285 }, { "epoch": 1.8071649712516584, "grad_norm": 2.9042470753312872, "learning_rate": 3.0680821368833515e-06, "loss": 1.2751, "step": 61290 }, { "epoch": 1.8073123986436679, "grad_norm": 2.702926171810599, "learning_rate": 3.0674495952513876e-06, "loss": 1.2442, "step": 61295 }, { "epoch": 1.8074598260356773, "grad_norm": 2.7815318751857143, "learning_rate": 3.06681707370425e-06, "loss": 1.3108, "step": 61300 }, { "epoch": 1.8076072534276868, "grad_norm": 2.796412685309115, "learning_rate": 3.0661845722605494e-06, "loss": 1.299, "step": 61305 }, { "epoch": 1.8077546808196963, "grad_norm": 2.803322535032255, "learning_rate": 3.065552090938899e-06, "loss": 1.279, "step": 61310 }, { "epoch": 1.8079021082117057, "grad_norm": 2.7943513093238663, "learning_rate": 3.064919629757908e-06, "loss": 1.3033, "step": 61315 }, { "epoch": 1.8080495356037152, "grad_norm": 2.900741069612395, "learning_rate": 3.064287188736192e-06, "loss": 1.2752, "step": 61320 }, { "epoch": 1.8081969629957246, "grad_norm": 2.8648084610378692, "learning_rate": 3.0636547678923576e-06, "loss": 1.2945, "step": 61325 }, { "epoch": 1.808344390387734, "grad_norm": 2.82142712692126, "learning_rate": 3.063022367245015e-06, "loss": 1.2909, "step": 61330 }, { "epoch": 1.8084918177797435, "grad_norm": 2.784957016706441, "learning_rate": 3.0623899868127743e-06, "loss": 1.2795, "step": 61335 }, { "epoch": 1.808639245171753, "grad_norm": 2.8301034731171906, "learning_rate": 3.0617576266142415e-06, "loss": 1.3027, "step": 61340 }, { "epoch": 1.8087866725637625, "grad_norm": 2.9351088515780575, "learning_rate": 3.061125286668028e-06, "loss": 1.3104, "step": 61345 }, { "epoch": 1.808934099955772, "grad_norm": 2.8780396057927806, "learning_rate": 3.0604929669927386e-06, "loss": 1.3204, "step": 61350 }, { "epoch": 1.8090815273477814, "grad_norm": 2.937811161332193, "learning_rate": 3.059860667606982e-06, "loss": 1.2856, "step": 61355 }, { "epoch": 1.8092289547397906, "grad_norm": 2.8855171112677342, "learning_rate": 3.0592283885293633e-06, "loss": 1.2841, "step": 61360 }, { "epoch": 1.8093763821318, "grad_norm": 2.767539926885318, "learning_rate": 3.0585961297784877e-06, "loss": 1.2938, "step": 61365 }, { "epoch": 1.8095238095238095, "grad_norm": 2.870052481364022, "learning_rate": 3.05796389137296e-06, "loss": 1.3375, "step": 61370 }, { "epoch": 1.809671236915819, "grad_norm": 2.760199006228666, "learning_rate": 3.0573316733313855e-06, "loss": 1.2365, "step": 61375 }, { "epoch": 1.8098186643078282, "grad_norm": 2.8101216290072726, "learning_rate": 3.0566994756723673e-06, "loss": 1.2685, "step": 61380 }, { "epoch": 1.8099660916998377, "grad_norm": 2.8739745984152716, "learning_rate": 3.05606729841451e-06, "loss": 1.3542, "step": 61385 }, { "epoch": 1.8101135190918471, "grad_norm": 2.696160306132735, "learning_rate": 3.055435141576414e-06, "loss": 1.269, "step": 61390 }, { "epoch": 1.8102609464838566, "grad_norm": 2.8585099482722027, "learning_rate": 3.0548030051766806e-06, "loss": 1.2848, "step": 61395 }, { "epoch": 1.810408373875866, "grad_norm": 2.826283536336441, "learning_rate": 3.0541708892339146e-06, "loss": 1.3121, "step": 61400 }, { "epoch": 1.8105558012678755, "grad_norm": 2.8640711971137547, "learning_rate": 3.0535387937667136e-06, "loss": 1.2836, "step": 61405 }, { "epoch": 1.810703228659885, "grad_norm": 2.8222139673121176, "learning_rate": 3.0529067187936797e-06, "loss": 1.3163, "step": 61410 }, { "epoch": 1.8108506560518944, "grad_norm": 2.705806088810047, "learning_rate": 3.05227466433341e-06, "loss": 1.2632, "step": 61415 }, { "epoch": 1.810998083443904, "grad_norm": 2.8806550818483307, "learning_rate": 3.0516426304045065e-06, "loss": 1.3013, "step": 61420 }, { "epoch": 1.8111455108359134, "grad_norm": 2.8453522549371875, "learning_rate": 3.051010617025565e-06, "loss": 1.2814, "step": 61425 }, { "epoch": 1.8112929382279228, "grad_norm": 2.684509132923121, "learning_rate": 3.0503786242151853e-06, "loss": 1.2568, "step": 61430 }, { "epoch": 1.8114403656199323, "grad_norm": 2.8674774906655327, "learning_rate": 3.0497466519919625e-06, "loss": 1.3311, "step": 61435 }, { "epoch": 1.8115877930119417, "grad_norm": 2.785356409312432, "learning_rate": 3.049114700374496e-06, "loss": 1.3631, "step": 61440 }, { "epoch": 1.8117352204039512, "grad_norm": 2.7361630019751653, "learning_rate": 3.0484827693813768e-06, "loss": 1.2552, "step": 61445 }, { "epoch": 1.8118826477959606, "grad_norm": 2.821791287622846, "learning_rate": 3.0478508590312053e-06, "loss": 1.28, "step": 61450 }, { "epoch": 1.8120300751879699, "grad_norm": 2.8291834665418736, "learning_rate": 3.047218969342574e-06, "loss": 1.2686, "step": 61455 }, { "epoch": 1.8121775025799793, "grad_norm": 2.777055858202849, "learning_rate": 3.0465871003340766e-06, "loss": 1.2701, "step": 61460 }, { "epoch": 1.8123249299719888, "grad_norm": 3.0071955095476937, "learning_rate": 3.0459552520243073e-06, "loss": 1.3307, "step": 61465 }, { "epoch": 1.8124723573639983, "grad_norm": 2.7894783337780065, "learning_rate": 3.0453234244318574e-06, "loss": 1.2839, "step": 61470 }, { "epoch": 1.8126197847560077, "grad_norm": 2.8284641946946367, "learning_rate": 3.0446916175753215e-06, "loss": 1.3, "step": 61475 }, { "epoch": 1.812767212148017, "grad_norm": 2.770495602111662, "learning_rate": 3.0440598314732893e-06, "loss": 1.2579, "step": 61480 }, { "epoch": 1.8129146395400264, "grad_norm": 2.686568312844134, "learning_rate": 3.0434280661443534e-06, "loss": 1.3359, "step": 61485 }, { "epoch": 1.8130620669320359, "grad_norm": 2.66079014664532, "learning_rate": 3.0427963216071032e-06, "loss": 1.2389, "step": 61490 }, { "epoch": 1.8132094943240453, "grad_norm": 2.942951771518885, "learning_rate": 3.0421645978801296e-06, "loss": 1.3311, "step": 61495 }, { "epoch": 1.8133569217160548, "grad_norm": 2.775759002600868, "learning_rate": 3.0415328949820188e-06, "loss": 1.2277, "step": 61500 }, { "epoch": 1.8133569217160548, "eval_loss": 1.0722228288650513, "eval_runtime": 4.1724, "eval_samples_per_second": 94.91, "eval_steps_per_second": 3.116, "step": 61500 }, { "epoch": 1.8135043491080642, "grad_norm": 2.754311986416621, "learning_rate": 3.0409012129313637e-06, "loss": 1.2551, "step": 61505 }, { "epoch": 1.8136517765000737, "grad_norm": 3.043601113168611, "learning_rate": 3.0402695517467494e-06, "loss": 1.2179, "step": 61510 }, { "epoch": 1.8137992038920832, "grad_norm": 2.8549246986739263, "learning_rate": 3.039637911446764e-06, "loss": 1.2371, "step": 61515 }, { "epoch": 1.8139466312840926, "grad_norm": 2.7209982822218732, "learning_rate": 3.0390062920499953e-06, "loss": 1.2823, "step": 61520 }, { "epoch": 1.814094058676102, "grad_norm": 2.719274007204287, "learning_rate": 3.038374693575026e-06, "loss": 1.2781, "step": 61525 }, { "epoch": 1.8142414860681115, "grad_norm": 2.821567332930527, "learning_rate": 3.0377431160404457e-06, "loss": 1.3154, "step": 61530 }, { "epoch": 1.814388913460121, "grad_norm": 2.7721295963608745, "learning_rate": 3.037111559464837e-06, "loss": 1.2729, "step": 61535 }, { "epoch": 1.8145363408521304, "grad_norm": 2.8025329165928308, "learning_rate": 3.0364800238667846e-06, "loss": 1.306, "step": 61540 }, { "epoch": 1.81468376824414, "grad_norm": 2.834085255948328, "learning_rate": 3.0358485092648718e-06, "loss": 1.3402, "step": 61545 }, { "epoch": 1.8148311956361491, "grad_norm": 2.943627965623524, "learning_rate": 3.0352170156776833e-06, "loss": 1.289, "step": 61550 }, { "epoch": 1.8149786230281586, "grad_norm": 2.8770497655384757, "learning_rate": 3.0345855431237984e-06, "loss": 1.3239, "step": 61555 }, { "epoch": 1.815126050420168, "grad_norm": 2.8130838986651496, "learning_rate": 3.0339540916218023e-06, "loss": 1.3185, "step": 61560 }, { "epoch": 1.8152734778121775, "grad_norm": 2.7386434921330594, "learning_rate": 3.0333226611902747e-06, "loss": 1.3011, "step": 61565 }, { "epoch": 1.815420905204187, "grad_norm": 2.8155312874107583, "learning_rate": 3.032691251847796e-06, "loss": 1.3487, "step": 61570 }, { "epoch": 1.8155683325961962, "grad_norm": 2.6986231530930134, "learning_rate": 3.0320598636129456e-06, "loss": 1.3006, "step": 61575 }, { "epoch": 1.8157157599882057, "grad_norm": 2.835963530984602, "learning_rate": 3.0314284965043043e-06, "loss": 1.2791, "step": 61580 }, { "epoch": 1.8158631873802151, "grad_norm": 2.7944496402860577, "learning_rate": 3.0307971505404505e-06, "loss": 1.275, "step": 61585 }, { "epoch": 1.8160106147722246, "grad_norm": 2.82996411548329, "learning_rate": 3.030165825739961e-06, "loss": 1.31, "step": 61590 }, { "epoch": 1.816158042164234, "grad_norm": 3.24604565905901, "learning_rate": 3.0295345221214146e-06, "loss": 1.3048, "step": 61595 }, { "epoch": 1.8163054695562435, "grad_norm": 2.6940634943161044, "learning_rate": 3.0289032397033862e-06, "loss": 1.2587, "step": 61600 }, { "epoch": 1.816452896948253, "grad_norm": 2.9114653952302376, "learning_rate": 3.0282719785044548e-06, "loss": 1.2897, "step": 61605 }, { "epoch": 1.8166003243402624, "grad_norm": 2.7316853331007263, "learning_rate": 3.027640738543193e-06, "loss": 1.2528, "step": 61610 }, { "epoch": 1.8167477517322719, "grad_norm": 2.720173899012081, "learning_rate": 3.0270095198381792e-06, "loss": 1.2833, "step": 61615 }, { "epoch": 1.8168951791242813, "grad_norm": 2.6807466318429567, "learning_rate": 3.0263783224079843e-06, "loss": 1.294, "step": 61620 }, { "epoch": 1.8170426065162908, "grad_norm": 2.806491553336866, "learning_rate": 3.025747146271185e-06, "loss": 1.3061, "step": 61625 }, { "epoch": 1.8171900339083003, "grad_norm": 2.921216724237267, "learning_rate": 3.025115991446351e-06, "loss": 1.3113, "step": 61630 }, { "epoch": 1.8173374613003097, "grad_norm": 2.7912167351953037, "learning_rate": 3.0244848579520587e-06, "loss": 1.3086, "step": 61635 }, { "epoch": 1.8174848886923192, "grad_norm": 3.1029928581074535, "learning_rate": 3.0238537458068766e-06, "loss": 1.2822, "step": 61640 }, { "epoch": 1.8176323160843286, "grad_norm": 2.7468596533294147, "learning_rate": 3.0232226550293787e-06, "loss": 1.2714, "step": 61645 }, { "epoch": 1.8177797434763379, "grad_norm": 2.8876703993357524, "learning_rate": 3.022591585638134e-06, "loss": 1.3329, "step": 61650 }, { "epoch": 1.8179271708683473, "grad_norm": 2.8369838476741034, "learning_rate": 3.02196053765171e-06, "loss": 1.3611, "step": 61655 }, { "epoch": 1.8180745982603568, "grad_norm": 2.882108033668155, "learning_rate": 3.0213295110886807e-06, "loss": 1.3495, "step": 61660 }, { "epoch": 1.8182220256523662, "grad_norm": 2.814094654126329, "learning_rate": 3.0206985059676116e-06, "loss": 1.2672, "step": 61665 }, { "epoch": 1.8183694530443757, "grad_norm": 2.89173048549664, "learning_rate": 3.020067522307072e-06, "loss": 1.2668, "step": 61670 }, { "epoch": 1.818516880436385, "grad_norm": 2.7949170314921115, "learning_rate": 3.019436560125629e-06, "loss": 1.27, "step": 61675 }, { "epoch": 1.8186643078283944, "grad_norm": 2.733876476410242, "learning_rate": 3.01880561944185e-06, "loss": 1.2953, "step": 61680 }, { "epoch": 1.8188117352204038, "grad_norm": 2.754069064834533, "learning_rate": 3.0181747002742992e-06, "loss": 1.3497, "step": 61685 }, { "epoch": 1.8189591626124133, "grad_norm": 2.7429014162801875, "learning_rate": 3.0175438026415446e-06, "loss": 1.2373, "step": 61690 }, { "epoch": 1.8191065900044228, "grad_norm": 2.9100546986391214, "learning_rate": 3.01691292656215e-06, "loss": 1.2916, "step": 61695 }, { "epoch": 1.8192540173964322, "grad_norm": 2.7920124171057155, "learning_rate": 3.0162820720546802e-06, "loss": 1.3141, "step": 61700 }, { "epoch": 1.8194014447884417, "grad_norm": 2.815306148021603, "learning_rate": 3.0156512391376962e-06, "loss": 1.2726, "step": 61705 }, { "epoch": 1.8195488721804511, "grad_norm": 2.6878027896182513, "learning_rate": 3.015020427829765e-06, "loss": 1.2588, "step": 61710 }, { "epoch": 1.8196962995724606, "grad_norm": 2.8194258875349947, "learning_rate": 3.014389638149447e-06, "loss": 1.2996, "step": 61715 }, { "epoch": 1.81984372696447, "grad_norm": 2.7455607503823813, "learning_rate": 3.013758870115303e-06, "loss": 1.295, "step": 61720 }, { "epoch": 1.8199911543564795, "grad_norm": 2.86905101141551, "learning_rate": 3.0131281237458956e-06, "loss": 1.282, "step": 61725 }, { "epoch": 1.820138581748489, "grad_norm": 2.8309051392906177, "learning_rate": 3.012497399059783e-06, "loss": 1.2925, "step": 61730 }, { "epoch": 1.8202860091404984, "grad_norm": 2.7795981030961507, "learning_rate": 3.0118666960755285e-06, "loss": 1.2855, "step": 61735 }, { "epoch": 1.8204334365325079, "grad_norm": 2.7640890955552626, "learning_rate": 3.0112360148116876e-06, "loss": 1.2829, "step": 61740 }, { "epoch": 1.8205808639245171, "grad_norm": 2.8028174631117646, "learning_rate": 3.010605355286822e-06, "loss": 1.3099, "step": 61745 }, { "epoch": 1.8207282913165266, "grad_norm": 2.7422987536358394, "learning_rate": 3.009974717519487e-06, "loss": 1.3122, "step": 61750 }, { "epoch": 1.820875718708536, "grad_norm": 2.8585772100373856, "learning_rate": 3.0093441015282423e-06, "loss": 1.291, "step": 61755 }, { "epoch": 1.8210231461005455, "grad_norm": 2.7932474488635695, "learning_rate": 3.0087135073316414e-06, "loss": 1.3278, "step": 61760 }, { "epoch": 1.821170573492555, "grad_norm": 2.767820659646656, "learning_rate": 3.008082934948244e-06, "loss": 1.3195, "step": 61765 }, { "epoch": 1.8213180008845642, "grad_norm": 2.8678331153895797, "learning_rate": 3.0074523843966017e-06, "loss": 1.3224, "step": 61770 }, { "epoch": 1.8214654282765737, "grad_norm": 2.84395211464656, "learning_rate": 3.006821855695272e-06, "loss": 1.2707, "step": 61775 }, { "epoch": 1.821612855668583, "grad_norm": 2.783484261187549, "learning_rate": 3.0061913488628086e-06, "loss": 1.3126, "step": 61780 }, { "epoch": 1.8217602830605926, "grad_norm": 2.741040669105642, "learning_rate": 3.0055608639177616e-06, "loss": 1.2898, "step": 61785 }, { "epoch": 1.821907710452602, "grad_norm": 2.8076438806769564, "learning_rate": 3.0049304008786882e-06, "loss": 1.3241, "step": 61790 }, { "epoch": 1.8220551378446115, "grad_norm": 2.850170815127978, "learning_rate": 3.0042999597641374e-06, "loss": 1.3154, "step": 61795 }, { "epoch": 1.822202565236621, "grad_norm": 2.858724350656165, "learning_rate": 3.0036695405926624e-06, "loss": 1.2789, "step": 61800 }, { "epoch": 1.8223499926286304, "grad_norm": 2.835423089951422, "learning_rate": 3.003039143382813e-06, "loss": 1.3342, "step": 61805 }, { "epoch": 1.8224974200206399, "grad_norm": 2.706603914921367, "learning_rate": 3.0024087681531405e-06, "loss": 1.2912, "step": 61810 }, { "epoch": 1.8226448474126493, "grad_norm": 2.8208620478301767, "learning_rate": 3.0017784149221923e-06, "loss": 1.2926, "step": 61815 }, { "epoch": 1.8227922748046588, "grad_norm": 2.7987855734260076, "learning_rate": 3.0011480837085198e-06, "loss": 1.3152, "step": 61820 }, { "epoch": 1.8229397021966682, "grad_norm": 2.809642913955094, "learning_rate": 3.0005177745306697e-06, "loss": 1.2694, "step": 61825 }, { "epoch": 1.8230871295886777, "grad_norm": 2.8300901132488856, "learning_rate": 2.9998874874071906e-06, "loss": 1.2835, "step": 61830 }, { "epoch": 1.8232345569806871, "grad_norm": 2.828338784016418, "learning_rate": 2.999257222356627e-06, "loss": 1.3178, "step": 61835 }, { "epoch": 1.8233819843726966, "grad_norm": 2.7581129759929683, "learning_rate": 2.9986269793975283e-06, "loss": 1.3031, "step": 61840 }, { "epoch": 1.8235294117647058, "grad_norm": 2.7814430505272107, "learning_rate": 2.9979967585484392e-06, "loss": 1.241, "step": 61845 }, { "epoch": 1.8236768391567153, "grad_norm": 2.99925187559508, "learning_rate": 2.997366559827903e-06, "loss": 1.358, "step": 61850 }, { "epoch": 1.8238242665487248, "grad_norm": 2.7729049297188335, "learning_rate": 2.996736383254466e-06, "loss": 1.2748, "step": 61855 }, { "epoch": 1.8239716939407342, "grad_norm": 2.810804699105995, "learning_rate": 2.9961062288466697e-06, "loss": 1.3227, "step": 61860 }, { "epoch": 1.8241191213327437, "grad_norm": 2.825128456406798, "learning_rate": 2.99547609662306e-06, "loss": 1.2862, "step": 61865 }, { "epoch": 1.824266548724753, "grad_norm": 2.825481128013151, "learning_rate": 2.9948459866021763e-06, "loss": 1.2554, "step": 61870 }, { "epoch": 1.8244139761167624, "grad_norm": 2.9699155196334615, "learning_rate": 2.994215898802563e-06, "loss": 1.3302, "step": 61875 }, { "epoch": 1.8245614035087718, "grad_norm": 2.7722497646616753, "learning_rate": 2.993585833242759e-06, "loss": 1.2905, "step": 61880 }, { "epoch": 1.8247088309007813, "grad_norm": 2.8254332199287813, "learning_rate": 2.992955789941307e-06, "loss": 1.3423, "step": 61885 }, { "epoch": 1.8248562582927907, "grad_norm": 2.9074216662471866, "learning_rate": 2.9923257689167425e-06, "loss": 1.2639, "step": 61890 }, { "epoch": 1.8250036856848002, "grad_norm": 2.956425190220682, "learning_rate": 2.9916957701876104e-06, "loss": 1.3049, "step": 61895 }, { "epoch": 1.8251511130768097, "grad_norm": 2.852901434938479, "learning_rate": 2.991065793772445e-06, "loss": 1.3132, "step": 61900 }, { "epoch": 1.8252985404688191, "grad_norm": 2.781398506490226, "learning_rate": 2.990435839689786e-06, "loss": 1.2587, "step": 61905 }, { "epoch": 1.8254459678608286, "grad_norm": 3.04848537861135, "learning_rate": 2.9898059079581693e-06, "loss": 1.3272, "step": 61910 }, { "epoch": 1.825593395252838, "grad_norm": 2.7428501240812144, "learning_rate": 2.9891759985961304e-06, "loss": 1.3034, "step": 61915 }, { "epoch": 1.8257408226448475, "grad_norm": 2.8293773342135373, "learning_rate": 2.988546111622208e-06, "loss": 1.2876, "step": 61920 }, { "epoch": 1.825888250036857, "grad_norm": 2.8764198588838537, "learning_rate": 2.9879162470549343e-06, "loss": 1.3027, "step": 61925 }, { "epoch": 1.8260356774288664, "grad_norm": 2.7558379335153007, "learning_rate": 2.9872864049128474e-06, "loss": 1.2651, "step": 61930 }, { "epoch": 1.8261831048208759, "grad_norm": 2.87964490204827, "learning_rate": 2.9866565852144773e-06, "loss": 1.2782, "step": 61935 }, { "epoch": 1.826330532212885, "grad_norm": 2.8920399065510187, "learning_rate": 2.98602678797836e-06, "loss": 1.305, "step": 61940 }, { "epoch": 1.8264779596048946, "grad_norm": 2.8096396072397214, "learning_rate": 2.9853970132230254e-06, "loss": 1.2816, "step": 61945 }, { "epoch": 1.826625386996904, "grad_norm": 2.7282725734790714, "learning_rate": 2.984767260967008e-06, "loss": 1.3069, "step": 61950 }, { "epoch": 1.8267728143889135, "grad_norm": 2.7758340659107343, "learning_rate": 2.9841375312288376e-06, "loss": 1.2674, "step": 61955 }, { "epoch": 1.826920241780923, "grad_norm": 2.830541936402134, "learning_rate": 2.983507824027045e-06, "loss": 1.2819, "step": 61960 }, { "epoch": 1.8270676691729322, "grad_norm": 2.8067254915879545, "learning_rate": 2.9828781393801584e-06, "loss": 1.3013, "step": 61965 }, { "epoch": 1.8272150965649416, "grad_norm": 2.7781749737997594, "learning_rate": 2.98224847730671e-06, "loss": 1.297, "step": 61970 }, { "epoch": 1.827362523956951, "grad_norm": 2.8744142367494203, "learning_rate": 2.9816188378252274e-06, "loss": 1.3279, "step": 61975 }, { "epoch": 1.8275099513489605, "grad_norm": 2.836709317818115, "learning_rate": 2.9809892209542366e-06, "loss": 1.2695, "step": 61980 }, { "epoch": 1.82765737874097, "grad_norm": 2.738420149174798, "learning_rate": 2.980359626712267e-06, "loss": 1.3139, "step": 61985 }, { "epoch": 1.8278048061329795, "grad_norm": 2.918163399872494, "learning_rate": 2.979730055117843e-06, "loss": 1.3339, "step": 61990 }, { "epoch": 1.827952233524989, "grad_norm": 2.8414973406752377, "learning_rate": 2.9791005061894926e-06, "loss": 1.3133, "step": 61995 }, { "epoch": 1.8280996609169984, "grad_norm": 2.77414348478107, "learning_rate": 2.9784709799457388e-06, "loss": 1.2635, "step": 62000 }, { "epoch": 1.8280996609169984, "eval_loss": 1.0710779428482056, "eval_runtime": 4.2626, "eval_samples_per_second": 92.901, "eval_steps_per_second": 3.05, "step": 62000 }, { "epoch": 1.8282470883090078, "grad_norm": 2.772692949838114, "learning_rate": 2.9778414764051094e-06, "loss": 1.2823, "step": 62005 }, { "epoch": 1.8283945157010173, "grad_norm": 2.889552094143864, "learning_rate": 2.9772119955861253e-06, "loss": 1.2482, "step": 62010 }, { "epoch": 1.8285419430930268, "grad_norm": 2.810018834700795, "learning_rate": 2.976582537507311e-06, "loss": 1.3431, "step": 62015 }, { "epoch": 1.8286893704850362, "grad_norm": 2.8069748659463025, "learning_rate": 2.9759531021871874e-06, "loss": 1.2974, "step": 62020 }, { "epoch": 1.8288367978770457, "grad_norm": 2.828401993993435, "learning_rate": 2.975323689644279e-06, "loss": 1.2912, "step": 62025 }, { "epoch": 1.8289842252690551, "grad_norm": 2.9172638690719404, "learning_rate": 2.974694299897105e-06, "loss": 1.3194, "step": 62030 }, { "epoch": 1.8291316526610646, "grad_norm": 2.692164124476117, "learning_rate": 2.9740649329641877e-06, "loss": 1.2694, "step": 62035 }, { "epoch": 1.8292790800530738, "grad_norm": 2.8712017019322205, "learning_rate": 2.9734355888640453e-06, "loss": 1.2792, "step": 62040 }, { "epoch": 1.8294265074450833, "grad_norm": 2.8360181744141815, "learning_rate": 2.972806267615196e-06, "loss": 1.3109, "step": 62045 }, { "epoch": 1.8295739348370927, "grad_norm": 2.8688902557188896, "learning_rate": 2.972176969236161e-06, "loss": 1.2821, "step": 62050 }, { "epoch": 1.8297213622291022, "grad_norm": 2.8045208431220514, "learning_rate": 2.971547693745456e-06, "loss": 1.2738, "step": 62055 }, { "epoch": 1.8298687896211117, "grad_norm": 2.7723386752226777, "learning_rate": 2.9709184411616e-06, "loss": 1.2701, "step": 62060 }, { "epoch": 1.830016217013121, "grad_norm": 2.810076091188373, "learning_rate": 2.970289211503108e-06, "loss": 1.3315, "step": 62065 }, { "epoch": 1.8301636444051304, "grad_norm": 2.752785890275126, "learning_rate": 2.969660004788497e-06, "loss": 1.2935, "step": 62070 }, { "epoch": 1.8303110717971398, "grad_norm": 3.0665874378970184, "learning_rate": 2.96903082103628e-06, "loss": 1.2925, "step": 62075 }, { "epoch": 1.8304584991891493, "grad_norm": 2.8330643547524303, "learning_rate": 2.968401660264974e-06, "loss": 1.3082, "step": 62080 }, { "epoch": 1.8306059265811587, "grad_norm": 2.7992190145024667, "learning_rate": 2.967772522493091e-06, "loss": 1.2877, "step": 62085 }, { "epoch": 1.8307533539731682, "grad_norm": 2.8065548406846257, "learning_rate": 2.9671434077391455e-06, "loss": 1.347, "step": 62090 }, { "epoch": 1.8309007813651776, "grad_norm": 2.7425111437971768, "learning_rate": 2.9665143160216478e-06, "loss": 1.308, "step": 62095 }, { "epoch": 1.831048208757187, "grad_norm": 2.7246380469881153, "learning_rate": 2.9658852473591125e-06, "loss": 1.2822, "step": 62100 }, { "epoch": 1.8311956361491966, "grad_norm": 2.7962326810479365, "learning_rate": 2.965256201770049e-06, "loss": 1.2941, "step": 62105 }, { "epoch": 1.831343063541206, "grad_norm": 2.831304516584339, "learning_rate": 2.964627179272967e-06, "loss": 1.3142, "step": 62110 }, { "epoch": 1.8314904909332155, "grad_norm": 2.730445355486705, "learning_rate": 2.9639981798863782e-06, "loss": 1.3274, "step": 62115 }, { "epoch": 1.831637918325225, "grad_norm": 2.7266336259002433, "learning_rate": 2.9633692036287886e-06, "loss": 1.2962, "step": 62120 }, { "epoch": 1.8317853457172344, "grad_norm": 2.69824487086071, "learning_rate": 2.9627402505187104e-06, "loss": 1.2789, "step": 62125 }, { "epoch": 1.8319327731092439, "grad_norm": 2.808217831179675, "learning_rate": 2.9621113205746474e-06, "loss": 1.2241, "step": 62130 }, { "epoch": 1.832080200501253, "grad_norm": 2.8489088174874846, "learning_rate": 2.96148241381511e-06, "loss": 1.3243, "step": 62135 }, { "epoch": 1.8322276278932625, "grad_norm": 2.762950613060667, "learning_rate": 2.9608535302586022e-06, "loss": 1.3259, "step": 62140 }, { "epoch": 1.832375055285272, "grad_norm": 2.7564385378274103, "learning_rate": 2.960224669923631e-06, "loss": 1.2785, "step": 62145 }, { "epoch": 1.8325224826772815, "grad_norm": 2.6917846432177894, "learning_rate": 2.9595958328286995e-06, "loss": 1.2708, "step": 62150 }, { "epoch": 1.832669910069291, "grad_norm": 2.805811734714238, "learning_rate": 2.9589670189923147e-06, "loss": 1.2622, "step": 62155 }, { "epoch": 1.8328173374613002, "grad_norm": 2.9888803272585536, "learning_rate": 2.9583382284329777e-06, "loss": 1.2995, "step": 62160 }, { "epoch": 1.8329647648533096, "grad_norm": 2.7554466866190785, "learning_rate": 2.957709461169193e-06, "loss": 1.3098, "step": 62165 }, { "epoch": 1.833112192245319, "grad_norm": 2.9488518809756727, "learning_rate": 2.9570807172194627e-06, "loss": 1.3468, "step": 62170 }, { "epoch": 1.8332596196373285, "grad_norm": 2.84635762900461, "learning_rate": 2.9564519966022852e-06, "loss": 1.2382, "step": 62175 }, { "epoch": 1.833407047029338, "grad_norm": 2.9092680760903997, "learning_rate": 2.955823299336166e-06, "loss": 1.3208, "step": 62180 }, { "epoch": 1.8335544744213474, "grad_norm": 2.756105754686808, "learning_rate": 2.9551946254396016e-06, "loss": 1.2646, "step": 62185 }, { "epoch": 1.833701901813357, "grad_norm": 2.8147596608039347, "learning_rate": 2.954565974931094e-06, "loss": 1.3358, "step": 62190 }, { "epoch": 1.8338493292053664, "grad_norm": 2.8090639880206423, "learning_rate": 2.9539373478291405e-06, "loss": 1.3105, "step": 62195 }, { "epoch": 1.8339967565973758, "grad_norm": 2.871122653375335, "learning_rate": 2.95330874415224e-06, "loss": 1.253, "step": 62200 }, { "epoch": 1.8341441839893853, "grad_norm": 2.8081078419790235, "learning_rate": 2.9526801639188883e-06, "loss": 1.3312, "step": 62205 }, { "epoch": 1.8342916113813947, "grad_norm": 2.9529181475507853, "learning_rate": 2.9520516071475845e-06, "loss": 1.3073, "step": 62210 }, { "epoch": 1.8344390387734042, "grad_norm": 2.77457363189187, "learning_rate": 2.9514230738568225e-06, "loss": 1.3026, "step": 62215 }, { "epoch": 1.8345864661654137, "grad_norm": 2.818346975279742, "learning_rate": 2.9507945640650995e-06, "loss": 1.2511, "step": 62220 }, { "epoch": 1.8347338935574231, "grad_norm": 2.820254549761727, "learning_rate": 2.950166077790907e-06, "loss": 1.2566, "step": 62225 }, { "epoch": 1.8348813209494326, "grad_norm": 2.794603119248379, "learning_rate": 2.949537615052743e-06, "loss": 1.275, "step": 62230 }, { "epoch": 1.8350287483414418, "grad_norm": 2.7705560852621245, "learning_rate": 2.9489091758690983e-06, "loss": 1.2806, "step": 62235 }, { "epoch": 1.8351761757334513, "grad_norm": 2.896483249589858, "learning_rate": 2.9482807602584656e-06, "loss": 1.2846, "step": 62240 }, { "epoch": 1.8353236031254607, "grad_norm": 2.8998882664195444, "learning_rate": 2.9476523682393374e-06, "loss": 1.3133, "step": 62245 }, { "epoch": 1.8354710305174702, "grad_norm": 2.7812161218617413, "learning_rate": 2.947023999830203e-06, "loss": 1.3107, "step": 62250 }, { "epoch": 1.8356184579094794, "grad_norm": 2.736757590108684, "learning_rate": 2.946395655049556e-06, "loss": 1.3091, "step": 62255 }, { "epoch": 1.8357658853014889, "grad_norm": 2.863154603989905, "learning_rate": 2.9457673339158825e-06, "loss": 1.2831, "step": 62260 }, { "epoch": 1.8359133126934983, "grad_norm": 2.794140413681762, "learning_rate": 2.9451390364476757e-06, "loss": 1.3051, "step": 62265 }, { "epoch": 1.8360607400855078, "grad_norm": 2.7826591605252293, "learning_rate": 2.9445107626634206e-06, "loss": 1.288, "step": 62270 }, { "epoch": 1.8362081674775173, "grad_norm": 2.9190618239812878, "learning_rate": 2.943882512581607e-06, "loss": 1.3256, "step": 62275 }, { "epoch": 1.8363555948695267, "grad_norm": 2.8834825675105606, "learning_rate": 2.943254286220719e-06, "loss": 1.3412, "step": 62280 }, { "epoch": 1.8365030222615362, "grad_norm": 2.8379673505369603, "learning_rate": 2.942626083599247e-06, "loss": 1.2912, "step": 62285 }, { "epoch": 1.8366504496535456, "grad_norm": 2.844668570133599, "learning_rate": 2.9419979047356735e-06, "loss": 1.2784, "step": 62290 }, { "epoch": 1.836797877045555, "grad_norm": 2.839709034376215, "learning_rate": 2.941369749648485e-06, "loss": 1.2824, "step": 62295 }, { "epoch": 1.8369453044375645, "grad_norm": 2.7479212163346003, "learning_rate": 2.9407416183561643e-06, "loss": 1.2296, "step": 62300 }, { "epoch": 1.837092731829574, "grad_norm": 2.7721630900153054, "learning_rate": 2.940113510877195e-06, "loss": 1.2972, "step": 62305 }, { "epoch": 1.8372401592215835, "grad_norm": 2.7796582430974577, "learning_rate": 2.9394854272300615e-06, "loss": 1.2299, "step": 62310 }, { "epoch": 1.837387586613593, "grad_norm": 2.9469773331244835, "learning_rate": 2.9388573674332438e-06, "loss": 1.27, "step": 62315 }, { "epoch": 1.8375350140056024, "grad_norm": 2.9245923164984564, "learning_rate": 2.938229331505225e-06, "loss": 1.3285, "step": 62320 }, { "epoch": 1.8376824413976118, "grad_norm": 2.977492479524396, "learning_rate": 2.9376013194644848e-06, "loss": 1.3111, "step": 62325 }, { "epoch": 1.837829868789621, "grad_norm": 2.74930505524857, "learning_rate": 2.936973331329504e-06, "loss": 1.349, "step": 62330 }, { "epoch": 1.8379772961816305, "grad_norm": 2.773536244472988, "learning_rate": 2.93634536711876e-06, "loss": 1.2754, "step": 62335 }, { "epoch": 1.83812472357364, "grad_norm": 2.8003661665497432, "learning_rate": 2.935717426850734e-06, "loss": 1.3142, "step": 62340 }, { "epoch": 1.8382721509656494, "grad_norm": 2.926191197374511, "learning_rate": 2.935089510543902e-06, "loss": 1.2968, "step": 62345 }, { "epoch": 1.838419578357659, "grad_norm": 2.9000157116372196, "learning_rate": 2.934461618216742e-06, "loss": 1.2578, "step": 62350 }, { "epoch": 1.8385670057496681, "grad_norm": 2.8999276615682623, "learning_rate": 2.933833749887729e-06, "loss": 1.3251, "step": 62355 }, { "epoch": 1.8387144331416776, "grad_norm": 2.8360219366044377, "learning_rate": 2.9332059055753415e-06, "loss": 1.2578, "step": 62360 }, { "epoch": 1.838861860533687, "grad_norm": 2.7367957070309545, "learning_rate": 2.932578085298053e-06, "loss": 1.3291, "step": 62365 }, { "epoch": 1.8390092879256965, "grad_norm": 2.896982790644447, "learning_rate": 2.931950289074336e-06, "loss": 1.2829, "step": 62370 }, { "epoch": 1.839156715317706, "grad_norm": 2.735872091520076, "learning_rate": 2.9313225169226676e-06, "loss": 1.2675, "step": 62375 }, { "epoch": 1.8393041427097154, "grad_norm": 2.875549294062458, "learning_rate": 2.930694768861517e-06, "loss": 1.3189, "step": 62380 }, { "epoch": 1.8394515701017249, "grad_norm": 2.908151658243166, "learning_rate": 2.93006704490936e-06, "loss": 1.2509, "step": 62385 }, { "epoch": 1.8395989974937343, "grad_norm": 2.7675092454242387, "learning_rate": 2.9294393450846653e-06, "loss": 1.2805, "step": 62390 }, { "epoch": 1.8397464248857438, "grad_norm": 2.7548306927208914, "learning_rate": 2.928811669405906e-06, "loss": 1.2575, "step": 62395 }, { "epoch": 1.8398938522777533, "grad_norm": 2.8527039090326016, "learning_rate": 2.92818401789155e-06, "loss": 1.2988, "step": 62400 }, { "epoch": 1.8400412796697627, "grad_norm": 2.768278318705798, "learning_rate": 2.927556390560069e-06, "loss": 1.2309, "step": 62405 }, { "epoch": 1.8401887070617722, "grad_norm": 2.8058069297917476, "learning_rate": 2.9269287874299283e-06, "loss": 1.2799, "step": 62410 }, { "epoch": 1.8403361344537816, "grad_norm": 2.853301608210759, "learning_rate": 2.9263012085195996e-06, "loss": 1.2804, "step": 62415 }, { "epoch": 1.840483561845791, "grad_norm": 2.822410436793181, "learning_rate": 2.925673653847548e-06, "loss": 1.2785, "step": 62420 }, { "epoch": 1.8406309892378003, "grad_norm": 2.8002805150603516, "learning_rate": 2.9250461234322414e-06, "loss": 1.2461, "step": 62425 }, { "epoch": 1.8407784166298098, "grad_norm": 2.7974891247142994, "learning_rate": 2.9244186172921438e-06, "loss": 1.332, "step": 62430 }, { "epoch": 1.8409258440218192, "grad_norm": 2.870699956509953, "learning_rate": 2.92379113544572e-06, "loss": 1.3194, "step": 62435 }, { "epoch": 1.8410732714138287, "grad_norm": 2.7623219835771278, "learning_rate": 2.9231636779114368e-06, "loss": 1.2996, "step": 62440 }, { "epoch": 1.8412206988058382, "grad_norm": 2.807760420504607, "learning_rate": 2.922536244707755e-06, "loss": 1.2756, "step": 62445 }, { "epoch": 1.8413681261978474, "grad_norm": 2.806580578866568, "learning_rate": 2.921908835853141e-06, "loss": 1.3478, "step": 62450 }, { "epoch": 1.8415155535898569, "grad_norm": 2.8401084421388476, "learning_rate": 2.921281451366054e-06, "loss": 1.2776, "step": 62455 }, { "epoch": 1.8416629809818663, "grad_norm": 2.858448141943349, "learning_rate": 2.9206540912649573e-06, "loss": 1.3374, "step": 62460 }, { "epoch": 1.8418104083738758, "grad_norm": 2.7378937288987846, "learning_rate": 2.9200267555683093e-06, "loss": 1.301, "step": 62465 }, { "epoch": 1.8419578357658852, "grad_norm": 2.7664961101464214, "learning_rate": 2.9193994442945735e-06, "loss": 1.3447, "step": 62470 }, { "epoch": 1.8421052631578947, "grad_norm": 2.936228578279209, "learning_rate": 2.918772157462207e-06, "loss": 1.2658, "step": 62475 }, { "epoch": 1.8422526905499041, "grad_norm": 2.8210270189333597, "learning_rate": 2.918144895089669e-06, "loss": 1.2838, "step": 62480 }, { "epoch": 1.8424001179419136, "grad_norm": 2.859287182457154, "learning_rate": 2.9175176571954164e-06, "loss": 1.2812, "step": 62485 }, { "epoch": 1.842547545333923, "grad_norm": 2.9131047559741767, "learning_rate": 2.9168904437979086e-06, "loss": 1.3446, "step": 62490 }, { "epoch": 1.8426949727259325, "grad_norm": 2.7868907882878493, "learning_rate": 2.9162632549156014e-06, "loss": 1.2737, "step": 62495 }, { "epoch": 1.842842400117942, "grad_norm": 2.7842005119832343, "learning_rate": 2.9156360905669487e-06, "loss": 1.249, "step": 62500 }, { "epoch": 1.842842400117942, "eval_loss": 1.0708527565002441, "eval_runtime": 4.1797, "eval_samples_per_second": 94.743, "eval_steps_per_second": 3.11, "step": 62500 }, { "epoch": 1.8429898275099514, "grad_norm": 2.8714266796505705, "learning_rate": 2.9150089507704075e-06, "loss": 1.3021, "step": 62505 }, { "epoch": 1.843137254901961, "grad_norm": 2.789010441315025, "learning_rate": 2.91438183554443e-06, "loss": 1.2843, "step": 62510 }, { "epoch": 1.8432846822939704, "grad_norm": 2.7169325278874403, "learning_rate": 2.9137547449074722e-06, "loss": 1.25, "step": 62515 }, { "epoch": 1.8434321096859798, "grad_norm": 2.7985620469327355, "learning_rate": 2.9131276788779847e-06, "loss": 1.3221, "step": 62520 }, { "epoch": 1.843579537077989, "grad_norm": 2.8225692637315842, "learning_rate": 2.912500637474423e-06, "loss": 1.301, "step": 62525 }, { "epoch": 1.8437269644699985, "grad_norm": 2.72840895827357, "learning_rate": 2.9118736207152342e-06, "loss": 1.2932, "step": 62530 }, { "epoch": 1.843874391862008, "grad_norm": 2.8371114071466774, "learning_rate": 2.911246628618873e-06, "loss": 1.2353, "step": 62535 }, { "epoch": 1.8440218192540174, "grad_norm": 2.7775752029808887, "learning_rate": 2.9106196612037857e-06, "loss": 1.3088, "step": 62540 }, { "epoch": 1.8441692466460269, "grad_norm": 2.8994954631286967, "learning_rate": 2.909992718488425e-06, "loss": 1.275, "step": 62545 }, { "epoch": 1.8443166740380361, "grad_norm": 2.8344891833280004, "learning_rate": 2.9093658004912373e-06, "loss": 1.2726, "step": 62550 }, { "epoch": 1.8444641014300456, "grad_norm": 2.8857329854930205, "learning_rate": 2.9087389072306708e-06, "loss": 1.3224, "step": 62555 }, { "epoch": 1.844611528822055, "grad_norm": 2.7584211919394583, "learning_rate": 2.9081120387251733e-06, "loss": 1.2112, "step": 62560 }, { "epoch": 1.8447589562140645, "grad_norm": 2.8640166535644513, "learning_rate": 2.9074851949931885e-06, "loss": 1.3162, "step": 62565 }, { "epoch": 1.844906383606074, "grad_norm": 2.771800824132717, "learning_rate": 2.9068583760531656e-06, "loss": 1.2769, "step": 62570 }, { "epoch": 1.8450538109980834, "grad_norm": 2.7759593854965643, "learning_rate": 2.9062315819235458e-06, "loss": 1.2643, "step": 62575 }, { "epoch": 1.8452012383900929, "grad_norm": 2.9751184326130447, "learning_rate": 2.905604812622777e-06, "loss": 1.2933, "step": 62580 }, { "epoch": 1.8453486657821023, "grad_norm": 2.8811735542646306, "learning_rate": 2.9049780681693e-06, "loss": 1.2848, "step": 62585 }, { "epoch": 1.8454960931741118, "grad_norm": 2.670419342861324, "learning_rate": 2.904351348581559e-06, "loss": 1.2695, "step": 62590 }, { "epoch": 1.8456435205661212, "grad_norm": 2.8874386060477684, "learning_rate": 2.903724653877994e-06, "loss": 1.2631, "step": 62595 }, { "epoch": 1.8457909479581307, "grad_norm": 2.778552622860292, "learning_rate": 2.9030979840770483e-06, "loss": 1.3204, "step": 62600 }, { "epoch": 1.8459383753501402, "grad_norm": 2.6825527306932995, "learning_rate": 2.9024713391971614e-06, "loss": 1.2861, "step": 62605 }, { "epoch": 1.8460858027421496, "grad_norm": 2.836072029583861, "learning_rate": 2.9018447192567734e-06, "loss": 1.2773, "step": 62610 }, { "epoch": 1.846233230134159, "grad_norm": 2.6549256054891766, "learning_rate": 2.9012181242743217e-06, "loss": 1.2694, "step": 62615 }, { "epoch": 1.8463806575261683, "grad_norm": 2.9465057249606152, "learning_rate": 2.900591554268247e-06, "loss": 1.2891, "step": 62620 }, { "epoch": 1.8465280849181778, "grad_norm": 2.7609653566293204, "learning_rate": 2.8999650092569866e-06, "loss": 1.3201, "step": 62625 }, { "epoch": 1.8466755123101872, "grad_norm": 2.791943072206177, "learning_rate": 2.899338489258975e-06, "loss": 1.3227, "step": 62630 }, { "epoch": 1.8468229397021967, "grad_norm": 2.6966532213512244, "learning_rate": 2.8987119942926507e-06, "loss": 1.276, "step": 62635 }, { "epoch": 1.8469703670942061, "grad_norm": 2.806845250016093, "learning_rate": 2.8980855243764463e-06, "loss": 1.3191, "step": 62640 }, { "epoch": 1.8471177944862154, "grad_norm": 2.741548710346294, "learning_rate": 2.8974590795288e-06, "loss": 1.2664, "step": 62645 }, { "epoch": 1.8472652218782248, "grad_norm": 2.801827347259482, "learning_rate": 2.896832659768142e-06, "loss": 1.3001, "step": 62650 }, { "epoch": 1.8474126492702343, "grad_norm": 2.775295871970732, "learning_rate": 2.896206265112909e-06, "loss": 1.3131, "step": 62655 }, { "epoch": 1.8475600766622438, "grad_norm": 2.8618763444272655, "learning_rate": 2.8955798955815303e-06, "loss": 1.3031, "step": 62660 }, { "epoch": 1.8477075040542532, "grad_norm": 2.732818472579025, "learning_rate": 2.89495355119244e-06, "loss": 1.3126, "step": 62665 }, { "epoch": 1.8478549314462627, "grad_norm": 2.988911803159818, "learning_rate": 2.894327231964066e-06, "loss": 1.2335, "step": 62670 }, { "epoch": 1.8480023588382721, "grad_norm": 2.7215284175813026, "learning_rate": 2.8937009379148424e-06, "loss": 1.267, "step": 62675 }, { "epoch": 1.8481497862302816, "grad_norm": 2.7538170525070065, "learning_rate": 2.893074669063195e-06, "loss": 1.2737, "step": 62680 }, { "epoch": 1.848297213622291, "grad_norm": 2.7184284276580444, "learning_rate": 2.892448425427555e-06, "loss": 1.2577, "step": 62685 }, { "epoch": 1.8484446410143005, "grad_norm": 2.7729584309038966, "learning_rate": 2.8918222070263493e-06, "loss": 1.268, "step": 62690 }, { "epoch": 1.84859206840631, "grad_norm": 2.7530679646410166, "learning_rate": 2.891196013878004e-06, "loss": 1.3027, "step": 62695 }, { "epoch": 1.8487394957983194, "grad_norm": 2.806838099727906, "learning_rate": 2.890569846000948e-06, "loss": 1.2413, "step": 62700 }, { "epoch": 1.8488869231903289, "grad_norm": 2.6557536633340177, "learning_rate": 2.8899437034136035e-06, "loss": 1.2672, "step": 62705 }, { "epoch": 1.8490343505823383, "grad_norm": 2.8252130872265853, "learning_rate": 2.8893175861343996e-06, "loss": 1.2943, "step": 62710 }, { "epoch": 1.8491817779743478, "grad_norm": 2.918367275336651, "learning_rate": 2.888691494181758e-06, "loss": 1.3259, "step": 62715 }, { "epoch": 1.849329205366357, "grad_norm": 2.777918667412146, "learning_rate": 2.888065427574103e-06, "loss": 1.241, "step": 62720 }, { "epoch": 1.8494766327583665, "grad_norm": 2.868167938802608, "learning_rate": 2.8874393863298554e-06, "loss": 1.2773, "step": 62725 }, { "epoch": 1.849624060150376, "grad_norm": 2.815113249225383, "learning_rate": 2.8868133704674407e-06, "loss": 1.2344, "step": 62730 }, { "epoch": 1.8497714875423854, "grad_norm": 2.9004743082725835, "learning_rate": 2.8861873800052774e-06, "loss": 1.3065, "step": 62735 }, { "epoch": 1.8499189149343949, "grad_norm": 2.7836816493944867, "learning_rate": 2.8855614149617877e-06, "loss": 1.3112, "step": 62740 }, { "epoch": 1.850066342326404, "grad_norm": 2.82724154423312, "learning_rate": 2.884935475355389e-06, "loss": 1.2876, "step": 62745 }, { "epoch": 1.8502137697184136, "grad_norm": 2.7785612685303933, "learning_rate": 2.884309561204503e-06, "loss": 1.2354, "step": 62750 }, { "epoch": 1.850361197110423, "grad_norm": 2.852200761300533, "learning_rate": 2.883683672527546e-06, "loss": 1.2723, "step": 62755 }, { "epoch": 1.8505086245024325, "grad_norm": 2.833688334942624, "learning_rate": 2.8830578093429367e-06, "loss": 1.3103, "step": 62760 }, { "epoch": 1.850656051894442, "grad_norm": 2.8719363151598967, "learning_rate": 2.8824319716690914e-06, "loss": 1.3399, "step": 62765 }, { "epoch": 1.8508034792864514, "grad_norm": 2.923829297533542, "learning_rate": 2.8818061595244244e-06, "loss": 1.3055, "step": 62770 }, { "epoch": 1.8509509066784608, "grad_norm": 2.7932910627058862, "learning_rate": 2.881180372927354e-06, "loss": 1.2625, "step": 62775 }, { "epoch": 1.8510983340704703, "grad_norm": 2.8461206304957587, "learning_rate": 2.880554611896292e-06, "loss": 1.2548, "step": 62780 }, { "epoch": 1.8512457614624798, "grad_norm": 2.9767167683160833, "learning_rate": 2.8799288764496545e-06, "loss": 1.3818, "step": 62785 }, { "epoch": 1.8513931888544892, "grad_norm": 2.7657361550289687, "learning_rate": 2.879303166605853e-06, "loss": 1.2944, "step": 62790 }, { "epoch": 1.8515406162464987, "grad_norm": 2.8372001729812135, "learning_rate": 2.878677482383301e-06, "loss": 1.2796, "step": 62795 }, { "epoch": 1.8516880436385081, "grad_norm": 2.7065020810859823, "learning_rate": 2.8780518238004066e-06, "loss": 1.3259, "step": 62800 }, { "epoch": 1.8518354710305176, "grad_norm": 2.675432391836789, "learning_rate": 2.8774261908755857e-06, "loss": 1.2779, "step": 62805 }, { "epoch": 1.851982898422527, "grad_norm": 2.785172207605199, "learning_rate": 2.876800583627244e-06, "loss": 1.2648, "step": 62810 }, { "epoch": 1.8521303258145363, "grad_norm": 2.855067497650446, "learning_rate": 2.876175002073793e-06, "loss": 1.2952, "step": 62815 }, { "epoch": 1.8522777532065458, "grad_norm": 2.8174433450442677, "learning_rate": 2.8755494462336403e-06, "loss": 1.2996, "step": 62820 }, { "epoch": 1.8524251805985552, "grad_norm": 2.838385191441213, "learning_rate": 2.874923916125192e-06, "loss": 1.3132, "step": 62825 }, { "epoch": 1.8525726079905647, "grad_norm": 3.0927020715473215, "learning_rate": 2.8742984117668587e-06, "loss": 1.3336, "step": 62830 }, { "epoch": 1.8527200353825741, "grad_norm": 2.748261995380757, "learning_rate": 2.8736729331770427e-06, "loss": 1.2643, "step": 62835 }, { "epoch": 1.8528674627745834, "grad_norm": 2.6934625694627146, "learning_rate": 2.873047480374153e-06, "loss": 1.3, "step": 62840 }, { "epoch": 1.8530148901665928, "grad_norm": 2.859362796047116, "learning_rate": 2.8724220533765913e-06, "loss": 1.2576, "step": 62845 }, { "epoch": 1.8531623175586023, "grad_norm": 2.949151846296982, "learning_rate": 2.8717966522027643e-06, "loss": 1.2855, "step": 62850 }, { "epoch": 1.8533097449506117, "grad_norm": 2.7413036400158175, "learning_rate": 2.871171276871072e-06, "loss": 1.2702, "step": 62855 }, { "epoch": 1.8534571723426212, "grad_norm": 2.821114070814497, "learning_rate": 2.8705459273999195e-06, "loss": 1.2608, "step": 62860 }, { "epoch": 1.8536045997346307, "grad_norm": 2.7604368955006375, "learning_rate": 2.869920603807706e-06, "loss": 1.2677, "step": 62865 }, { "epoch": 1.8537520271266401, "grad_norm": 2.8540141389168445, "learning_rate": 2.869295306112836e-06, "loss": 1.3288, "step": 62870 }, { "epoch": 1.8538994545186496, "grad_norm": 2.881905951408645, "learning_rate": 2.868670034333705e-06, "loss": 1.3204, "step": 62875 }, { "epoch": 1.854046881910659, "grad_norm": 2.7291427448015537, "learning_rate": 2.8680447884887157e-06, "loss": 1.3137, "step": 62880 }, { "epoch": 1.8541943093026685, "grad_norm": 2.7345376254087177, "learning_rate": 2.867419568596266e-06, "loss": 1.2908, "step": 62885 }, { "epoch": 1.854341736694678, "grad_norm": 2.888742082327429, "learning_rate": 2.8667943746747527e-06, "loss": 1.2892, "step": 62890 }, { "epoch": 1.8544891640866874, "grad_norm": 2.792651967949019, "learning_rate": 2.866169206742574e-06, "loss": 1.2784, "step": 62895 }, { "epoch": 1.8546365914786969, "grad_norm": 2.970978475737632, "learning_rate": 2.865544064818124e-06, "loss": 1.2709, "step": 62900 }, { "epoch": 1.8547840188707063, "grad_norm": 2.820044111477041, "learning_rate": 2.8649189489198016e-06, "loss": 1.2741, "step": 62905 }, { "epoch": 1.8549314462627158, "grad_norm": 2.876323912837119, "learning_rate": 2.864293859065998e-06, "loss": 1.2572, "step": 62910 }, { "epoch": 1.855078873654725, "grad_norm": 2.848665527034071, "learning_rate": 2.8636687952751106e-06, "loss": 1.2628, "step": 62915 }, { "epoch": 1.8552263010467345, "grad_norm": 2.8225559453807945, "learning_rate": 2.86304375756553e-06, "loss": 1.2633, "step": 62920 }, { "epoch": 1.855373728438744, "grad_norm": 2.79182964923395, "learning_rate": 2.862418745955651e-06, "loss": 1.2821, "step": 62925 }, { "epoch": 1.8555211558307534, "grad_norm": 2.8247423084313197, "learning_rate": 2.8617937604638616e-06, "loss": 1.2892, "step": 62930 }, { "epoch": 1.8556685832227626, "grad_norm": 2.7736372192889336, "learning_rate": 2.861168801108557e-06, "loss": 1.3341, "step": 62935 }, { "epoch": 1.855816010614772, "grad_norm": 2.8094909206423178, "learning_rate": 2.8605438679081247e-06, "loss": 1.3073, "step": 62940 }, { "epoch": 1.8559634380067815, "grad_norm": 3.1256058536747435, "learning_rate": 2.8599189608809554e-06, "loss": 1.2629, "step": 62945 }, { "epoch": 1.856110865398791, "grad_norm": 2.684081572894187, "learning_rate": 2.859294080045437e-06, "loss": 1.3162, "step": 62950 }, { "epoch": 1.8562582927908005, "grad_norm": 2.759623826576359, "learning_rate": 2.858669225419956e-06, "loss": 1.3274, "step": 62955 }, { "epoch": 1.85640572018281, "grad_norm": 2.810728672523255, "learning_rate": 2.8580443970229017e-06, "loss": 1.2629, "step": 62960 }, { "epoch": 1.8565531475748194, "grad_norm": 2.7856153684930702, "learning_rate": 2.8574195948726583e-06, "loss": 1.3085, "step": 62965 }, { "epoch": 1.8567005749668288, "grad_norm": 2.8647645658389966, "learning_rate": 2.8567948189876145e-06, "loss": 1.2759, "step": 62970 }, { "epoch": 1.8568480023588383, "grad_norm": 2.7204519765104536, "learning_rate": 2.856170069386152e-06, "loss": 1.2819, "step": 62975 }, { "epoch": 1.8569954297508477, "grad_norm": 2.7552716503492087, "learning_rate": 2.855545346086657e-06, "loss": 1.3122, "step": 62980 }, { "epoch": 1.8571428571428572, "grad_norm": 2.8528965007920832, "learning_rate": 2.8549206491075095e-06, "loss": 1.3259, "step": 62985 }, { "epoch": 1.8572902845348667, "grad_norm": 2.8285207027014847, "learning_rate": 2.854295978467096e-06, "loss": 1.3063, "step": 62990 }, { "epoch": 1.8574377119268761, "grad_norm": 2.9060953355440082, "learning_rate": 2.853671334183795e-06, "loss": 1.2952, "step": 62995 }, { "epoch": 1.8575851393188856, "grad_norm": 2.8439847446334494, "learning_rate": 2.8530467162759897e-06, "loss": 1.2996, "step": 63000 }, { "epoch": 1.8575851393188856, "eval_loss": 1.069926381111145, "eval_runtime": 4.2796, "eval_samples_per_second": 92.532, "eval_steps_per_second": 3.038, "step": 63000 }, { "epoch": 1.857732566710895, "grad_norm": 2.786650334530303, "learning_rate": 2.8524221247620567e-06, "loss": 1.2641, "step": 63005 }, { "epoch": 1.8578799941029043, "grad_norm": 2.7268292770785316, "learning_rate": 2.8517975596603796e-06, "loss": 1.2539, "step": 63010 }, { "epoch": 1.8580274214949137, "grad_norm": 2.8597170716352536, "learning_rate": 2.8511730209893348e-06, "loss": 1.3059, "step": 63015 }, { "epoch": 1.8581748488869232, "grad_norm": 2.9421127168270194, "learning_rate": 2.8505485087673e-06, "loss": 1.3318, "step": 63020 }, { "epoch": 1.8583222762789326, "grad_norm": 2.7402076633669425, "learning_rate": 2.849924023012652e-06, "loss": 1.262, "step": 63025 }, { "epoch": 1.858469703670942, "grad_norm": 2.7046698669529654, "learning_rate": 2.8492995637437667e-06, "loss": 1.2651, "step": 63030 }, { "epoch": 1.8586171310629513, "grad_norm": 2.858763393491916, "learning_rate": 2.8486751309790216e-06, "loss": 1.2941, "step": 63035 }, { "epoch": 1.8587645584549608, "grad_norm": 2.788954651980766, "learning_rate": 2.848050724736789e-06, "loss": 1.2773, "step": 63040 }, { "epoch": 1.8589119858469703, "grad_norm": 2.7753993359827316, "learning_rate": 2.8474263450354447e-06, "loss": 1.2988, "step": 63045 }, { "epoch": 1.8590594132389797, "grad_norm": 2.8140981672040386, "learning_rate": 2.84680199189336e-06, "loss": 1.3178, "step": 63050 }, { "epoch": 1.8592068406309892, "grad_norm": 2.7957586119736297, "learning_rate": 2.8461776653289085e-06, "loss": 1.2518, "step": 63055 }, { "epoch": 1.8593542680229986, "grad_norm": 2.709973095074444, "learning_rate": 2.8455533653604604e-06, "loss": 1.2388, "step": 63060 }, { "epoch": 1.859501695415008, "grad_norm": 2.661557021777487, "learning_rate": 2.8449290920063885e-06, "loss": 1.266, "step": 63065 }, { "epoch": 1.8596491228070176, "grad_norm": 2.683640019056467, "learning_rate": 2.844304845285061e-06, "loss": 1.3276, "step": 63070 }, { "epoch": 1.859796550199027, "grad_norm": 2.73173342690877, "learning_rate": 2.843680625214848e-06, "loss": 1.2805, "step": 63075 }, { "epoch": 1.8599439775910365, "grad_norm": 2.7875402121745942, "learning_rate": 2.8430564318141176e-06, "loss": 1.3127, "step": 63080 }, { "epoch": 1.860091404983046, "grad_norm": 2.8502019580820708, "learning_rate": 2.8424322651012362e-06, "loss": 1.3055, "step": 63085 }, { "epoch": 1.8602388323750554, "grad_norm": 2.7718221475851537, "learning_rate": 2.8418081250945727e-06, "loss": 1.2919, "step": 63090 }, { "epoch": 1.8603862597670648, "grad_norm": 2.7537017552524747, "learning_rate": 2.841184011812491e-06, "loss": 1.2832, "step": 63095 }, { "epoch": 1.8605336871590743, "grad_norm": 2.856452800689072, "learning_rate": 2.840559925273359e-06, "loss": 1.3545, "step": 63100 }, { "epoch": 1.8606811145510835, "grad_norm": 2.7765055885986487, "learning_rate": 2.839935865495539e-06, "loss": 1.2731, "step": 63105 }, { "epoch": 1.860828541943093, "grad_norm": 2.6474317769131503, "learning_rate": 2.839311832497396e-06, "loss": 1.2853, "step": 63110 }, { "epoch": 1.8609759693351025, "grad_norm": 2.9074130931454594, "learning_rate": 2.8386878262972905e-06, "loss": 1.2783, "step": 63115 }, { "epoch": 1.861123396727112, "grad_norm": 2.769036060376737, "learning_rate": 2.838063846913588e-06, "loss": 1.2858, "step": 63120 }, { "epoch": 1.8612708241191214, "grad_norm": 2.8326675990184067, "learning_rate": 2.837439894364647e-06, "loss": 1.2653, "step": 63125 }, { "epoch": 1.8614182515111306, "grad_norm": 2.851630199737428, "learning_rate": 2.8368159686688308e-06, "loss": 1.3146, "step": 63130 }, { "epoch": 1.86156567890314, "grad_norm": 2.9999284180603802, "learning_rate": 2.8361920698444952e-06, "loss": 1.2834, "step": 63135 }, { "epoch": 1.8617131062951495, "grad_norm": 2.9183409425037636, "learning_rate": 2.8355681979100032e-06, "loss": 1.2908, "step": 63140 }, { "epoch": 1.861860533687159, "grad_norm": 2.716006430479361, "learning_rate": 2.834944352883711e-06, "loss": 1.2859, "step": 63145 }, { "epoch": 1.8620079610791684, "grad_norm": 2.7226409049533435, "learning_rate": 2.8343205347839754e-06, "loss": 1.2292, "step": 63150 }, { "epoch": 1.862155388471178, "grad_norm": 2.922095167778529, "learning_rate": 2.8336967436291547e-06, "loss": 1.3241, "step": 63155 }, { "epoch": 1.8623028158631874, "grad_norm": 2.8512490713971936, "learning_rate": 2.8330729794376017e-06, "loss": 1.2595, "step": 63160 }, { "epoch": 1.8624502432551968, "grad_norm": 2.7880782886359854, "learning_rate": 2.832449242227675e-06, "loss": 1.3008, "step": 63165 }, { "epoch": 1.8625976706472063, "grad_norm": 2.93018456097428, "learning_rate": 2.831825532017726e-06, "loss": 1.2857, "step": 63170 }, { "epoch": 1.8627450980392157, "grad_norm": 2.986082418992996, "learning_rate": 2.83120184882611e-06, "loss": 1.3312, "step": 63175 }, { "epoch": 1.8628925254312252, "grad_norm": 2.817645783504911, "learning_rate": 2.830578192671178e-06, "loss": 1.2901, "step": 63180 }, { "epoch": 1.8630399528232346, "grad_norm": 2.7447612350450568, "learning_rate": 2.8299545635712835e-06, "loss": 1.2978, "step": 63185 }, { "epoch": 1.863187380215244, "grad_norm": 2.806694504853047, "learning_rate": 2.8293309615447753e-06, "loss": 1.2682, "step": 63190 }, { "epoch": 1.8633348076072536, "grad_norm": 2.7305048573690196, "learning_rate": 2.828707386610006e-06, "loss": 1.248, "step": 63195 }, { "epoch": 1.863482234999263, "grad_norm": 2.8483540505622487, "learning_rate": 2.8280838387853233e-06, "loss": 1.3268, "step": 63200 }, { "epoch": 1.8636296623912723, "grad_norm": 2.8046599151647897, "learning_rate": 2.8274603180890772e-06, "loss": 1.2815, "step": 63205 }, { "epoch": 1.8637770897832817, "grad_norm": 2.7490699683133237, "learning_rate": 2.8268368245396146e-06, "loss": 1.2535, "step": 63210 }, { "epoch": 1.8639245171752912, "grad_norm": 2.8896923149237597, "learning_rate": 2.826213358155281e-06, "loss": 1.2811, "step": 63215 }, { "epoch": 1.8640719445673006, "grad_norm": 2.8094005255042527, "learning_rate": 2.8255899189544255e-06, "loss": 1.2709, "step": 63220 }, { "epoch": 1.86421937195931, "grad_norm": 2.788598641385545, "learning_rate": 2.824966506955391e-06, "loss": 1.2893, "step": 63225 }, { "epoch": 1.8643667993513193, "grad_norm": 2.706824610706259, "learning_rate": 2.8243431221765244e-06, "loss": 1.2371, "step": 63230 }, { "epoch": 1.8645142267433288, "grad_norm": 2.819185068943409, "learning_rate": 2.8237197646361678e-06, "loss": 1.2534, "step": 63235 }, { "epoch": 1.8646616541353382, "grad_norm": 2.8413931429230446, "learning_rate": 2.8230964343526665e-06, "loss": 1.2636, "step": 63240 }, { "epoch": 1.8648090815273477, "grad_norm": 2.864954781630891, "learning_rate": 2.8224731313443584e-06, "loss": 1.2758, "step": 63245 }, { "epoch": 1.8649565089193572, "grad_norm": 2.9819322046624945, "learning_rate": 2.8218498556295894e-06, "loss": 1.3021, "step": 63250 }, { "epoch": 1.8651039363113666, "grad_norm": 2.8592571554170583, "learning_rate": 2.821226607226697e-06, "loss": 1.3037, "step": 63255 }, { "epoch": 1.865251363703376, "grad_norm": 2.637219725862575, "learning_rate": 2.820603386154024e-06, "loss": 1.2639, "step": 63260 }, { "epoch": 1.8653987910953855, "grad_norm": 2.94002504631559, "learning_rate": 2.8199801924299055e-06, "loss": 1.2581, "step": 63265 }, { "epoch": 1.865546218487395, "grad_norm": 2.7382300344233856, "learning_rate": 2.8193570260726837e-06, "loss": 1.2994, "step": 63270 }, { "epoch": 1.8656936458794044, "grad_norm": 2.8000264631121485, "learning_rate": 2.8187338871006927e-06, "loss": 1.3218, "step": 63275 }, { "epoch": 1.865841073271414, "grad_norm": 2.6404135176266457, "learning_rate": 2.8181107755322717e-06, "loss": 1.2839, "step": 63280 }, { "epoch": 1.8659885006634234, "grad_norm": 2.8605204867870873, "learning_rate": 2.817487691385755e-06, "loss": 1.2687, "step": 63285 }, { "epoch": 1.8661359280554328, "grad_norm": 3.0813626704204387, "learning_rate": 2.8168646346794766e-06, "loss": 1.3221, "step": 63290 }, { "epoch": 1.8662833554474423, "grad_norm": 2.876342034617115, "learning_rate": 2.8162416054317732e-06, "loss": 1.2852, "step": 63295 }, { "epoch": 1.8664307828394515, "grad_norm": 2.8574812548437616, "learning_rate": 2.815618603660975e-06, "loss": 1.3043, "step": 63300 }, { "epoch": 1.866578210231461, "grad_norm": 2.859461157224634, "learning_rate": 2.8149956293854185e-06, "loss": 1.3358, "step": 63305 }, { "epoch": 1.8667256376234704, "grad_norm": 2.8255183856376025, "learning_rate": 2.814372682623432e-06, "loss": 1.2988, "step": 63310 }, { "epoch": 1.86687306501548, "grad_norm": 2.746322383491932, "learning_rate": 2.813749763393348e-06, "loss": 1.2659, "step": 63315 }, { "epoch": 1.8670204924074894, "grad_norm": 2.76678425171931, "learning_rate": 2.813126871713496e-06, "loss": 1.3443, "step": 63320 }, { "epoch": 1.8671679197994986, "grad_norm": 2.7301848855305972, "learning_rate": 2.812504007602206e-06, "loss": 1.252, "step": 63325 }, { "epoch": 1.867315347191508, "grad_norm": 2.789598495655565, "learning_rate": 2.811881171077806e-06, "loss": 1.272, "step": 63330 }, { "epoch": 1.8674627745835175, "grad_norm": 2.869310507949151, "learning_rate": 2.811258362158624e-06, "loss": 1.3007, "step": 63335 }, { "epoch": 1.867610201975527, "grad_norm": 2.6835701014661675, "learning_rate": 2.8106355808629857e-06, "loss": 1.2589, "step": 63340 }, { "epoch": 1.8677576293675364, "grad_norm": 2.69186450147807, "learning_rate": 2.8100128272092195e-06, "loss": 1.2471, "step": 63345 }, { "epoch": 1.8679050567595459, "grad_norm": 2.7376209051706435, "learning_rate": 2.809390101215649e-06, "loss": 1.2463, "step": 63350 }, { "epoch": 1.8680524841515553, "grad_norm": 2.641229255825691, "learning_rate": 2.808767402900597e-06, "loss": 1.2808, "step": 63355 }, { "epoch": 1.8681999115435648, "grad_norm": 2.737712681166911, "learning_rate": 2.808144732282391e-06, "loss": 1.2876, "step": 63360 }, { "epoch": 1.8683473389355743, "grad_norm": 2.8105724935638565, "learning_rate": 2.8075220893793503e-06, "loss": 1.2807, "step": 63365 }, { "epoch": 1.8684947663275837, "grad_norm": 2.9122607369777276, "learning_rate": 2.8068994742098e-06, "loss": 1.2502, "step": 63370 }, { "epoch": 1.8686421937195932, "grad_norm": 3.0107548795303885, "learning_rate": 2.8062768867920575e-06, "loss": 1.2659, "step": 63375 }, { "epoch": 1.8687896211116026, "grad_norm": 2.8236055392403228, "learning_rate": 2.8056543271444468e-06, "loss": 1.2946, "step": 63380 }, { "epoch": 1.868937048503612, "grad_norm": 2.7581005168652766, "learning_rate": 2.805031795285285e-06, "loss": 1.2536, "step": 63385 }, { "epoch": 1.8690844758956215, "grad_norm": 2.7855761542061, "learning_rate": 2.8044092912328927e-06, "loss": 1.2967, "step": 63390 }, { "epoch": 1.869231903287631, "grad_norm": 2.804877930171789, "learning_rate": 2.803786815005585e-06, "loss": 1.2738, "step": 63395 }, { "epoch": 1.8693793306796402, "grad_norm": 2.8579521132112324, "learning_rate": 2.803164366621682e-06, "loss": 1.3102, "step": 63400 }, { "epoch": 1.8695267580716497, "grad_norm": 2.871667183689441, "learning_rate": 2.8025419460994986e-06, "loss": 1.2841, "step": 63405 }, { "epoch": 1.8696741854636592, "grad_norm": 2.7656958010668995, "learning_rate": 2.801919553457351e-06, "loss": 1.3274, "step": 63410 }, { "epoch": 1.8698216128556686, "grad_norm": 2.8349490340023644, "learning_rate": 2.801297188713553e-06, "loss": 1.2884, "step": 63415 }, { "epoch": 1.869969040247678, "grad_norm": 2.7854694723773075, "learning_rate": 2.8006748518864167e-06, "loss": 1.2841, "step": 63420 }, { "epoch": 1.8701164676396873, "grad_norm": 2.9301959465419545, "learning_rate": 2.800052542994259e-06, "loss": 1.3439, "step": 63425 }, { "epoch": 1.8702638950316968, "grad_norm": 2.7594447841922967, "learning_rate": 2.7994302620553876e-06, "loss": 1.3311, "step": 63430 }, { "epoch": 1.8704113224237062, "grad_norm": 2.6935596067728276, "learning_rate": 2.798808009088118e-06, "loss": 1.3282, "step": 63435 }, { "epoch": 1.8705587498157157, "grad_norm": 2.908788727386942, "learning_rate": 2.7981857841107583e-06, "loss": 1.2652, "step": 63440 }, { "epoch": 1.8707061772077251, "grad_norm": 2.897879820752736, "learning_rate": 2.7975635871416195e-06, "loss": 1.3165, "step": 63445 }, { "epoch": 1.8708536045997346, "grad_norm": 2.763965566856699, "learning_rate": 2.7969414181990082e-06, "loss": 1.2808, "step": 63450 }, { "epoch": 1.871001031991744, "grad_norm": 2.8613653528335825, "learning_rate": 2.7963192773012356e-06, "loss": 1.2355, "step": 63455 }, { "epoch": 1.8711484593837535, "grad_norm": 2.817402613061473, "learning_rate": 2.795697164466606e-06, "loss": 1.276, "step": 63460 }, { "epoch": 1.871295886775763, "grad_norm": 2.8620618497347117, "learning_rate": 2.7950750797134284e-06, "loss": 1.3473, "step": 63465 }, { "epoch": 1.8714433141677724, "grad_norm": 2.944795974407003, "learning_rate": 2.7944530230600047e-06, "loss": 1.2856, "step": 63470 }, { "epoch": 1.871590741559782, "grad_norm": 2.9295335393408966, "learning_rate": 2.7938309945246444e-06, "loss": 1.2751, "step": 63475 }, { "epoch": 1.8717381689517913, "grad_norm": 2.882247648578053, "learning_rate": 2.7932089941256485e-06, "loss": 1.2875, "step": 63480 }, { "epoch": 1.8718855963438008, "grad_norm": 2.8188153494245065, "learning_rate": 2.792587021881319e-06, "loss": 1.2531, "step": 63485 }, { "epoch": 1.8720330237358103, "grad_norm": 2.8538150641372475, "learning_rate": 2.791965077809961e-06, "loss": 1.3237, "step": 63490 }, { "epoch": 1.8721804511278195, "grad_norm": 2.818652463382323, "learning_rate": 2.7913431619298737e-06, "loss": 1.2749, "step": 63495 }, { "epoch": 1.872327878519829, "grad_norm": 2.7880689506507133, "learning_rate": 2.7907212742593595e-06, "loss": 1.2934, "step": 63500 }, { "epoch": 1.872327878519829, "eval_loss": 1.0686620473861694, "eval_runtime": 4.1916, "eval_samples_per_second": 94.474, "eval_steps_per_second": 3.101, "step": 63500 }, { "epoch": 1.8724753059118384, "grad_norm": 2.8905599631719396, "learning_rate": 2.7900994148167155e-06, "loss": 1.3363, "step": 63505 }, { "epoch": 1.8726227333038479, "grad_norm": 2.8768814408796444, "learning_rate": 2.7894775836202444e-06, "loss": 1.297, "step": 63510 }, { "epoch": 1.8727701606958573, "grad_norm": 2.775055388304847, "learning_rate": 2.7888557806882407e-06, "loss": 1.3237, "step": 63515 }, { "epoch": 1.8729175880878666, "grad_norm": 2.837528372495641, "learning_rate": 2.7882340060390048e-06, "loss": 1.3143, "step": 63520 }, { "epoch": 1.873065015479876, "grad_norm": 2.736200346115406, "learning_rate": 2.787612259690829e-06, "loss": 1.2974, "step": 63525 }, { "epoch": 1.8732124428718855, "grad_norm": 2.7278324191883065, "learning_rate": 2.7869905416620135e-06, "loss": 1.2876, "step": 63530 }, { "epoch": 1.873359870263895, "grad_norm": 2.7841007476161295, "learning_rate": 2.7863688519708496e-06, "loss": 1.3256, "step": 63535 }, { "epoch": 1.8735072976559044, "grad_norm": 2.751775507102379, "learning_rate": 2.785747190635634e-06, "loss": 1.2513, "step": 63540 }, { "epoch": 1.8736547250479139, "grad_norm": 2.624866166808657, "learning_rate": 2.785125557674658e-06, "loss": 1.2728, "step": 63545 }, { "epoch": 1.8738021524399233, "grad_norm": 2.8063827716523515, "learning_rate": 2.784503953106213e-06, "loss": 1.3207, "step": 63550 }, { "epoch": 1.8739495798319328, "grad_norm": 2.6241037182826767, "learning_rate": 2.7838823769485928e-06, "loss": 1.2655, "step": 63555 }, { "epoch": 1.8740970072239422, "grad_norm": 2.776880130780907, "learning_rate": 2.7832608292200856e-06, "loss": 1.2474, "step": 63560 }, { "epoch": 1.8742444346159517, "grad_norm": 2.825274578611043, "learning_rate": 2.782639309938984e-06, "loss": 1.2732, "step": 63565 }, { "epoch": 1.8743918620079612, "grad_norm": 2.647657402849888, "learning_rate": 2.7820178191235742e-06, "loss": 1.2709, "step": 63570 }, { "epoch": 1.8745392893999706, "grad_norm": 2.838615850673582, "learning_rate": 2.7813963567921466e-06, "loss": 1.3035, "step": 63575 }, { "epoch": 1.87468671679198, "grad_norm": 2.7357238308614678, "learning_rate": 2.7807749229629854e-06, "loss": 1.2524, "step": 63580 }, { "epoch": 1.8748341441839895, "grad_norm": 2.7857572089621865, "learning_rate": 2.7801535176543803e-06, "loss": 1.2467, "step": 63585 }, { "epoch": 1.874981571575999, "grad_norm": 2.8477772631222513, "learning_rate": 2.779532140884615e-06, "loss": 1.3203, "step": 63590 }, { "epoch": 1.8751289989680082, "grad_norm": 2.7142818568062914, "learning_rate": 2.778910792671975e-06, "loss": 1.2461, "step": 63595 }, { "epoch": 1.8752764263600177, "grad_norm": 2.8334715375920165, "learning_rate": 2.7782894730347423e-06, "loss": 1.2578, "step": 63600 }, { "epoch": 1.8754238537520271, "grad_norm": 2.9757917547330606, "learning_rate": 2.7776681819912032e-06, "loss": 1.3024, "step": 63605 }, { "epoch": 1.8755712811440366, "grad_norm": 2.856661416332051, "learning_rate": 2.7770469195596384e-06, "loss": 1.2941, "step": 63610 }, { "epoch": 1.8757187085360458, "grad_norm": 2.6859968632212166, "learning_rate": 2.7764256857583267e-06, "loss": 1.2708, "step": 63615 }, { "epoch": 1.8758661359280553, "grad_norm": 2.6147809418704044, "learning_rate": 2.7758044806055527e-06, "loss": 1.3128, "step": 63620 }, { "epoch": 1.8760135633200647, "grad_norm": 2.878297019667667, "learning_rate": 2.7751833041195936e-06, "loss": 1.2956, "step": 63625 }, { "epoch": 1.8761609907120742, "grad_norm": 2.72560225515116, "learning_rate": 2.7745621563187296e-06, "loss": 1.2994, "step": 63630 }, { "epoch": 1.8763084181040837, "grad_norm": 2.8430942389460134, "learning_rate": 2.7739410372212365e-06, "loss": 1.3087, "step": 63635 }, { "epoch": 1.8764558454960931, "grad_norm": 2.7986045231273695, "learning_rate": 2.7733199468453943e-06, "loss": 1.3131, "step": 63640 }, { "epoch": 1.8766032728881026, "grad_norm": 2.689417284680697, "learning_rate": 2.7726988852094774e-06, "loss": 1.2538, "step": 63645 }, { "epoch": 1.876750700280112, "grad_norm": 2.841875933163465, "learning_rate": 2.7720778523317623e-06, "loss": 1.3213, "step": 63650 }, { "epoch": 1.8768981276721215, "grad_norm": 2.784387658220881, "learning_rate": 2.771456848230522e-06, "loss": 1.2693, "step": 63655 }, { "epoch": 1.877045555064131, "grad_norm": 2.7549959605276197, "learning_rate": 2.7708358729240324e-06, "loss": 1.2773, "step": 63660 }, { "epoch": 1.8771929824561404, "grad_norm": 2.846471457433389, "learning_rate": 2.7702149264305645e-06, "loss": 1.2834, "step": 63665 }, { "epoch": 1.8773404098481499, "grad_norm": 2.902296718327065, "learning_rate": 2.7695940087683923e-06, "loss": 1.2592, "step": 63670 }, { "epoch": 1.8774878372401593, "grad_norm": 2.88514014511096, "learning_rate": 2.768973119955786e-06, "loss": 1.2557, "step": 63675 }, { "epoch": 1.8776352646321688, "grad_norm": 2.8261746394040954, "learning_rate": 2.768352260011014e-06, "loss": 1.296, "step": 63680 }, { "epoch": 1.8777826920241782, "grad_norm": 2.7323373829920166, "learning_rate": 2.7677314289523495e-06, "loss": 1.2881, "step": 63685 }, { "epoch": 1.8779301194161875, "grad_norm": 2.7157746587593494, "learning_rate": 2.7671106267980575e-06, "loss": 1.2937, "step": 63690 }, { "epoch": 1.878077546808197, "grad_norm": 2.8194326580584628, "learning_rate": 2.7664898535664093e-06, "loss": 1.2875, "step": 63695 }, { "epoch": 1.8782249742002064, "grad_norm": 2.7483319482492634, "learning_rate": 2.7658691092756695e-06, "loss": 1.2598, "step": 63700 }, { "epoch": 1.8783724015922159, "grad_norm": 2.9133729726451376, "learning_rate": 2.7652483939441054e-06, "loss": 1.3078, "step": 63705 }, { "epoch": 1.8785198289842253, "grad_norm": 2.9653158766982295, "learning_rate": 2.76462770758998e-06, "loss": 1.2721, "step": 63710 }, { "epoch": 1.8786672563762346, "grad_norm": 2.9696686063988453, "learning_rate": 2.7640070502315615e-06, "loss": 1.3222, "step": 63715 }, { "epoch": 1.878814683768244, "grad_norm": 2.8467647198446766, "learning_rate": 2.7633864218871106e-06, "loss": 1.2718, "step": 63720 }, { "epoch": 1.8789621111602535, "grad_norm": 2.7319895570529806, "learning_rate": 2.762765822574892e-06, "loss": 1.2593, "step": 63725 }, { "epoch": 1.879109538552263, "grad_norm": 2.758895087733029, "learning_rate": 2.762145252313164e-06, "loss": 1.2916, "step": 63730 }, { "epoch": 1.8792569659442724, "grad_norm": 2.8643222531268426, "learning_rate": 2.7615247111201924e-06, "loss": 1.2893, "step": 63735 }, { "epoch": 1.8794043933362818, "grad_norm": 2.9116279870764106, "learning_rate": 2.760904199014234e-06, "loss": 1.284, "step": 63740 }, { "epoch": 1.8795518207282913, "grad_norm": 2.9331417127173487, "learning_rate": 2.760283716013548e-06, "loss": 1.2783, "step": 63745 }, { "epoch": 1.8796992481203008, "grad_norm": 3.287286055525273, "learning_rate": 2.759663262136395e-06, "loss": 1.297, "step": 63750 }, { "epoch": 1.8798466755123102, "grad_norm": 2.7262259406678653, "learning_rate": 2.759042837401031e-06, "loss": 1.2515, "step": 63755 }, { "epoch": 1.8799941029043197, "grad_norm": 2.8579483174908984, "learning_rate": 2.758422441825714e-06, "loss": 1.3002, "step": 63760 }, { "epoch": 1.8801415302963291, "grad_norm": 2.721532955205343, "learning_rate": 2.757802075428697e-06, "loss": 1.2418, "step": 63765 }, { "epoch": 1.8802889576883386, "grad_norm": 2.9266520730440173, "learning_rate": 2.7571817382282386e-06, "loss": 1.3087, "step": 63770 }, { "epoch": 1.880436385080348, "grad_norm": 2.7419901718912194, "learning_rate": 2.75656143024259e-06, "loss": 1.286, "step": 63775 }, { "epoch": 1.8805838124723575, "grad_norm": 2.857657046256729, "learning_rate": 2.7559411514900065e-06, "loss": 1.3802, "step": 63780 }, { "epoch": 1.8807312398643667, "grad_norm": 2.7755628219473123, "learning_rate": 2.7553209019887386e-06, "loss": 1.2614, "step": 63785 }, { "epoch": 1.8808786672563762, "grad_norm": 2.7711910029366553, "learning_rate": 2.7547006817570402e-06, "loss": 1.3016, "step": 63790 }, { "epoch": 1.8810260946483857, "grad_norm": 2.7397867125093724, "learning_rate": 2.7540804908131603e-06, "loss": 1.3187, "step": 63795 }, { "epoch": 1.8811735220403951, "grad_norm": 2.904868013075521, "learning_rate": 2.7534603291753498e-06, "loss": 1.3062, "step": 63800 }, { "epoch": 1.8813209494324046, "grad_norm": 2.824422873331644, "learning_rate": 2.7528401968618567e-06, "loss": 1.2489, "step": 63805 }, { "epoch": 1.8814683768244138, "grad_norm": 2.78990538703877, "learning_rate": 2.7522200938909283e-06, "loss": 1.2724, "step": 63810 }, { "epoch": 1.8816158042164233, "grad_norm": 2.8637534198372823, "learning_rate": 2.751600020280815e-06, "loss": 1.2743, "step": 63815 }, { "epoch": 1.8817632316084327, "grad_norm": 2.7926760133620685, "learning_rate": 2.7509799760497587e-06, "loss": 1.2962, "step": 63820 }, { "epoch": 1.8819106590004422, "grad_norm": 2.8960657192503114, "learning_rate": 2.75035996121601e-06, "loss": 1.3038, "step": 63825 }, { "epoch": 1.8820580863924516, "grad_norm": 2.7192444060789183, "learning_rate": 2.749739975797809e-06, "loss": 1.286, "step": 63830 }, { "epoch": 1.882205513784461, "grad_norm": 2.797344462841375, "learning_rate": 2.7491200198134023e-06, "loss": 1.322, "step": 63835 }, { "epoch": 1.8823529411764706, "grad_norm": 2.7860380195323358, "learning_rate": 2.748500093281031e-06, "loss": 1.2284, "step": 63840 }, { "epoch": 1.88250036856848, "grad_norm": 2.8116247534021723, "learning_rate": 2.747880196218939e-06, "loss": 1.283, "step": 63845 }, { "epoch": 1.8826477959604895, "grad_norm": 2.8669414563146636, "learning_rate": 2.7472603286453663e-06, "loss": 1.2336, "step": 63850 }, { "epoch": 1.882795223352499, "grad_norm": 2.713492157491494, "learning_rate": 2.746640490578554e-06, "loss": 1.231, "step": 63855 }, { "epoch": 1.8829426507445084, "grad_norm": 2.9377816873322895, "learning_rate": 2.746020682036739e-06, "loss": 1.342, "step": 63860 }, { "epoch": 1.8830900781365179, "grad_norm": 2.7478705063798046, "learning_rate": 2.7454009030381635e-06, "loss": 1.2706, "step": 63865 }, { "epoch": 1.8832375055285273, "grad_norm": 2.736402202547648, "learning_rate": 2.744781153601064e-06, "loss": 1.2595, "step": 63870 }, { "epoch": 1.8833849329205368, "grad_norm": 2.8239402726834912, "learning_rate": 2.744161433743675e-06, "loss": 1.3103, "step": 63875 }, { "epoch": 1.8835323603125462, "grad_norm": 2.791589820727202, "learning_rate": 2.743541743484236e-06, "loss": 1.3323, "step": 63880 }, { "epoch": 1.8836797877045555, "grad_norm": 2.895815870242262, "learning_rate": 2.74292208284098e-06, "loss": 1.3163, "step": 63885 }, { "epoch": 1.883827215096565, "grad_norm": 2.845574450269909, "learning_rate": 2.742302451832142e-06, "loss": 1.2633, "step": 63890 }, { "epoch": 1.8839746424885744, "grad_norm": 2.7222455435763626, "learning_rate": 2.741682850475954e-06, "loss": 1.2935, "step": 63895 }, { "epoch": 1.8841220698805838, "grad_norm": 2.7977754679337528, "learning_rate": 2.7410632787906505e-06, "loss": 1.2669, "step": 63900 }, { "epoch": 1.8842694972725933, "grad_norm": 2.8343285782302297, "learning_rate": 2.7404437367944617e-06, "loss": 1.3238, "step": 63905 }, { "epoch": 1.8844169246646025, "grad_norm": 2.798629922803977, "learning_rate": 2.739824224505619e-06, "loss": 1.2618, "step": 63910 }, { "epoch": 1.884564352056612, "grad_norm": 2.8688567160638083, "learning_rate": 2.739204741942351e-06, "loss": 1.3121, "step": 63915 }, { "epoch": 1.8847117794486214, "grad_norm": 3.058322760635213, "learning_rate": 2.738585289122889e-06, "loss": 1.3062, "step": 63920 }, { "epoch": 1.884859206840631, "grad_norm": 2.8643233267059807, "learning_rate": 2.737965866065459e-06, "loss": 1.2908, "step": 63925 }, { "epoch": 1.8850066342326404, "grad_norm": 2.722798068608238, "learning_rate": 2.7373464727882896e-06, "loss": 1.2848, "step": 63930 }, { "epoch": 1.8851540616246498, "grad_norm": 2.9128803055940398, "learning_rate": 2.736727109309607e-06, "loss": 1.2924, "step": 63935 }, { "epoch": 1.8853014890166593, "grad_norm": 2.849058142276432, "learning_rate": 2.7361077756476345e-06, "loss": 1.3153, "step": 63940 }, { "epoch": 1.8854489164086687, "grad_norm": 2.76786778983234, "learning_rate": 2.7354884718206e-06, "loss": 1.2845, "step": 63945 }, { "epoch": 1.8855963438006782, "grad_norm": 2.7153164536562713, "learning_rate": 2.734869197846724e-06, "loss": 1.2534, "step": 63950 }, { "epoch": 1.8857437711926877, "grad_norm": 2.77095877198108, "learning_rate": 2.7342499537442326e-06, "loss": 1.2268, "step": 63955 }, { "epoch": 1.8858911985846971, "grad_norm": 2.918298655320154, "learning_rate": 2.7336307395313457e-06, "loss": 1.3, "step": 63960 }, { "epoch": 1.8860386259767066, "grad_norm": 2.878666702795241, "learning_rate": 2.7330115552262853e-06, "loss": 1.3215, "step": 63965 }, { "epoch": 1.886186053368716, "grad_norm": 2.6960495533429216, "learning_rate": 2.7323924008472695e-06, "loss": 1.2053, "step": 63970 }, { "epoch": 1.8863334807607255, "grad_norm": 2.8905770757954756, "learning_rate": 2.7317732764125214e-06, "loss": 1.3112, "step": 63975 }, { "epoch": 1.8864809081527347, "grad_norm": 2.751611684873081, "learning_rate": 2.7311541819402556e-06, "loss": 1.3249, "step": 63980 }, { "epoch": 1.8866283355447442, "grad_norm": 2.7555861096944896, "learning_rate": 2.730535117448693e-06, "loss": 1.3092, "step": 63985 }, { "epoch": 1.8867757629367536, "grad_norm": 2.9329049209851936, "learning_rate": 2.729916082956047e-06, "loss": 1.274, "step": 63990 }, { "epoch": 1.886923190328763, "grad_norm": 2.7353168566262105, "learning_rate": 2.7292970784805367e-06, "loss": 1.2818, "step": 63995 }, { "epoch": 1.8870706177207726, "grad_norm": 2.7179438008742864, "learning_rate": 2.728678104040375e-06, "loss": 1.3182, "step": 64000 }, { "epoch": 1.8870706177207726, "eval_loss": 1.0675138235092163, "eval_runtime": 4.2998, "eval_samples_per_second": 92.097, "eval_steps_per_second": 3.023, "step": 64000 }, { "epoch": 1.8872180451127818, "grad_norm": 2.8125122143775827, "learning_rate": 2.728059159653775e-06, "loss": 1.2619, "step": 64005 }, { "epoch": 1.8873654725047913, "grad_norm": 2.916654982377363, "learning_rate": 2.7274402453389526e-06, "loss": 1.2693, "step": 64010 }, { "epoch": 1.8875128998968007, "grad_norm": 2.735420245718765, "learning_rate": 2.7268213611141184e-06, "loss": 1.2945, "step": 64015 }, { "epoch": 1.8876603272888102, "grad_norm": 2.8133911251445296, "learning_rate": 2.726202506997484e-06, "loss": 1.2667, "step": 64020 }, { "epoch": 1.8878077546808196, "grad_norm": 2.800343701483641, "learning_rate": 2.7255836830072582e-06, "loss": 1.3234, "step": 64025 }, { "epoch": 1.887955182072829, "grad_norm": 2.8056483470443605, "learning_rate": 2.7249648891616547e-06, "loss": 1.3305, "step": 64030 }, { "epoch": 1.8881026094648385, "grad_norm": 2.970797167033516, "learning_rate": 2.724346125478879e-06, "loss": 1.3141, "step": 64035 }, { "epoch": 1.888250036856848, "grad_norm": 2.8383474868749556, "learning_rate": 2.72372739197714e-06, "loss": 1.2819, "step": 64040 }, { "epoch": 1.8883974642488575, "grad_norm": 2.8137427138047517, "learning_rate": 2.7231086886746427e-06, "loss": 1.2684, "step": 64045 }, { "epoch": 1.888544891640867, "grad_norm": 2.790239318216158, "learning_rate": 2.7224900155895964e-06, "loss": 1.2955, "step": 64050 }, { "epoch": 1.8886923190328764, "grad_norm": 2.831575658358185, "learning_rate": 2.7218713727402044e-06, "loss": 1.2757, "step": 64055 }, { "epoch": 1.8888397464248858, "grad_norm": 2.7281584928774065, "learning_rate": 2.721252760144672e-06, "loss": 1.2773, "step": 64060 }, { "epoch": 1.8889871738168953, "grad_norm": 2.75270090995654, "learning_rate": 2.720634177821201e-06, "loss": 1.3077, "step": 64065 }, { "epoch": 1.8891346012089048, "grad_norm": 2.8145034448940636, "learning_rate": 2.720015625787994e-06, "loss": 1.2846, "step": 64070 }, { "epoch": 1.8892820286009142, "grad_norm": 2.8087813869894367, "learning_rate": 2.7193971040632546e-06, "loss": 1.2403, "step": 64075 }, { "epoch": 1.8894294559929234, "grad_norm": 2.837741743231755, "learning_rate": 2.718778612665181e-06, "loss": 1.2927, "step": 64080 }, { "epoch": 1.889576883384933, "grad_norm": 2.898010808784101, "learning_rate": 2.718160151611975e-06, "loss": 1.2697, "step": 64085 }, { "epoch": 1.8897243107769424, "grad_norm": 3.1144774757839184, "learning_rate": 2.717541720921835e-06, "loss": 1.2803, "step": 64090 }, { "epoch": 1.8898717381689518, "grad_norm": 2.8856461675670384, "learning_rate": 2.716923320612959e-06, "loss": 1.2812, "step": 64095 }, { "epoch": 1.8900191655609613, "grad_norm": 2.694998982819367, "learning_rate": 2.7163049507035423e-06, "loss": 1.2983, "step": 64100 }, { "epoch": 1.8901665929529705, "grad_norm": 2.785660854081423, "learning_rate": 2.715686611211785e-06, "loss": 1.3424, "step": 64105 }, { "epoch": 1.89031402034498, "grad_norm": 2.693094208754067, "learning_rate": 2.7150683021558787e-06, "loss": 1.2786, "step": 64110 }, { "epoch": 1.8904614477369894, "grad_norm": 2.9325440158253597, "learning_rate": 2.7144500235540206e-06, "loss": 1.2954, "step": 64115 }, { "epoch": 1.890608875128999, "grad_norm": 2.7196239994199685, "learning_rate": 2.713831775424401e-06, "loss": 1.2419, "step": 64120 }, { "epoch": 1.8907563025210083, "grad_norm": 2.9375090015674794, "learning_rate": 2.713213557785217e-06, "loss": 1.2929, "step": 64125 }, { "epoch": 1.8909037299130178, "grad_norm": 2.731670854464566, "learning_rate": 2.7125953706546574e-06, "loss": 1.2725, "step": 64130 }, { "epoch": 1.8910511573050273, "grad_norm": 2.670495937473536, "learning_rate": 2.711977214050912e-06, "loss": 1.2748, "step": 64135 }, { "epoch": 1.8911985846970367, "grad_norm": 2.806719441175616, "learning_rate": 2.711359087992174e-06, "loss": 1.2603, "step": 64140 }, { "epoch": 1.8913460120890462, "grad_norm": 2.8046637462817707, "learning_rate": 2.7107409924966296e-06, "loss": 1.3072, "step": 64145 }, { "epoch": 1.8914934394810556, "grad_norm": 2.7902076037151406, "learning_rate": 2.7101229275824698e-06, "loss": 1.2954, "step": 64150 }, { "epoch": 1.891640866873065, "grad_norm": 2.826157929330505, "learning_rate": 2.7095048932678783e-06, "loss": 1.3343, "step": 64155 }, { "epoch": 1.8917882942650746, "grad_norm": 2.7603964947671664, "learning_rate": 2.708886889571045e-06, "loss": 1.2631, "step": 64160 }, { "epoch": 1.891935721657084, "grad_norm": 2.8270210771903743, "learning_rate": 2.708268916510153e-06, "loss": 1.356, "step": 64165 }, { "epoch": 1.8920831490490935, "grad_norm": 2.8818321255894688, "learning_rate": 2.7076509741033886e-06, "loss": 1.2235, "step": 64170 }, { "epoch": 1.8922305764411027, "grad_norm": 2.8335258240656818, "learning_rate": 2.7070330623689328e-06, "loss": 1.3458, "step": 64175 }, { "epoch": 1.8923780038331122, "grad_norm": 2.8188690074860308, "learning_rate": 2.7064151813249718e-06, "loss": 1.2762, "step": 64180 }, { "epoch": 1.8925254312251216, "grad_norm": 2.8570544860555627, "learning_rate": 2.705797330989685e-06, "loss": 1.2839, "step": 64185 }, { "epoch": 1.892672858617131, "grad_norm": 2.7903246250023543, "learning_rate": 2.7051795113812545e-06, "loss": 1.3186, "step": 64190 }, { "epoch": 1.8928202860091405, "grad_norm": 2.569936003008469, "learning_rate": 2.7045617225178605e-06, "loss": 1.2828, "step": 64195 }, { "epoch": 1.8929677134011498, "grad_norm": 2.7272521219885513, "learning_rate": 2.7039439644176794e-06, "loss": 1.2329, "step": 64200 }, { "epoch": 1.8931151407931592, "grad_norm": 2.823274175094057, "learning_rate": 2.7033262370988934e-06, "loss": 1.2827, "step": 64205 }, { "epoch": 1.8932625681851687, "grad_norm": 2.6408339486828156, "learning_rate": 2.7027085405796768e-06, "loss": 1.2611, "step": 64210 }, { "epoch": 1.8934099955771782, "grad_norm": 2.837481348204673, "learning_rate": 2.702090874878209e-06, "loss": 1.3017, "step": 64215 }, { "epoch": 1.8935574229691876, "grad_norm": 2.8950795026148652, "learning_rate": 2.701473240012662e-06, "loss": 1.3019, "step": 64220 }, { "epoch": 1.893704850361197, "grad_norm": 2.810085460601545, "learning_rate": 2.7008556360012136e-06, "loss": 1.3083, "step": 64225 }, { "epoch": 1.8938522777532065, "grad_norm": 2.765416530837998, "learning_rate": 2.700238062862035e-06, "loss": 1.3102, "step": 64230 }, { "epoch": 1.893999705145216, "grad_norm": 2.8242017721053565, "learning_rate": 2.699620520613301e-06, "loss": 1.3041, "step": 64235 }, { "epoch": 1.8941471325372254, "grad_norm": 2.748079764313636, "learning_rate": 2.6990030092731822e-06, "loss": 1.2665, "step": 64240 }, { "epoch": 1.894294559929235, "grad_norm": 2.709430955068676, "learning_rate": 2.6983855288598506e-06, "loss": 1.2664, "step": 64245 }, { "epoch": 1.8944419873212444, "grad_norm": 2.807779947930213, "learning_rate": 2.6977680793914742e-06, "loss": 1.2909, "step": 64250 }, { "epoch": 1.8945894147132538, "grad_norm": 2.6424658250521245, "learning_rate": 2.6971506608862247e-06, "loss": 1.2278, "step": 64255 }, { "epoch": 1.8947368421052633, "grad_norm": 2.7897009103437114, "learning_rate": 2.6965332733622695e-06, "loss": 1.2839, "step": 64260 }, { "epoch": 1.8948842694972727, "grad_norm": 2.8613374255475708, "learning_rate": 2.695915916837774e-06, "loss": 1.3118, "step": 64265 }, { "epoch": 1.8950316968892822, "grad_norm": 2.725687197283902, "learning_rate": 2.695298591330908e-06, "loss": 1.2911, "step": 64270 }, { "epoch": 1.8951791242812914, "grad_norm": 2.68407763615635, "learning_rate": 2.6946812968598343e-06, "loss": 1.3074, "step": 64275 }, { "epoch": 1.8953265516733009, "grad_norm": 2.781157769318069, "learning_rate": 2.6940640334427196e-06, "loss": 1.2713, "step": 64280 }, { "epoch": 1.8954739790653103, "grad_norm": 2.7938308644827323, "learning_rate": 2.693446801097725e-06, "loss": 1.2541, "step": 64285 }, { "epoch": 1.8956214064573198, "grad_norm": 2.7752682556462474, "learning_rate": 2.692829599843016e-06, "loss": 1.2519, "step": 64290 }, { "epoch": 1.8957688338493293, "grad_norm": 2.80955562918567, "learning_rate": 2.6922124296967523e-06, "loss": 1.231, "step": 64295 }, { "epoch": 1.8959162612413385, "grad_norm": 2.9024608595395494, "learning_rate": 2.6915952906770964e-06, "loss": 1.3171, "step": 64300 }, { "epoch": 1.896063688633348, "grad_norm": 2.8619236643946793, "learning_rate": 2.6909781828022067e-06, "loss": 1.325, "step": 64305 }, { "epoch": 1.8962111160253574, "grad_norm": 2.891204486828326, "learning_rate": 2.6903611060902444e-06, "loss": 1.2396, "step": 64310 }, { "epoch": 1.8963585434173669, "grad_norm": 2.823093569787529, "learning_rate": 2.6897440605593657e-06, "loss": 1.3012, "step": 64315 }, { "epoch": 1.8965059708093763, "grad_norm": 2.7095212864762113, "learning_rate": 2.68912704622773e-06, "loss": 1.3066, "step": 64320 }, { "epoch": 1.8966533982013858, "grad_norm": 2.785052020513448, "learning_rate": 2.688510063113492e-06, "loss": 1.2717, "step": 64325 }, { "epoch": 1.8968008255933952, "grad_norm": 2.8771478303455718, "learning_rate": 2.687893111234806e-06, "loss": 1.333, "step": 64330 }, { "epoch": 1.8969482529854047, "grad_norm": 2.7513134746521843, "learning_rate": 2.687276190609829e-06, "loss": 1.2609, "step": 64335 }, { "epoch": 1.8970956803774142, "grad_norm": 2.8378968721042783, "learning_rate": 2.6866593012567125e-06, "loss": 1.2769, "step": 64340 }, { "epoch": 1.8972431077694236, "grad_norm": 2.836966225933698, "learning_rate": 2.686042443193612e-06, "loss": 1.3325, "step": 64345 }, { "epoch": 1.897390535161433, "grad_norm": 2.868062101799177, "learning_rate": 2.685425616438676e-06, "loss": 1.2864, "step": 64350 }, { "epoch": 1.8975379625534425, "grad_norm": 2.917292505502684, "learning_rate": 2.6848088210100583e-06, "loss": 1.2549, "step": 64355 }, { "epoch": 1.897685389945452, "grad_norm": 2.826524935067174, "learning_rate": 2.684192056925906e-06, "loss": 1.2676, "step": 64360 }, { "epoch": 1.8978328173374615, "grad_norm": 2.772839321447882, "learning_rate": 2.6835753242043707e-06, "loss": 1.3218, "step": 64365 }, { "epoch": 1.8979802447294707, "grad_norm": 2.8640378641570314, "learning_rate": 2.6829586228635984e-06, "loss": 1.3002, "step": 64370 }, { "epoch": 1.8981276721214801, "grad_norm": 2.8199530876106, "learning_rate": 2.682341952921738e-06, "loss": 1.2608, "step": 64375 }, { "epoch": 1.8982750995134896, "grad_norm": 2.847792002127042, "learning_rate": 2.681725314396933e-06, "loss": 1.3088, "step": 64380 }, { "epoch": 1.898422526905499, "grad_norm": 2.9259769133428084, "learning_rate": 2.681108707307332e-06, "loss": 1.3516, "step": 64385 }, { "epoch": 1.8985699542975085, "grad_norm": 2.8467779371231665, "learning_rate": 2.680492131671078e-06, "loss": 1.292, "step": 64390 }, { "epoch": 1.8987173816895178, "grad_norm": 2.8504733330549112, "learning_rate": 2.6798755875063123e-06, "loss": 1.2305, "step": 64395 }, { "epoch": 1.8988648090815272, "grad_norm": 2.748289238099769, "learning_rate": 2.679259074831182e-06, "loss": 1.2444, "step": 64400 }, { "epoch": 1.8990122364735367, "grad_norm": 2.8566117365618124, "learning_rate": 2.678642593663824e-06, "loss": 1.2681, "step": 64405 }, { "epoch": 1.8991596638655461, "grad_norm": 2.974076884084325, "learning_rate": 2.678026144022383e-06, "loss": 1.2823, "step": 64410 }, { "epoch": 1.8993070912575556, "grad_norm": 2.650183079744284, "learning_rate": 2.6774097259249943e-06, "loss": 1.299, "step": 64415 }, { "epoch": 1.899454518649565, "grad_norm": 2.780185363730953, "learning_rate": 2.676793339389801e-06, "loss": 1.3224, "step": 64420 }, { "epoch": 1.8996019460415745, "grad_norm": 2.7472312627520465, "learning_rate": 2.6761769844349385e-06, "loss": 1.2799, "step": 64425 }, { "epoch": 1.899749373433584, "grad_norm": 2.825049587424326, "learning_rate": 2.6755606610785456e-06, "loss": 1.341, "step": 64430 }, { "epoch": 1.8998968008255934, "grad_norm": 2.84454953084455, "learning_rate": 2.6749443693387546e-06, "loss": 1.3256, "step": 64435 }, { "epoch": 1.9000442282176029, "grad_norm": 2.7177535896296967, "learning_rate": 2.6743281092337055e-06, "loss": 1.2811, "step": 64440 }, { "epoch": 1.9001916556096123, "grad_norm": 3.1534577799368457, "learning_rate": 2.6737118807815287e-06, "loss": 1.2876, "step": 64445 }, { "epoch": 1.9003390830016218, "grad_norm": 2.7926472493553254, "learning_rate": 2.6730956840003603e-06, "loss": 1.2573, "step": 64450 }, { "epoch": 1.9004865103936313, "grad_norm": 2.8140266740052025, "learning_rate": 2.6724795189083305e-06, "loss": 1.2832, "step": 64455 }, { "epoch": 1.9006339377856407, "grad_norm": 2.882224406316209, "learning_rate": 2.67186338552357e-06, "loss": 1.3287, "step": 64460 }, { "epoch": 1.9007813651776502, "grad_norm": 2.825020523666445, "learning_rate": 2.6712472838642117e-06, "loss": 1.2574, "step": 64465 }, { "epoch": 1.9009287925696594, "grad_norm": 2.906593370422612, "learning_rate": 2.6706312139483825e-06, "loss": 1.2824, "step": 64470 }, { "epoch": 1.9010762199616689, "grad_norm": 2.657179367086432, "learning_rate": 2.6700151757942136e-06, "loss": 1.2898, "step": 64475 }, { "epoch": 1.9012236473536783, "grad_norm": 2.7296505462361886, "learning_rate": 2.6693991694198315e-06, "loss": 1.2708, "step": 64480 }, { "epoch": 1.9013710747456878, "grad_norm": 2.9074763659800893, "learning_rate": 2.6687831948433623e-06, "loss": 1.338, "step": 64485 }, { "epoch": 1.901518502137697, "grad_norm": 2.8667410842031256, "learning_rate": 2.6681672520829317e-06, "loss": 1.2957, "step": 64490 }, { "epoch": 1.9016659295297065, "grad_norm": 2.7260646383974225, "learning_rate": 2.6675513411566664e-06, "loss": 1.2715, "step": 64495 }, { "epoch": 1.901813356921716, "grad_norm": 2.8697745050582837, "learning_rate": 2.6669354620826873e-06, "loss": 1.3103, "step": 64500 }, { "epoch": 1.901813356921716, "eval_loss": 1.0659339427947998, "eval_runtime": 4.1469, "eval_samples_per_second": 95.494, "eval_steps_per_second": 3.135, "step": 64500 }, { "epoch": 1.9019607843137254, "grad_norm": 2.763422855836147, "learning_rate": 2.6663196148791207e-06, "loss": 1.2982, "step": 64505 }, { "epoch": 1.9021082117057349, "grad_norm": 2.7682278794333435, "learning_rate": 2.665703799564085e-06, "loss": 1.2752, "step": 64510 }, { "epoch": 1.9022556390977443, "grad_norm": 2.8344778474871584, "learning_rate": 2.665088016155705e-06, "loss": 1.2582, "step": 64515 }, { "epoch": 1.9024030664897538, "grad_norm": 2.851267678820628, "learning_rate": 2.6644722646720985e-06, "loss": 1.2883, "step": 64520 }, { "epoch": 1.9025504938817632, "grad_norm": 2.8348424066115383, "learning_rate": 2.663856545131383e-06, "loss": 1.3192, "step": 64525 }, { "epoch": 1.9026979212737727, "grad_norm": 2.8514667418411745, "learning_rate": 2.6632408575516815e-06, "loss": 1.2877, "step": 64530 }, { "epoch": 1.9028453486657821, "grad_norm": 2.848977963021052, "learning_rate": 2.662625201951108e-06, "loss": 1.3234, "step": 64535 }, { "epoch": 1.9029927760577916, "grad_norm": 2.9340415126668105, "learning_rate": 2.6620095783477798e-06, "loss": 1.2626, "step": 64540 }, { "epoch": 1.903140203449801, "grad_norm": 2.727654194873751, "learning_rate": 2.6613939867598106e-06, "loss": 1.2688, "step": 64545 }, { "epoch": 1.9032876308418105, "grad_norm": 2.7650040442139057, "learning_rate": 2.660778427205318e-06, "loss": 1.2693, "step": 64550 }, { "epoch": 1.90343505823382, "grad_norm": 2.7889246781764943, "learning_rate": 2.6601628997024132e-06, "loss": 1.311, "step": 64555 }, { "epoch": 1.9035824856258294, "grad_norm": 2.863789454194863, "learning_rate": 2.6595474042692105e-06, "loss": 1.3151, "step": 64560 }, { "epoch": 1.9037299130178387, "grad_norm": 2.864949807976846, "learning_rate": 2.658931940923819e-06, "loss": 1.2647, "step": 64565 }, { "epoch": 1.9038773404098481, "grad_norm": 2.715996368360686, "learning_rate": 2.6583165096843523e-06, "loss": 1.2705, "step": 64570 }, { "epoch": 1.9040247678018576, "grad_norm": 2.703499748526089, "learning_rate": 2.6577011105689186e-06, "loss": 1.2347, "step": 64575 }, { "epoch": 1.904172195193867, "grad_norm": 2.7994918713105745, "learning_rate": 2.6570857435956276e-06, "loss": 1.3451, "step": 64580 }, { "epoch": 1.9043196225858765, "grad_norm": 2.9397652655861575, "learning_rate": 2.656470408782586e-06, "loss": 1.3225, "step": 64585 }, { "epoch": 1.9044670499778857, "grad_norm": 2.8641309194193765, "learning_rate": 2.6558551061479003e-06, "loss": 1.2975, "step": 64590 }, { "epoch": 1.9046144773698952, "grad_norm": 2.808459429117193, "learning_rate": 2.6552398357096787e-06, "loss": 1.3333, "step": 64595 }, { "epoch": 1.9047619047619047, "grad_norm": 2.812687889646886, "learning_rate": 2.654624597486024e-06, "loss": 1.2952, "step": 64600 }, { "epoch": 1.9049093321539141, "grad_norm": 2.615407209543962, "learning_rate": 2.6540093914950427e-06, "loss": 1.2934, "step": 64605 }, { "epoch": 1.9050567595459236, "grad_norm": 2.8619407169130384, "learning_rate": 2.653394217754835e-06, "loss": 1.2641, "step": 64610 }, { "epoch": 1.905204186937933, "grad_norm": 2.8054130162660607, "learning_rate": 2.652779076283506e-06, "loss": 1.3073, "step": 64615 }, { "epoch": 1.9053516143299425, "grad_norm": 2.785873643329788, "learning_rate": 2.652163967099154e-06, "loss": 1.2808, "step": 64620 }, { "epoch": 1.905499041721952, "grad_norm": 2.7601356499318292, "learning_rate": 2.651548890219883e-06, "loss": 1.2656, "step": 64625 }, { "epoch": 1.9056464691139614, "grad_norm": 2.813314770946709, "learning_rate": 2.650933845663789e-06, "loss": 1.3417, "step": 64630 }, { "epoch": 1.9057938965059709, "grad_norm": 2.7549412601786547, "learning_rate": 2.650318833448972e-06, "loss": 1.3351, "step": 64635 }, { "epoch": 1.9059413238979803, "grad_norm": 2.792673332816737, "learning_rate": 2.649703853593528e-06, "loss": 1.2875, "step": 64640 }, { "epoch": 1.9060887512899898, "grad_norm": 2.8659517314106426, "learning_rate": 2.649088906115555e-06, "loss": 1.266, "step": 64645 }, { "epoch": 1.9062361786819992, "grad_norm": 2.828068460873399, "learning_rate": 2.6484739910331494e-06, "loss": 1.2983, "step": 64650 }, { "epoch": 1.9063836060740087, "grad_norm": 2.8019885433046907, "learning_rate": 2.6478591083644025e-06, "loss": 1.3264, "step": 64655 }, { "epoch": 1.906531033466018, "grad_norm": 2.7472972280670196, "learning_rate": 2.6472442581274108e-06, "loss": 1.2844, "step": 64660 }, { "epoch": 1.9066784608580274, "grad_norm": 2.90592595971358, "learning_rate": 2.6466294403402657e-06, "loss": 1.3298, "step": 64665 }, { "epoch": 1.9068258882500368, "grad_norm": 2.738540787499977, "learning_rate": 2.6460146550210603e-06, "loss": 1.2577, "step": 64670 }, { "epoch": 1.9069733156420463, "grad_norm": 2.6680636910362323, "learning_rate": 2.645399902187882e-06, "loss": 1.3041, "step": 64675 }, { "epoch": 1.9071207430340558, "grad_norm": 2.934999821951222, "learning_rate": 2.644785181858825e-06, "loss": 1.2881, "step": 64680 }, { "epoch": 1.907268170426065, "grad_norm": 2.7896752746588365, "learning_rate": 2.644170494051975e-06, "loss": 1.3285, "step": 64685 }, { "epoch": 1.9074155978180745, "grad_norm": 2.8563998802690485, "learning_rate": 2.6435558387854215e-06, "loss": 1.2849, "step": 64690 }, { "epoch": 1.907563025210084, "grad_norm": 2.9766096093563235, "learning_rate": 2.64294121607725e-06, "loss": 1.2836, "step": 64695 }, { "epoch": 1.9077104526020934, "grad_norm": 2.8579885157516904, "learning_rate": 2.642326625945549e-06, "loss": 1.2991, "step": 64700 }, { "epoch": 1.9078578799941028, "grad_norm": 2.7640787587648132, "learning_rate": 2.6417120684084005e-06, "loss": 1.2827, "step": 64705 }, { "epoch": 1.9080053073861123, "grad_norm": 2.8282681525449056, "learning_rate": 2.641097543483891e-06, "loss": 1.2775, "step": 64710 }, { "epoch": 1.9081527347781218, "grad_norm": 2.752604456106736, "learning_rate": 2.6404830511901017e-06, "loss": 1.2964, "step": 64715 }, { "epoch": 1.9083001621701312, "grad_norm": 2.7289048177709985, "learning_rate": 2.639868591545115e-06, "loss": 1.2674, "step": 64720 }, { "epoch": 1.9084475895621407, "grad_norm": 2.73510626480194, "learning_rate": 2.6392541645670136e-06, "loss": 1.3105, "step": 64725 }, { "epoch": 1.9085950169541501, "grad_norm": 3.0011689591588704, "learning_rate": 2.638639770273876e-06, "loss": 1.2842, "step": 64730 }, { "epoch": 1.9087424443461596, "grad_norm": 2.8996571705923766, "learning_rate": 2.638025408683783e-06, "loss": 1.3192, "step": 64735 }, { "epoch": 1.908889871738169, "grad_norm": 2.643349648258736, "learning_rate": 2.6374110798148114e-06, "loss": 1.2733, "step": 64740 }, { "epoch": 1.9090372991301785, "grad_norm": 2.790100418670685, "learning_rate": 2.6367967836850398e-06, "loss": 1.3137, "step": 64745 }, { "epoch": 1.909184726522188, "grad_norm": 2.7876568472241887, "learning_rate": 2.636182520312543e-06, "loss": 1.2823, "step": 64750 }, { "epoch": 1.9093321539141974, "grad_norm": 2.7065571577711554, "learning_rate": 2.635568289715399e-06, "loss": 1.2782, "step": 64755 }, { "epoch": 1.9094795813062067, "grad_norm": 2.7050135748755517, "learning_rate": 2.634954091911679e-06, "loss": 1.2569, "step": 64760 }, { "epoch": 1.909627008698216, "grad_norm": 2.8137348157012836, "learning_rate": 2.63433992691946e-06, "loss": 1.2858, "step": 64765 }, { "epoch": 1.9097744360902256, "grad_norm": 2.7932608924154856, "learning_rate": 2.63372579475681e-06, "loss": 1.2879, "step": 64770 }, { "epoch": 1.909921863482235, "grad_norm": 2.9022085486589533, "learning_rate": 2.633111695441805e-06, "loss": 1.2933, "step": 64775 }, { "epoch": 1.9100692908742445, "grad_norm": 2.7317087399282993, "learning_rate": 2.632497628992514e-06, "loss": 1.2947, "step": 64780 }, { "epoch": 1.9102167182662537, "grad_norm": 2.820058863957512, "learning_rate": 2.631883595427004e-06, "loss": 1.3069, "step": 64785 }, { "epoch": 1.9103641456582632, "grad_norm": 2.697146921673487, "learning_rate": 2.6312695947633488e-06, "loss": 1.286, "step": 64790 }, { "epoch": 1.9105115730502726, "grad_norm": 2.8659156248219673, "learning_rate": 2.630655627019612e-06, "loss": 1.2787, "step": 64795 }, { "epoch": 1.910659000442282, "grad_norm": 2.764111382028687, "learning_rate": 2.6300416922138617e-06, "loss": 1.2549, "step": 64800 }, { "epoch": 1.9108064278342916, "grad_norm": 2.7735457674974153, "learning_rate": 2.6294277903641623e-06, "loss": 1.2932, "step": 64805 }, { "epoch": 1.910953855226301, "grad_norm": 2.8199268465501444, "learning_rate": 2.6288139214885814e-06, "loss": 1.2812, "step": 64810 }, { "epoch": 1.9111012826183105, "grad_norm": 2.9483308294507786, "learning_rate": 2.6282000856051804e-06, "loss": 1.2973, "step": 64815 }, { "epoch": 1.91124871001032, "grad_norm": 2.8414140463118565, "learning_rate": 2.627586282732023e-06, "loss": 1.2255, "step": 64820 }, { "epoch": 1.9113961374023294, "grad_norm": 2.824967859578657, "learning_rate": 2.6269725128871703e-06, "loss": 1.3279, "step": 64825 }, { "epoch": 1.9115435647943388, "grad_norm": 2.7021605888742832, "learning_rate": 2.6263587760886847e-06, "loss": 1.2946, "step": 64830 }, { "epoch": 1.9116909921863483, "grad_norm": 2.842401158851616, "learning_rate": 2.6257450723546245e-06, "loss": 1.3173, "step": 64835 }, { "epoch": 1.9118384195783578, "grad_norm": 2.8828733066532664, "learning_rate": 2.62513140170305e-06, "loss": 1.3227, "step": 64840 }, { "epoch": 1.9119858469703672, "grad_norm": 2.8273212021669316, "learning_rate": 2.6245177641520184e-06, "loss": 1.2582, "step": 64845 }, { "epoch": 1.9121332743623767, "grad_norm": 2.83086683976932, "learning_rate": 2.6239041597195858e-06, "loss": 1.2654, "step": 64850 }, { "epoch": 1.912280701754386, "grad_norm": 2.7794098754811287, "learning_rate": 2.62329058842381e-06, "loss": 1.243, "step": 64855 }, { "epoch": 1.9124281291463954, "grad_norm": 2.868816283205227, "learning_rate": 2.622677050282744e-06, "loss": 1.2873, "step": 64860 }, { "epoch": 1.9125755565384048, "grad_norm": 2.8860410254114957, "learning_rate": 2.6220635453144445e-06, "loss": 1.2898, "step": 64865 }, { "epoch": 1.9127229839304143, "grad_norm": 2.8558773876882633, "learning_rate": 2.6214500735369624e-06, "loss": 1.2947, "step": 64870 }, { "epoch": 1.9128704113224237, "grad_norm": 2.7938152509332905, "learning_rate": 2.6208366349683514e-06, "loss": 1.276, "step": 64875 }, { "epoch": 1.913017838714433, "grad_norm": 2.9879447293664247, "learning_rate": 2.6202232296266598e-06, "loss": 1.3046, "step": 64880 }, { "epoch": 1.9131652661064424, "grad_norm": 2.757090690095264, "learning_rate": 2.619609857529942e-06, "loss": 1.2674, "step": 64885 }, { "epoch": 1.913312693498452, "grad_norm": 2.9603881351369474, "learning_rate": 2.618996518696244e-06, "loss": 1.3459, "step": 64890 }, { "epoch": 1.9134601208904614, "grad_norm": 2.7595830755932425, "learning_rate": 2.618383213143615e-06, "loss": 1.2942, "step": 64895 }, { "epoch": 1.9136075482824708, "grad_norm": 2.797288299479574, "learning_rate": 2.6177699408901016e-06, "loss": 1.2922, "step": 64900 }, { "epoch": 1.9137549756744803, "grad_norm": 2.7930692509071875, "learning_rate": 2.617156701953751e-06, "loss": 1.3323, "step": 64905 }, { "epoch": 1.9139024030664897, "grad_norm": 2.835907115098265, "learning_rate": 2.616543496352609e-06, "loss": 1.2801, "step": 64910 }, { "epoch": 1.9140498304584992, "grad_norm": 2.9280153881603503, "learning_rate": 2.6159303241047167e-06, "loss": 1.3236, "step": 64915 }, { "epoch": 1.9141972578505086, "grad_norm": 2.7847477758783494, "learning_rate": 2.6153171852281214e-06, "loss": 1.286, "step": 64920 }, { "epoch": 1.914344685242518, "grad_norm": 2.8023601341989703, "learning_rate": 2.614704079740863e-06, "loss": 1.263, "step": 64925 }, { "epoch": 1.9144921126345276, "grad_norm": 2.7377984581510453, "learning_rate": 2.614091007660984e-06, "loss": 1.2775, "step": 64930 }, { "epoch": 1.914639540026537, "grad_norm": 2.946683715856267, "learning_rate": 2.6134779690065226e-06, "loss": 1.2863, "step": 64935 }, { "epoch": 1.9147869674185465, "grad_norm": 2.781121604091494, "learning_rate": 2.612864963795521e-06, "loss": 1.3104, "step": 64940 }, { "epoch": 1.914934394810556, "grad_norm": 2.7875757303665667, "learning_rate": 2.612251992046016e-06, "loss": 1.2752, "step": 64945 }, { "epoch": 1.9150818222025654, "grad_norm": 2.69500030599431, "learning_rate": 2.611639053776046e-06, "loss": 1.3043, "step": 64950 }, { "epoch": 1.9152292495945746, "grad_norm": 2.646393779709174, "learning_rate": 2.611026149003645e-06, "loss": 1.2876, "step": 64955 }, { "epoch": 1.915376676986584, "grad_norm": 2.618882379426733, "learning_rate": 2.610413277746852e-06, "loss": 1.3074, "step": 64960 }, { "epoch": 1.9155241043785936, "grad_norm": 2.6748132807987113, "learning_rate": 2.6098004400236986e-06, "loss": 1.2717, "step": 64965 }, { "epoch": 1.915671531770603, "grad_norm": 2.750573284312209, "learning_rate": 2.60918763585222e-06, "loss": 1.2705, "step": 64970 }, { "epoch": 1.9158189591626125, "grad_norm": 2.7862851820548524, "learning_rate": 2.6085748652504474e-06, "loss": 1.3055, "step": 64975 }, { "epoch": 1.9159663865546217, "grad_norm": 2.7301059759757016, "learning_rate": 2.6079621282364117e-06, "loss": 1.3138, "step": 64980 }, { "epoch": 1.9161138139466312, "grad_norm": 2.8587933220999107, "learning_rate": 2.607349424828146e-06, "loss": 1.3104, "step": 64985 }, { "epoch": 1.9162612413386406, "grad_norm": 2.886452212841709, "learning_rate": 2.606736755043677e-06, "loss": 1.2967, "step": 64990 }, { "epoch": 1.91640866873065, "grad_norm": 2.8416500278249663, "learning_rate": 2.6061241189010355e-06, "loss": 1.2884, "step": 64995 }, { "epoch": 1.9165560961226595, "grad_norm": 2.7404995892774298, "learning_rate": 2.6055115164182472e-06, "loss": 1.2764, "step": 65000 }, { "epoch": 1.9165560961226595, "eval_loss": 1.065126895904541, "eval_runtime": 4.2735, "eval_samples_per_second": 92.664, "eval_steps_per_second": 3.042, "step": 65000 }, { "epoch": 1.916703523514669, "grad_norm": 2.82297762433606, "learning_rate": 2.6048989476133403e-06, "loss": 1.2656, "step": 65005 }, { "epoch": 1.9168509509066785, "grad_norm": 2.954489045558918, "learning_rate": 2.6042864125043383e-06, "loss": 1.2776, "step": 65010 }, { "epoch": 1.916998378298688, "grad_norm": 2.7824820477300127, "learning_rate": 2.6036739111092677e-06, "loss": 1.2881, "step": 65015 }, { "epoch": 1.9171458056906974, "grad_norm": 2.764216242030633, "learning_rate": 2.603061443446151e-06, "loss": 1.2973, "step": 65020 }, { "epoch": 1.9172932330827068, "grad_norm": 2.8473767263566425, "learning_rate": 2.6024490095330116e-06, "loss": 1.251, "step": 65025 }, { "epoch": 1.9174406604747163, "grad_norm": 2.9613154907157955, "learning_rate": 2.601836609387869e-06, "loss": 1.3532, "step": 65030 }, { "epoch": 1.9175880878667257, "grad_norm": 2.805465488597897, "learning_rate": 2.6012242430287467e-06, "loss": 1.2975, "step": 65035 }, { "epoch": 1.9177355152587352, "grad_norm": 2.8172321134288856, "learning_rate": 2.6006119104736625e-06, "loss": 1.2602, "step": 65040 }, { "epoch": 1.9178829426507447, "grad_norm": 2.8340587190258972, "learning_rate": 2.599999611740634e-06, "loss": 1.3218, "step": 65045 }, { "epoch": 1.918030370042754, "grad_norm": 2.7056102444411847, "learning_rate": 2.599387346847682e-06, "loss": 1.3031, "step": 65050 }, { "epoch": 1.9181777974347634, "grad_norm": 2.6994239001858937, "learning_rate": 2.5987751158128196e-06, "loss": 1.2568, "step": 65055 }, { "epoch": 1.9183252248267728, "grad_norm": 2.8963569869068895, "learning_rate": 2.598162918654065e-06, "loss": 1.2802, "step": 65060 }, { "epoch": 1.9184726522187823, "grad_norm": 2.6840136306309232, "learning_rate": 2.597550755389431e-06, "loss": 1.2578, "step": 65065 }, { "epoch": 1.9186200796107917, "grad_norm": 2.8405044298750144, "learning_rate": 2.596938626036933e-06, "loss": 1.2829, "step": 65070 }, { "epoch": 1.918767507002801, "grad_norm": 2.720798427537121, "learning_rate": 2.5963265306145816e-06, "loss": 1.2713, "step": 65075 }, { "epoch": 1.9189149343948104, "grad_norm": 2.769778736149785, "learning_rate": 2.5957144691403905e-06, "loss": 1.3055, "step": 65080 }, { "epoch": 1.9190623617868199, "grad_norm": 2.7518285441558823, "learning_rate": 2.5951024416323675e-06, "loss": 1.3147, "step": 65085 }, { "epoch": 1.9192097891788293, "grad_norm": 2.7623302010697857, "learning_rate": 2.5944904481085256e-06, "loss": 1.2919, "step": 65090 }, { "epoch": 1.9193572165708388, "grad_norm": 2.6775485995262653, "learning_rate": 2.5938784885868708e-06, "loss": 1.2891, "step": 65095 }, { "epoch": 1.9195046439628483, "grad_norm": 2.75721533770992, "learning_rate": 2.5932665630854124e-06, "loss": 1.3106, "step": 65100 }, { "epoch": 1.9196520713548577, "grad_norm": 2.769712880852679, "learning_rate": 2.592654671622156e-06, "loss": 1.3116, "step": 65105 }, { "epoch": 1.9197994987468672, "grad_norm": 2.830555567829339, "learning_rate": 2.5920428142151057e-06, "loss": 1.2721, "step": 65110 }, { "epoch": 1.9199469261388766, "grad_norm": 2.8839973360053057, "learning_rate": 2.59143099088227e-06, "loss": 1.3389, "step": 65115 }, { "epoch": 1.920094353530886, "grad_norm": 2.878758256073415, "learning_rate": 2.590819201641649e-06, "loss": 1.2449, "step": 65120 }, { "epoch": 1.9202417809228955, "grad_norm": 2.859860048202414, "learning_rate": 2.590207446511248e-06, "loss": 1.2653, "step": 65125 }, { "epoch": 1.920389208314905, "grad_norm": 2.8313772464184668, "learning_rate": 2.5895957255090657e-06, "loss": 1.2787, "step": 65130 }, { "epoch": 1.9205366357069145, "grad_norm": 2.69655684478482, "learning_rate": 2.588984038653106e-06, "loss": 1.3621, "step": 65135 }, { "epoch": 1.920684063098924, "grad_norm": 2.8296994353333194, "learning_rate": 2.5883723859613643e-06, "loss": 1.3052, "step": 65140 }, { "epoch": 1.9208314904909334, "grad_norm": 2.7687943596748297, "learning_rate": 2.587760767451844e-06, "loss": 1.3674, "step": 65145 }, { "epoch": 1.9209789178829426, "grad_norm": 2.848200208551086, "learning_rate": 2.5871491831425387e-06, "loss": 1.2826, "step": 65150 }, { "epoch": 1.921126345274952, "grad_norm": 2.692058256515946, "learning_rate": 2.5865376330514477e-06, "loss": 1.2705, "step": 65155 }, { "epoch": 1.9212737726669615, "grad_norm": 2.7911862705203365, "learning_rate": 2.585926117196564e-06, "loss": 1.2768, "step": 65160 }, { "epoch": 1.921421200058971, "grad_norm": 2.909864531285626, "learning_rate": 2.585314635595885e-06, "loss": 1.3045, "step": 65165 }, { "epoch": 1.9215686274509802, "grad_norm": 2.96574037634458, "learning_rate": 2.5847031882674025e-06, "loss": 1.2721, "step": 65170 }, { "epoch": 1.9217160548429897, "grad_norm": 2.7646530924271766, "learning_rate": 2.5840917752291075e-06, "loss": 1.2472, "step": 65175 }, { "epoch": 1.9218634822349991, "grad_norm": 2.7136417000848665, "learning_rate": 2.583480396498995e-06, "loss": 1.3083, "step": 65180 }, { "epoch": 1.9220109096270086, "grad_norm": 2.7812931905992895, "learning_rate": 2.5828690520950535e-06, "loss": 1.2595, "step": 65185 }, { "epoch": 1.922158337019018, "grad_norm": 2.6567793204032077, "learning_rate": 2.582257742035274e-06, "loss": 1.3022, "step": 65190 }, { "epoch": 1.9223057644110275, "grad_norm": 2.8219882116016195, "learning_rate": 2.5816464663376415e-06, "loss": 1.3137, "step": 65195 }, { "epoch": 1.922453191803037, "grad_norm": 2.822785923085315, "learning_rate": 2.581035225020148e-06, "loss": 1.3345, "step": 65200 }, { "epoch": 1.9226006191950464, "grad_norm": 2.774024453133643, "learning_rate": 2.5804240181007766e-06, "loss": 1.3145, "step": 65205 }, { "epoch": 1.922748046587056, "grad_norm": 2.820197077118163, "learning_rate": 2.5798128455975146e-06, "loss": 1.3146, "step": 65210 }, { "epoch": 1.9228954739790654, "grad_norm": 2.766443930542308, "learning_rate": 2.579201707528345e-06, "loss": 1.2996, "step": 65215 }, { "epoch": 1.9230429013710748, "grad_norm": 2.8474786125354483, "learning_rate": 2.578590603911253e-06, "loss": 1.2471, "step": 65220 }, { "epoch": 1.9231903287630843, "grad_norm": 2.6922786710079643, "learning_rate": 2.5779795347642196e-06, "loss": 1.2925, "step": 65225 }, { "epoch": 1.9233377561550937, "grad_norm": 2.8261727211540584, "learning_rate": 2.5773685001052275e-06, "loss": 1.2484, "step": 65230 }, { "epoch": 1.9234851835471032, "grad_norm": 2.82167412716644, "learning_rate": 2.576757499952256e-06, "loss": 1.2411, "step": 65235 }, { "epoch": 1.9236326109391126, "grad_norm": 2.973121962642761, "learning_rate": 2.576146534323283e-06, "loss": 1.2882, "step": 65240 }, { "epoch": 1.9237800383311219, "grad_norm": 2.6849785047365686, "learning_rate": 2.575535603236291e-06, "loss": 1.2669, "step": 65245 }, { "epoch": 1.9239274657231313, "grad_norm": 2.7518060607545443, "learning_rate": 2.5749247067092526e-06, "loss": 1.2525, "step": 65250 }, { "epoch": 1.9240748931151408, "grad_norm": 2.8393922874933097, "learning_rate": 2.574313844760149e-06, "loss": 1.3389, "step": 65255 }, { "epoch": 1.9242223205071503, "grad_norm": 2.7862280248983518, "learning_rate": 2.5737030174069513e-06, "loss": 1.2665, "step": 65260 }, { "epoch": 1.9243697478991597, "grad_norm": 2.893692307203677, "learning_rate": 2.5730922246676366e-06, "loss": 1.2746, "step": 65265 }, { "epoch": 1.924517175291169, "grad_norm": 2.715550404716354, "learning_rate": 2.5724814665601758e-06, "loss": 1.2416, "step": 65270 }, { "epoch": 1.9246646026831784, "grad_norm": 2.7563481607241367, "learning_rate": 2.5718707431025437e-06, "loss": 1.278, "step": 65275 }, { "epoch": 1.9248120300751879, "grad_norm": 2.857940627004071, "learning_rate": 2.5712600543127097e-06, "loss": 1.2406, "step": 65280 }, { "epoch": 1.9249594574671973, "grad_norm": 2.720875124631842, "learning_rate": 2.5706494002086454e-06, "loss": 1.2905, "step": 65285 }, { "epoch": 1.9251068848592068, "grad_norm": 2.8818748398538787, "learning_rate": 2.5700387808083172e-06, "loss": 1.3011, "step": 65290 }, { "epoch": 1.9252543122512162, "grad_norm": 2.757221967351265, "learning_rate": 2.5694281961296974e-06, "loss": 1.2783, "step": 65295 }, { "epoch": 1.9254017396432257, "grad_norm": 2.7590768925352616, "learning_rate": 2.56881764619075e-06, "loss": 1.2912, "step": 65300 }, { "epoch": 1.9255491670352352, "grad_norm": 2.7257235367404005, "learning_rate": 2.5682071310094414e-06, "loss": 1.277, "step": 65305 }, { "epoch": 1.9256965944272446, "grad_norm": 2.7700799274377204, "learning_rate": 2.567596650603739e-06, "loss": 1.2759, "step": 65310 }, { "epoch": 1.925844021819254, "grad_norm": 2.8165043960588427, "learning_rate": 2.566986204991604e-06, "loss": 1.2784, "step": 65315 }, { "epoch": 1.9259914492112635, "grad_norm": 2.777082598248988, "learning_rate": 2.5663757941910023e-06, "loss": 1.2899, "step": 65320 }, { "epoch": 1.926138876603273, "grad_norm": 2.766911726337405, "learning_rate": 2.565765418219892e-06, "loss": 1.2922, "step": 65325 }, { "epoch": 1.9262863039952824, "grad_norm": 2.8052640411790164, "learning_rate": 2.565155077096239e-06, "loss": 1.2941, "step": 65330 }, { "epoch": 1.926433731387292, "grad_norm": 2.829841948094408, "learning_rate": 2.5645447708379995e-06, "loss": 1.2973, "step": 65335 }, { "epoch": 1.9265811587793011, "grad_norm": 2.85885977511471, "learning_rate": 2.5639344994631348e-06, "loss": 1.3446, "step": 65340 }, { "epoch": 1.9267285861713106, "grad_norm": 2.8471056225774443, "learning_rate": 2.5633242629896e-06, "loss": 1.3573, "step": 65345 }, { "epoch": 1.92687601356332, "grad_norm": 2.848096705341856, "learning_rate": 2.562714061435355e-06, "loss": 1.3262, "step": 65350 }, { "epoch": 1.9270234409553295, "grad_norm": 2.6793911873412934, "learning_rate": 2.5621038948183545e-06, "loss": 1.2467, "step": 65355 }, { "epoch": 1.927170868347339, "grad_norm": 2.7874560746788175, "learning_rate": 2.5614937631565536e-06, "loss": 1.2921, "step": 65360 }, { "epoch": 1.9273182957393482, "grad_norm": 2.8908561644977024, "learning_rate": 2.5608836664679046e-06, "loss": 1.2828, "step": 65365 }, { "epoch": 1.9274657231313577, "grad_norm": 2.6868534633391055, "learning_rate": 2.5602736047703627e-06, "loss": 1.2912, "step": 65370 }, { "epoch": 1.9276131505233671, "grad_norm": 2.8819140442739193, "learning_rate": 2.5596635780818785e-06, "loss": 1.288, "step": 65375 }, { "epoch": 1.9277605779153766, "grad_norm": 2.6782405262604754, "learning_rate": 2.5590535864204017e-06, "loss": 1.3101, "step": 65380 }, { "epoch": 1.927908005307386, "grad_norm": 2.7517600704930625, "learning_rate": 2.5584436298038844e-06, "loss": 1.2523, "step": 65385 }, { "epoch": 1.9280554326993955, "grad_norm": 2.7653563172844877, "learning_rate": 2.557833708250273e-06, "loss": 1.2493, "step": 65390 }, { "epoch": 1.928202860091405, "grad_norm": 2.876834531829885, "learning_rate": 2.5572238217775167e-06, "loss": 1.3168, "step": 65395 }, { "epoch": 1.9283502874834144, "grad_norm": 2.7894701284111885, "learning_rate": 2.5566139704035602e-06, "loss": 1.294, "step": 65400 }, { "epoch": 1.9284977148754239, "grad_norm": 2.8006254636552907, "learning_rate": 2.556004154146352e-06, "loss": 1.3294, "step": 65405 }, { "epoch": 1.9286451422674333, "grad_norm": 2.7966543089162186, "learning_rate": 2.5553943730238338e-06, "loss": 1.2493, "step": 65410 }, { "epoch": 1.9287925696594428, "grad_norm": 2.7651405072243893, "learning_rate": 2.554784627053951e-06, "loss": 1.2645, "step": 65415 }, { "epoch": 1.9289399970514522, "grad_norm": 2.768684426860003, "learning_rate": 2.554174916254644e-06, "loss": 1.2347, "step": 65420 }, { "epoch": 1.9290874244434617, "grad_norm": 2.758356324626932, "learning_rate": 2.5535652406438565e-06, "loss": 1.2972, "step": 65425 }, { "epoch": 1.9292348518354712, "grad_norm": 2.6665825293119747, "learning_rate": 2.5529556002395276e-06, "loss": 1.2465, "step": 65430 }, { "epoch": 1.9293822792274806, "grad_norm": 2.780664744810671, "learning_rate": 2.552345995059598e-06, "loss": 1.2367, "step": 65435 }, { "epoch": 1.9295297066194899, "grad_norm": 2.896725859891843, "learning_rate": 2.551736425122005e-06, "loss": 1.2061, "step": 65440 }, { "epoch": 1.9296771340114993, "grad_norm": 2.8042716623048385, "learning_rate": 2.5511268904446855e-06, "loss": 1.299, "step": 65445 }, { "epoch": 1.9298245614035088, "grad_norm": 2.8529288139319506, "learning_rate": 2.5505173910455768e-06, "loss": 1.2814, "step": 65450 }, { "epoch": 1.9299719887955182, "grad_norm": 2.768877207322321, "learning_rate": 2.5499079269426114e-06, "loss": 1.2758, "step": 65455 }, { "epoch": 1.9301194161875277, "grad_norm": 2.8118370095026646, "learning_rate": 2.5492984981537277e-06, "loss": 1.3062, "step": 65460 }, { "epoch": 1.930266843579537, "grad_norm": 2.7245239908791596, "learning_rate": 2.5486891046968562e-06, "loss": 1.2446, "step": 65465 }, { "epoch": 1.9304142709715464, "grad_norm": 2.9160977166439586, "learning_rate": 2.54807974658993e-06, "loss": 1.2716, "step": 65470 }, { "epoch": 1.9305616983635558, "grad_norm": 2.720008614599181, "learning_rate": 2.5474704238508786e-06, "loss": 1.2445, "step": 65475 }, { "epoch": 1.9307091257555653, "grad_norm": 2.6896566512254005, "learning_rate": 2.546861136497634e-06, "loss": 1.2315, "step": 65480 }, { "epoch": 1.9308565531475748, "grad_norm": 2.7919363237809347, "learning_rate": 2.546251884548124e-06, "loss": 1.2782, "step": 65485 }, { "epoch": 1.9310039805395842, "grad_norm": 2.912674317985812, "learning_rate": 2.5456426680202776e-06, "loss": 1.3152, "step": 65490 }, { "epoch": 1.9311514079315937, "grad_norm": 2.7520369259798008, "learning_rate": 2.5450334869320195e-06, "loss": 1.2383, "step": 65495 }, { "epoch": 1.9312988353236031, "grad_norm": 2.871790378492726, "learning_rate": 2.5444243413012776e-06, "loss": 1.2848, "step": 65500 }, { "epoch": 1.9312988353236031, "eval_loss": 1.063809871673584, "eval_runtime": 4.175, "eval_samples_per_second": 94.85, "eval_steps_per_second": 3.114, "step": 65500 }, { "epoch": 1.9314462627156126, "grad_norm": 2.7381435691874425, "learning_rate": 2.543815231145977e-06, "loss": 1.2792, "step": 65505 }, { "epoch": 1.931593690107622, "grad_norm": 2.8651830575889963, "learning_rate": 2.5432061564840386e-06, "loss": 1.3041, "step": 65510 }, { "epoch": 1.9317411174996315, "grad_norm": 2.5928600225228196, "learning_rate": 2.5425971173333892e-06, "loss": 1.231, "step": 65515 }, { "epoch": 1.931888544891641, "grad_norm": 2.7348516975451598, "learning_rate": 2.541988113711947e-06, "loss": 1.298, "step": 65520 }, { "epoch": 1.9320359722836504, "grad_norm": 2.8604090425424435, "learning_rate": 2.541379145637635e-06, "loss": 1.3001, "step": 65525 }, { "epoch": 1.9321833996756599, "grad_norm": 2.7622776732376444, "learning_rate": 2.5407702131283693e-06, "loss": 1.2603, "step": 65530 }, { "epoch": 1.9323308270676691, "grad_norm": 2.8163621013267295, "learning_rate": 2.5401613162020734e-06, "loss": 1.307, "step": 65535 }, { "epoch": 1.9324782544596786, "grad_norm": 2.789682703247408, "learning_rate": 2.5395524548766616e-06, "loss": 1.2827, "step": 65540 }, { "epoch": 1.932625681851688, "grad_norm": 2.8734886343816233, "learning_rate": 2.5389436291700513e-06, "loss": 1.2822, "step": 65545 }, { "epoch": 1.9327731092436975, "grad_norm": 2.727609667988799, "learning_rate": 2.5383348391001563e-06, "loss": 1.2481, "step": 65550 }, { "epoch": 1.932920536635707, "grad_norm": 2.89954938058057, "learning_rate": 2.5377260846848935e-06, "loss": 1.3756, "step": 65555 }, { "epoch": 1.9330679640277162, "grad_norm": 2.748763950829935, "learning_rate": 2.537117365942174e-06, "loss": 1.3101, "step": 65560 }, { "epoch": 1.9332153914197256, "grad_norm": 2.905539827239795, "learning_rate": 2.5365086828899116e-06, "loss": 1.2917, "step": 65565 }, { "epoch": 1.933362818811735, "grad_norm": 2.883193567026473, "learning_rate": 2.5359000355460173e-06, "loss": 1.2704, "step": 65570 }, { "epoch": 1.9335102462037446, "grad_norm": 2.752752351677662, "learning_rate": 2.5352914239284e-06, "loss": 1.2664, "step": 65575 }, { "epoch": 1.933657673595754, "grad_norm": 2.8823817615657803, "learning_rate": 2.5346828480549704e-06, "loss": 1.258, "step": 65580 }, { "epoch": 1.9338051009877635, "grad_norm": 2.8085706479024353, "learning_rate": 2.534074307943634e-06, "loss": 1.2712, "step": 65585 }, { "epoch": 1.933952528379773, "grad_norm": 2.8894662717751283, "learning_rate": 2.5334658036123016e-06, "loss": 1.3221, "step": 65590 }, { "epoch": 1.9340999557717824, "grad_norm": 2.7593686308358247, "learning_rate": 2.532857335078876e-06, "loss": 1.2995, "step": 65595 }, { "epoch": 1.9342473831637919, "grad_norm": 2.9900594402463314, "learning_rate": 2.5322489023612636e-06, "loss": 1.346, "step": 65600 }, { "epoch": 1.9343948105558013, "grad_norm": 2.958165338980227, "learning_rate": 2.531640505477367e-06, "loss": 1.3276, "step": 65605 }, { "epoch": 1.9345422379478108, "grad_norm": 2.750510037602738, "learning_rate": 2.5310321444450905e-06, "loss": 1.3046, "step": 65610 }, { "epoch": 1.9346896653398202, "grad_norm": 2.769794565488269, "learning_rate": 2.5304238192823344e-06, "loss": 1.3137, "step": 65615 }, { "epoch": 1.9348370927318297, "grad_norm": 2.87667403789354, "learning_rate": 2.5298155300070013e-06, "loss": 1.2991, "step": 65620 }, { "epoch": 1.9349845201238391, "grad_norm": 2.708685071571634, "learning_rate": 2.529207276636988e-06, "loss": 1.3336, "step": 65625 }, { "epoch": 1.9351319475158486, "grad_norm": 2.6882557469412873, "learning_rate": 2.528599059190195e-06, "loss": 1.2784, "step": 65630 }, { "epoch": 1.9352793749078578, "grad_norm": 2.74045078498413, "learning_rate": 2.527990877684521e-06, "loss": 1.236, "step": 65635 }, { "epoch": 1.9354268022998673, "grad_norm": 2.843555699286235, "learning_rate": 2.527382732137858e-06, "loss": 1.285, "step": 65640 }, { "epoch": 1.9355742296918768, "grad_norm": 2.872406091912287, "learning_rate": 2.526774622568107e-06, "loss": 1.2425, "step": 65645 }, { "epoch": 1.9357216570838862, "grad_norm": 2.822201556569963, "learning_rate": 2.526166548993158e-06, "loss": 1.3146, "step": 65650 }, { "epoch": 1.9358690844758957, "grad_norm": 2.7036557756536728, "learning_rate": 2.5255585114309064e-06, "loss": 1.2628, "step": 65655 }, { "epoch": 1.936016511867905, "grad_norm": 2.703414883610819, "learning_rate": 2.524950509899242e-06, "loss": 1.2678, "step": 65660 }, { "epoch": 1.9361639392599144, "grad_norm": 2.915389534551807, "learning_rate": 2.5243425444160595e-06, "loss": 1.2841, "step": 65665 }, { "epoch": 1.9363113666519238, "grad_norm": 2.7807038523419, "learning_rate": 2.5237346149992465e-06, "loss": 1.2452, "step": 65670 }, { "epoch": 1.9364587940439333, "grad_norm": 3.009922363596263, "learning_rate": 2.5231267216666936e-06, "loss": 1.2732, "step": 65675 }, { "epoch": 1.9366062214359427, "grad_norm": 2.8242405212110486, "learning_rate": 2.5225188644362857e-06, "loss": 1.3184, "step": 65680 }, { "epoch": 1.9367536488279522, "grad_norm": 2.838540082748462, "learning_rate": 2.5219110433259144e-06, "loss": 1.2155, "step": 65685 }, { "epoch": 1.9369010762199617, "grad_norm": 2.7272270578782853, "learning_rate": 2.5213032583534606e-06, "loss": 1.2748, "step": 65690 }, { "epoch": 1.9370485036119711, "grad_norm": 2.738549451613473, "learning_rate": 2.520695509536813e-06, "loss": 1.274, "step": 65695 }, { "epoch": 1.9371959310039806, "grad_norm": 2.916598249345975, "learning_rate": 2.520087796893854e-06, "loss": 1.3474, "step": 65700 }, { "epoch": 1.93734335839599, "grad_norm": 2.8163260552774694, "learning_rate": 2.5194801204424655e-06, "loss": 1.2434, "step": 65705 }, { "epoch": 1.9374907857879995, "grad_norm": 2.7510633666833058, "learning_rate": 2.51887248020053e-06, "loss": 1.3262, "step": 65710 }, { "epoch": 1.937638213180009, "grad_norm": 2.7690639448066854, "learning_rate": 2.5182648761859264e-06, "loss": 1.2922, "step": 65715 }, { "epoch": 1.9377856405720184, "grad_norm": 2.741488784337544, "learning_rate": 2.5176573084165363e-06, "loss": 1.3285, "step": 65720 }, { "epoch": 1.9379330679640279, "grad_norm": 2.8565872341669873, "learning_rate": 2.5170497769102368e-06, "loss": 1.248, "step": 65725 }, { "epoch": 1.938080495356037, "grad_norm": 2.7910757364949705, "learning_rate": 2.5164422816849063e-06, "loss": 1.3028, "step": 65730 }, { "epoch": 1.9382279227480466, "grad_norm": 2.8312641870306363, "learning_rate": 2.515834822758418e-06, "loss": 1.3078, "step": 65735 }, { "epoch": 1.938375350140056, "grad_norm": 2.972503918599546, "learning_rate": 2.5152274001486517e-06, "loss": 1.2932, "step": 65740 }, { "epoch": 1.9385227775320655, "grad_norm": 2.7494540980858955, "learning_rate": 2.514620013873478e-06, "loss": 1.231, "step": 65745 }, { "epoch": 1.938670204924075, "grad_norm": 2.997912189644652, "learning_rate": 2.5140126639507715e-06, "loss": 1.3469, "step": 65750 }, { "epoch": 1.9388176323160842, "grad_norm": 2.836940119419417, "learning_rate": 2.5134053503984024e-06, "loss": 1.294, "step": 65755 }, { "epoch": 1.9389650597080936, "grad_norm": 2.833819202742471, "learning_rate": 2.512798073234245e-06, "loss": 1.3247, "step": 65760 }, { "epoch": 1.939112487100103, "grad_norm": 2.776974151482319, "learning_rate": 2.512190832476166e-06, "loss": 1.3062, "step": 65765 }, { "epoch": 1.9392599144921125, "grad_norm": 2.678826861818172, "learning_rate": 2.511583628142034e-06, "loss": 1.2347, "step": 65770 }, { "epoch": 1.939407341884122, "grad_norm": 2.9626525559222814, "learning_rate": 2.5109764602497196e-06, "loss": 1.3378, "step": 65775 }, { "epoch": 1.9395547692761315, "grad_norm": 2.8069136273908795, "learning_rate": 2.5103693288170868e-06, "loss": 1.2758, "step": 65780 }, { "epoch": 1.939702196668141, "grad_norm": 2.838464544244297, "learning_rate": 2.509762233862003e-06, "loss": 1.2673, "step": 65785 }, { "epoch": 1.9398496240601504, "grad_norm": 2.7834083437101014, "learning_rate": 2.50915517540233e-06, "loss": 1.3194, "step": 65790 }, { "epoch": 1.9399970514521598, "grad_norm": 2.879540085428334, "learning_rate": 2.5085481534559344e-06, "loss": 1.3093, "step": 65795 }, { "epoch": 1.9401444788441693, "grad_norm": 2.8431126367996824, "learning_rate": 2.5079411680406762e-06, "loss": 1.269, "step": 65800 }, { "epoch": 1.9402919062361788, "grad_norm": 2.7565932932409805, "learning_rate": 2.507334219174418e-06, "loss": 1.3073, "step": 65805 }, { "epoch": 1.9404393336281882, "grad_norm": 2.7795850762112266, "learning_rate": 2.506727306875018e-06, "loss": 1.284, "step": 65810 }, { "epoch": 1.9405867610201977, "grad_norm": 2.7896881636734143, "learning_rate": 2.506120431160338e-06, "loss": 1.2453, "step": 65815 }, { "epoch": 1.9407341884122071, "grad_norm": 2.978585128884503, "learning_rate": 2.5055135920482336e-06, "loss": 1.2736, "step": 65820 }, { "epoch": 1.9408816158042166, "grad_norm": 2.8380160372588654, "learning_rate": 2.5049067895565645e-06, "loss": 1.2118, "step": 65825 }, { "epoch": 1.9410290431962258, "grad_norm": 2.719851974707099, "learning_rate": 2.5043000237031844e-06, "loss": 1.2333, "step": 65830 }, { "epoch": 1.9411764705882353, "grad_norm": 2.795399896143167, "learning_rate": 2.5036932945059475e-06, "loss": 1.2656, "step": 65835 }, { "epoch": 1.9413238979802447, "grad_norm": 2.78943628540338, "learning_rate": 2.5030866019827096e-06, "loss": 1.2904, "step": 65840 }, { "epoch": 1.9414713253722542, "grad_norm": 2.8699773847795966, "learning_rate": 2.502479946151321e-06, "loss": 1.3092, "step": 65845 }, { "epoch": 1.9416187527642637, "grad_norm": 2.7342136700511253, "learning_rate": 2.501873327029635e-06, "loss": 1.3007, "step": 65850 }, { "epoch": 1.941766180156273, "grad_norm": 2.9109722973048164, "learning_rate": 2.5012667446355027e-06, "loss": 1.2776, "step": 65855 }, { "epoch": 1.9419136075482824, "grad_norm": 2.8196686848610506, "learning_rate": 2.5006601989867712e-06, "loss": 1.3337, "step": 65860 }, { "epoch": 1.9420610349402918, "grad_norm": 2.8411550120828615, "learning_rate": 2.5000536901012894e-06, "loss": 1.2574, "step": 65865 }, { "epoch": 1.9422084623323013, "grad_norm": 2.961992573217159, "learning_rate": 2.4994472179969063e-06, "loss": 1.3259, "step": 65870 }, { "epoch": 1.9423558897243107, "grad_norm": 2.7207654069748437, "learning_rate": 2.4988407826914665e-06, "loss": 1.2641, "step": 65875 }, { "epoch": 1.9425033171163202, "grad_norm": 2.7706447150700835, "learning_rate": 2.4982343842028155e-06, "loss": 1.2856, "step": 65880 }, { "epoch": 1.9426507445083296, "grad_norm": 2.7459738907478926, "learning_rate": 2.497628022548796e-06, "loss": 1.2583, "step": 65885 }, { "epoch": 1.942798171900339, "grad_norm": 2.739138592663123, "learning_rate": 2.4970216977472535e-06, "loss": 1.2432, "step": 65890 }, { "epoch": 1.9429455992923486, "grad_norm": 2.7852546824908213, "learning_rate": 2.4964154098160284e-06, "loss": 1.3302, "step": 65895 }, { "epoch": 1.943093026684358, "grad_norm": 2.796014981507224, "learning_rate": 2.495809158772959e-06, "loss": 1.2959, "step": 65900 }, { "epoch": 1.9432404540763675, "grad_norm": 2.92666327451949, "learning_rate": 2.4952029446358896e-06, "loss": 1.323, "step": 65905 }, { "epoch": 1.943387881468377, "grad_norm": 2.7536549565713324, "learning_rate": 2.494596767422655e-06, "loss": 1.265, "step": 65910 }, { "epoch": 1.9435353088603864, "grad_norm": 2.8246456353774367, "learning_rate": 2.493990627151095e-06, "loss": 1.2405, "step": 65915 }, { "epoch": 1.9436827362523958, "grad_norm": 2.596894630909438, "learning_rate": 2.4933845238390433e-06, "loss": 1.2595, "step": 65920 }, { "epoch": 1.943830163644405, "grad_norm": 2.7364500752377845, "learning_rate": 2.4927784575043393e-06, "loss": 1.236, "step": 65925 }, { "epoch": 1.9439775910364145, "grad_norm": 2.6864459269653533, "learning_rate": 2.4921724281648135e-06, "loss": 1.2747, "step": 65930 }, { "epoch": 1.944125018428424, "grad_norm": 2.801628947773706, "learning_rate": 2.4915664358383008e-06, "loss": 1.2166, "step": 65935 }, { "epoch": 1.9442724458204335, "grad_norm": 2.7104960239078926, "learning_rate": 2.4909604805426315e-06, "loss": 1.2937, "step": 65940 }, { "epoch": 1.944419873212443, "grad_norm": 2.861449198128668, "learning_rate": 2.490354562295639e-06, "loss": 1.2563, "step": 65945 }, { "epoch": 1.9445673006044522, "grad_norm": 2.859051747501799, "learning_rate": 2.4897486811151503e-06, "loss": 1.2433, "step": 65950 }, { "epoch": 1.9447147279964616, "grad_norm": 2.8853592919020747, "learning_rate": 2.4891428370189976e-06, "loss": 1.2708, "step": 65955 }, { "epoch": 1.944862155388471, "grad_norm": 2.7607573169870507, "learning_rate": 2.4885370300250064e-06, "loss": 1.2769, "step": 65960 }, { "epoch": 1.9450095827804805, "grad_norm": 2.7417288050217516, "learning_rate": 2.487931260151003e-06, "loss": 1.2817, "step": 65965 }, { "epoch": 1.94515701017249, "grad_norm": 2.776104750114196, "learning_rate": 2.487325527414814e-06, "loss": 1.2798, "step": 65970 }, { "epoch": 1.9453044375644994, "grad_norm": 2.7204057061744615, "learning_rate": 2.4867198318342613e-06, "loss": 1.2569, "step": 65975 }, { "epoch": 1.945451864956509, "grad_norm": 2.8536707036749847, "learning_rate": 2.486114173427172e-06, "loss": 1.3135, "step": 65980 }, { "epoch": 1.9455992923485184, "grad_norm": 2.8371462553207314, "learning_rate": 2.4855085522113655e-06, "loss": 1.2386, "step": 65985 }, { "epoch": 1.9457467197405278, "grad_norm": 2.8779712833511018, "learning_rate": 2.4849029682046646e-06, "loss": 1.2683, "step": 65990 }, { "epoch": 1.9458941471325373, "grad_norm": 2.830536816055692, "learning_rate": 2.484297421424887e-06, "loss": 1.256, "step": 65995 }, { "epoch": 1.9460415745245467, "grad_norm": 2.7168455550681925, "learning_rate": 2.4836919118898543e-06, "loss": 1.2924, "step": 66000 }, { "epoch": 1.9460415745245467, "eval_loss": 1.0627408027648926, "eval_runtime": 4.2036, "eval_samples_per_second": 94.204, "eval_steps_per_second": 3.093, "step": 66000 }, { "epoch": 1.9461890019165562, "grad_norm": 2.7723324080905325, "learning_rate": 2.483086439617383e-06, "loss": 1.2584, "step": 66005 }, { "epoch": 1.9463364293085657, "grad_norm": 2.734501116553054, "learning_rate": 2.48248100462529e-06, "loss": 1.2666, "step": 66010 }, { "epoch": 1.946483856700575, "grad_norm": 2.860476407834542, "learning_rate": 2.481875606931389e-06, "loss": 1.3344, "step": 66015 }, { "epoch": 1.9466312840925846, "grad_norm": 2.7383570232897005, "learning_rate": 2.481270246553498e-06, "loss": 1.2871, "step": 66020 }, { "epoch": 1.9467787114845938, "grad_norm": 2.8855660589880636, "learning_rate": 2.480664923509429e-06, "loss": 1.2981, "step": 66025 }, { "epoch": 1.9469261388766033, "grad_norm": 2.7382636499120814, "learning_rate": 2.4800596378169923e-06, "loss": 1.284, "step": 66030 }, { "epoch": 1.9470735662686127, "grad_norm": 2.8304135778260306, "learning_rate": 2.479454389494003e-06, "loss": 1.2932, "step": 66035 }, { "epoch": 1.9472209936606222, "grad_norm": 2.75703233860642, "learning_rate": 2.4788491785582677e-06, "loss": 1.3011, "step": 66040 }, { "epoch": 1.9473684210526314, "grad_norm": 2.8179565073887582, "learning_rate": 2.478244005027598e-06, "loss": 1.2235, "step": 66045 }, { "epoch": 1.9475158484446409, "grad_norm": 2.723944735945119, "learning_rate": 2.4776388689197988e-06, "loss": 1.3113, "step": 66050 }, { "epoch": 1.9476632758366503, "grad_norm": 2.8638819792727777, "learning_rate": 2.4770337702526806e-06, "loss": 1.2827, "step": 66055 }, { "epoch": 1.9478107032286598, "grad_norm": 2.7373300756388876, "learning_rate": 2.476428709044046e-06, "loss": 1.3005, "step": 66060 }, { "epoch": 1.9479581306206692, "grad_norm": 2.7953174000871854, "learning_rate": 2.475823685311702e-06, "loss": 1.2843, "step": 66065 }, { "epoch": 1.9481055580126787, "grad_norm": 2.864386908143701, "learning_rate": 2.47521869907345e-06, "loss": 1.2685, "step": 66070 }, { "epoch": 1.9482529854046882, "grad_norm": 2.830979540080651, "learning_rate": 2.474613750347094e-06, "loss": 1.3048, "step": 66075 }, { "epoch": 1.9484004127966976, "grad_norm": 2.8043496179508294, "learning_rate": 2.4740088391504334e-06, "loss": 1.3231, "step": 66080 }, { "epoch": 1.948547840188707, "grad_norm": 2.74383014592694, "learning_rate": 2.4734039655012718e-06, "loss": 1.2488, "step": 66085 }, { "epoch": 1.9486952675807165, "grad_norm": 2.890818749820344, "learning_rate": 2.472799129417406e-06, "loss": 1.3108, "step": 66090 }, { "epoch": 1.948842694972726, "grad_norm": 2.9595217683686292, "learning_rate": 2.4721943309166334e-06, "loss": 1.3246, "step": 66095 }, { "epoch": 1.9489901223647355, "grad_norm": 2.8461836309512276, "learning_rate": 2.4715895700167525e-06, "loss": 1.2907, "step": 66100 }, { "epoch": 1.949137549756745, "grad_norm": 2.7544272276850945, "learning_rate": 2.4709848467355567e-06, "loss": 1.2478, "step": 66105 }, { "epoch": 1.9492849771487544, "grad_norm": 2.7676521025277854, "learning_rate": 2.470380161090844e-06, "loss": 1.2994, "step": 66110 }, { "epoch": 1.9494324045407638, "grad_norm": 2.675062879026979, "learning_rate": 2.4697755131004052e-06, "loss": 1.2846, "step": 66115 }, { "epoch": 1.949579831932773, "grad_norm": 2.8942269248901047, "learning_rate": 2.4691709027820347e-06, "loss": 1.2808, "step": 66120 }, { "epoch": 1.9497272593247825, "grad_norm": 2.8619347653657905, "learning_rate": 2.468566330153521e-06, "loss": 1.2532, "step": 66125 }, { "epoch": 1.949874686716792, "grad_norm": 2.7288668244262717, "learning_rate": 2.4679617952326583e-06, "loss": 1.2325, "step": 66130 }, { "epoch": 1.9500221141088014, "grad_norm": 2.917116553093547, "learning_rate": 2.467357298037233e-06, "loss": 1.3351, "step": 66135 }, { "epoch": 1.950169541500811, "grad_norm": 2.8132358877543058, "learning_rate": 2.4667528385850343e-06, "loss": 1.2774, "step": 66140 }, { "epoch": 1.9503169688928201, "grad_norm": 2.9349694311390557, "learning_rate": 2.466148416893847e-06, "loss": 1.3023, "step": 66145 }, { "epoch": 1.9504643962848296, "grad_norm": 2.8830644082768218, "learning_rate": 2.4655440329814605e-06, "loss": 1.3038, "step": 66150 }, { "epoch": 1.950611823676839, "grad_norm": 2.8064302100925103, "learning_rate": 2.464939686865657e-06, "loss": 1.2466, "step": 66155 }, { "epoch": 1.9507592510688485, "grad_norm": 2.9169535614787248, "learning_rate": 2.4643353785642193e-06, "loss": 1.2952, "step": 66160 }, { "epoch": 1.950906678460858, "grad_norm": 2.7536834186711947, "learning_rate": 2.4637311080949333e-06, "loss": 1.272, "step": 66165 }, { "epoch": 1.9510541058528674, "grad_norm": 2.7724413419034204, "learning_rate": 2.4631268754755768e-06, "loss": 1.3045, "step": 66170 }, { "epoch": 1.9512015332448769, "grad_norm": 2.756359839793278, "learning_rate": 2.4625226807239326e-06, "loss": 1.2868, "step": 66175 }, { "epoch": 1.9513489606368863, "grad_norm": 2.886269012924384, "learning_rate": 2.4619185238577763e-06, "loss": 1.2872, "step": 66180 }, { "epoch": 1.9514963880288958, "grad_norm": 2.8009251858049424, "learning_rate": 2.461314404894891e-06, "loss": 1.2614, "step": 66185 }, { "epoch": 1.9516438154209053, "grad_norm": 2.953169094729955, "learning_rate": 2.4607103238530493e-06, "loss": 1.2929, "step": 66190 }, { "epoch": 1.9517912428129147, "grad_norm": 2.7904062978170296, "learning_rate": 2.4601062807500298e-06, "loss": 1.2733, "step": 66195 }, { "epoch": 1.9519386702049242, "grad_norm": 2.9146700049066645, "learning_rate": 2.4595022756036035e-06, "loss": 1.3397, "step": 66200 }, { "epoch": 1.9520860975969336, "grad_norm": 2.7311726413994215, "learning_rate": 2.4588983084315485e-06, "loss": 1.2738, "step": 66205 }, { "epoch": 1.952233524988943, "grad_norm": 2.8539184388950725, "learning_rate": 2.4582943792516335e-06, "loss": 1.3075, "step": 66210 }, { "epoch": 1.9523809523809523, "grad_norm": 2.7914306925508123, "learning_rate": 2.4576904880816324e-06, "loss": 1.3347, "step": 66215 }, { "epoch": 1.9525283797729618, "grad_norm": 2.888178465207022, "learning_rate": 2.4570866349393152e-06, "loss": 1.24, "step": 66220 }, { "epoch": 1.9526758071649712, "grad_norm": 2.73135605323296, "learning_rate": 2.4564828198424485e-06, "loss": 1.2763, "step": 66225 }, { "epoch": 1.9528232345569807, "grad_norm": 2.75453302058912, "learning_rate": 2.4558790428088034e-06, "loss": 1.3344, "step": 66230 }, { "epoch": 1.9529706619489902, "grad_norm": 2.834914380385773, "learning_rate": 2.455275303856143e-06, "loss": 1.2785, "step": 66235 }, { "epoch": 1.9531180893409994, "grad_norm": 2.7210944378648825, "learning_rate": 2.4546716030022374e-06, "loss": 1.2618, "step": 66240 }, { "epoch": 1.9532655167330089, "grad_norm": 2.919599581923205, "learning_rate": 2.454067940264848e-06, "loss": 1.3274, "step": 66245 }, { "epoch": 1.9534129441250183, "grad_norm": 2.799891638854024, "learning_rate": 2.4534643156617396e-06, "loss": 1.3348, "step": 66250 }, { "epoch": 1.9535603715170278, "grad_norm": 2.761202653419381, "learning_rate": 2.4528607292106728e-06, "loss": 1.3368, "step": 66255 }, { "epoch": 1.9537077989090372, "grad_norm": 2.6864190041707423, "learning_rate": 2.4522571809294123e-06, "loss": 1.2416, "step": 66260 }, { "epoch": 1.9538552263010467, "grad_norm": 3.0050414853508847, "learning_rate": 2.4516536708357145e-06, "loss": 1.3055, "step": 66265 }, { "epoch": 1.9540026536930561, "grad_norm": 2.7926616678467076, "learning_rate": 2.4510501989473412e-06, "loss": 1.2233, "step": 66270 }, { "epoch": 1.9541500810850656, "grad_norm": 2.7957147523839048, "learning_rate": 2.450446765282048e-06, "loss": 1.276, "step": 66275 }, { "epoch": 1.954297508477075, "grad_norm": 2.6536469452709808, "learning_rate": 2.449843369857594e-06, "loss": 1.2448, "step": 66280 }, { "epoch": 1.9544449358690845, "grad_norm": 2.986765982539436, "learning_rate": 2.449240012691733e-06, "loss": 1.2981, "step": 66285 }, { "epoch": 1.954592363261094, "grad_norm": 2.8419907252214913, "learning_rate": 2.4486366938022183e-06, "loss": 1.2401, "step": 66290 }, { "epoch": 1.9547397906531034, "grad_norm": 2.6992803875932974, "learning_rate": 2.4480334132068066e-06, "loss": 1.2586, "step": 66295 }, { "epoch": 1.954887218045113, "grad_norm": 2.7452861670038162, "learning_rate": 2.447430170923248e-06, "loss": 1.3317, "step": 66300 }, { "epoch": 1.9550346454371224, "grad_norm": 2.7757719842609063, "learning_rate": 2.4468269669692947e-06, "loss": 1.2839, "step": 66305 }, { "epoch": 1.9551820728291318, "grad_norm": 2.836835840667018, "learning_rate": 2.4462238013626942e-06, "loss": 1.2245, "step": 66310 }, { "epoch": 1.955329500221141, "grad_norm": 2.856740913245933, "learning_rate": 2.4456206741211987e-06, "loss": 1.3037, "step": 66315 }, { "epoch": 1.9554769276131505, "grad_norm": 2.8444739156252608, "learning_rate": 2.4450175852625536e-06, "loss": 1.2506, "step": 66320 }, { "epoch": 1.95562435500516, "grad_norm": 2.832726414262898, "learning_rate": 2.4444145348045066e-06, "loss": 1.3299, "step": 66325 }, { "epoch": 1.9557717823971694, "grad_norm": 2.739986458761614, "learning_rate": 2.4438115227648016e-06, "loss": 1.3113, "step": 66330 }, { "epoch": 1.9559192097891789, "grad_norm": 2.877990094604258, "learning_rate": 2.4432085491611854e-06, "loss": 1.2863, "step": 66335 }, { "epoch": 1.9560666371811881, "grad_norm": 2.6438003313898957, "learning_rate": 2.4426056140113985e-06, "loss": 1.2395, "step": 66340 }, { "epoch": 1.9562140645731976, "grad_norm": 2.7438878158322826, "learning_rate": 2.442002717333186e-06, "loss": 1.3118, "step": 66345 }, { "epoch": 1.956361491965207, "grad_norm": 2.7517939947023806, "learning_rate": 2.4413998591442866e-06, "loss": 1.2638, "step": 66350 }, { "epoch": 1.9565089193572165, "grad_norm": 2.8737975763542165, "learning_rate": 2.44079703946244e-06, "loss": 1.338, "step": 66355 }, { "epoch": 1.956656346749226, "grad_norm": 2.8836232982511216, "learning_rate": 2.4401942583053864e-06, "loss": 1.3162, "step": 66360 }, { "epoch": 1.9568037741412354, "grad_norm": 2.915826267203096, "learning_rate": 2.4395915156908605e-06, "loss": 1.2373, "step": 66365 }, { "epoch": 1.9569512015332449, "grad_norm": 3.1090893972719753, "learning_rate": 2.4389888116366025e-06, "loss": 1.3136, "step": 66370 }, { "epoch": 1.9570986289252543, "grad_norm": 2.823342210813918, "learning_rate": 2.438386146160345e-06, "loss": 1.3021, "step": 66375 }, { "epoch": 1.9572460563172638, "grad_norm": 2.6857643139952145, "learning_rate": 2.4377835192798235e-06, "loss": 1.3401, "step": 66380 }, { "epoch": 1.9573934837092732, "grad_norm": 2.7968840990334525, "learning_rate": 2.4371809310127687e-06, "loss": 1.255, "step": 66385 }, { "epoch": 1.9575409111012827, "grad_norm": 2.8842935530728475, "learning_rate": 2.4365783813769156e-06, "loss": 1.2905, "step": 66390 }, { "epoch": 1.9576883384932922, "grad_norm": 2.7596540319765954, "learning_rate": 2.435975870389993e-06, "loss": 1.2493, "step": 66395 }, { "epoch": 1.9578357658853016, "grad_norm": 2.883025002785637, "learning_rate": 2.4353733980697314e-06, "loss": 1.3033, "step": 66400 }, { "epoch": 1.957983193277311, "grad_norm": 2.8315661642406984, "learning_rate": 2.434770964433857e-06, "loss": 1.2557, "step": 66405 }, { "epoch": 1.9581306206693203, "grad_norm": 2.8440002589189612, "learning_rate": 2.4341685695001003e-06, "loss": 1.3011, "step": 66410 }, { "epoch": 1.9582780480613298, "grad_norm": 2.7435705885971777, "learning_rate": 2.4335662132861866e-06, "loss": 1.2591, "step": 66415 }, { "epoch": 1.9584254754533392, "grad_norm": 2.8606654807715612, "learning_rate": 2.4329638958098383e-06, "loss": 1.2475, "step": 66420 }, { "epoch": 1.9585729028453487, "grad_norm": 2.845476523175498, "learning_rate": 2.432361617088783e-06, "loss": 1.2501, "step": 66425 }, { "epoch": 1.9587203302373581, "grad_norm": 2.772092685578986, "learning_rate": 2.43175937714074e-06, "loss": 1.305, "step": 66430 }, { "epoch": 1.9588677576293674, "grad_norm": 2.8441208437341183, "learning_rate": 2.4311571759834344e-06, "loss": 1.3129, "step": 66435 }, { "epoch": 1.9590151850213768, "grad_norm": 2.9242160581182186, "learning_rate": 2.430555013634583e-06, "loss": 1.3273, "step": 66440 }, { "epoch": 1.9591626124133863, "grad_norm": 2.9069995874837518, "learning_rate": 2.4299528901119083e-06, "loss": 1.2384, "step": 66445 }, { "epoch": 1.9593100398053958, "grad_norm": 2.7711588771938875, "learning_rate": 2.429350805433127e-06, "loss": 1.2775, "step": 66450 }, { "epoch": 1.9594574671974052, "grad_norm": 2.8047402533524792, "learning_rate": 2.4287487596159565e-06, "loss": 1.306, "step": 66455 }, { "epoch": 1.9596048945894147, "grad_norm": 2.8162402311688166, "learning_rate": 2.428146752678111e-06, "loss": 1.3483, "step": 66460 }, { "epoch": 1.9597523219814241, "grad_norm": 2.793704604047149, "learning_rate": 2.4275447846373087e-06, "loss": 1.2747, "step": 66465 }, { "epoch": 1.9598997493734336, "grad_norm": 2.709406479160198, "learning_rate": 2.426942855511259e-06, "loss": 1.2511, "step": 66470 }, { "epoch": 1.960047176765443, "grad_norm": 2.9076816671123376, "learning_rate": 2.4263409653176787e-06, "loss": 1.2846, "step": 66475 }, { "epoch": 1.9601946041574525, "grad_norm": 2.76191218678419, "learning_rate": 2.425739114074276e-06, "loss": 1.3015, "step": 66480 }, { "epoch": 1.960342031549462, "grad_norm": 2.7388943762685547, "learning_rate": 2.425137301798762e-06, "loss": 1.2902, "step": 66485 }, { "epoch": 1.9604894589414714, "grad_norm": 3.1270250584717694, "learning_rate": 2.424535528508846e-06, "loss": 1.3195, "step": 66490 }, { "epoch": 1.9606368863334809, "grad_norm": 2.897542731040442, "learning_rate": 2.4239337942222345e-06, "loss": 1.2822, "step": 66495 }, { "epoch": 1.9607843137254903, "grad_norm": 2.7345568876349877, "learning_rate": 2.423332098956637e-06, "loss": 1.2897, "step": 66500 }, { "epoch": 1.9607843137254903, "eval_loss": 1.0616830587387085, "eval_runtime": 4.1802, "eval_samples_per_second": 94.733, "eval_steps_per_second": 3.11, "step": 66500 }, { "epoch": 1.9609317411174998, "grad_norm": 2.682600893034413, "learning_rate": 2.4227304427297564e-06, "loss": 1.3076, "step": 66505 }, { "epoch": 1.961079168509509, "grad_norm": 3.0354771467278763, "learning_rate": 2.422128825559299e-06, "loss": 1.2696, "step": 66510 }, { "epoch": 1.9612265959015185, "grad_norm": 2.7751981042718876, "learning_rate": 2.421527247462965e-06, "loss": 1.2574, "step": 66515 }, { "epoch": 1.961374023293528, "grad_norm": 2.818378680702095, "learning_rate": 2.4209257084584613e-06, "loss": 1.3168, "step": 66520 }, { "epoch": 1.9615214506855374, "grad_norm": 2.749855484708555, "learning_rate": 2.420324208563485e-06, "loss": 1.2579, "step": 66525 }, { "epoch": 1.9616688780775469, "grad_norm": 2.840300135089686, "learning_rate": 2.4197227477957374e-06, "loss": 1.2913, "step": 66530 }, { "epoch": 1.961816305469556, "grad_norm": 2.8855757258441788, "learning_rate": 2.4191213261729158e-06, "loss": 1.2996, "step": 66535 }, { "epoch": 1.9619637328615656, "grad_norm": 2.7050389684294416, "learning_rate": 2.4185199437127205e-06, "loss": 1.2766, "step": 66540 }, { "epoch": 1.962111160253575, "grad_norm": 2.8218382522474834, "learning_rate": 2.417918600432847e-06, "loss": 1.2854, "step": 66545 }, { "epoch": 1.9622585876455845, "grad_norm": 2.7790734539592226, "learning_rate": 2.417317296350987e-06, "loss": 1.2373, "step": 66550 }, { "epoch": 1.962406015037594, "grad_norm": 2.864781438615419, "learning_rate": 2.41671603148484e-06, "loss": 1.2965, "step": 66555 }, { "epoch": 1.9625534424296034, "grad_norm": 2.781865856508178, "learning_rate": 2.4161148058520947e-06, "loss": 1.3042, "step": 66560 }, { "epoch": 1.9627008698216128, "grad_norm": 2.856218223630519, "learning_rate": 2.4155136194704454e-06, "loss": 1.2993, "step": 66565 }, { "epoch": 1.9628482972136223, "grad_norm": 2.8087185106625694, "learning_rate": 2.4149124723575795e-06, "loss": 1.3101, "step": 66570 }, { "epoch": 1.9629957246056318, "grad_norm": 2.8841701800608535, "learning_rate": 2.4143113645311905e-06, "loss": 1.2756, "step": 66575 }, { "epoch": 1.9631431519976412, "grad_norm": 2.8756802460185504, "learning_rate": 2.413710296008964e-06, "loss": 1.3011, "step": 66580 }, { "epoch": 1.9632905793896507, "grad_norm": 2.863741638347058, "learning_rate": 2.4131092668085893e-06, "loss": 1.2824, "step": 66585 }, { "epoch": 1.9634380067816601, "grad_norm": 2.7385111106433233, "learning_rate": 2.4125082769477487e-06, "loss": 1.2353, "step": 66590 }, { "epoch": 1.9635854341736696, "grad_norm": 2.8750665965040243, "learning_rate": 2.4119073264441314e-06, "loss": 1.2885, "step": 66595 }, { "epoch": 1.963732861565679, "grad_norm": 2.769598307240505, "learning_rate": 2.4113064153154175e-06, "loss": 1.2905, "step": 66600 }, { "epoch": 1.9638802889576883, "grad_norm": 2.7989275146850976, "learning_rate": 2.410705543579292e-06, "loss": 1.2812, "step": 66605 }, { "epoch": 1.9640277163496977, "grad_norm": 2.902336499494615, "learning_rate": 2.410104711253435e-06, "loss": 1.2922, "step": 66610 }, { "epoch": 1.9641751437417072, "grad_norm": 2.888841105803894, "learning_rate": 2.409503918355527e-06, "loss": 1.308, "step": 66615 }, { "epoch": 1.9643225711337167, "grad_norm": 2.9752743803549246, "learning_rate": 2.4089031649032467e-06, "loss": 1.3397, "step": 66620 }, { "epoch": 1.9644699985257261, "grad_norm": 2.780710720205088, "learning_rate": 2.4083024509142714e-06, "loss": 1.3046, "step": 66625 }, { "epoch": 1.9646174259177354, "grad_norm": 2.91707074931865, "learning_rate": 2.4077017764062797e-06, "loss": 1.3147, "step": 66630 }, { "epoch": 1.9647648533097448, "grad_norm": 2.827525708702611, "learning_rate": 2.407101141396945e-06, "loss": 1.2766, "step": 66635 }, { "epoch": 1.9649122807017543, "grad_norm": 2.857261001045537, "learning_rate": 2.4065005459039433e-06, "loss": 1.2545, "step": 66640 }, { "epoch": 1.9650597080937637, "grad_norm": 2.7339191709621384, "learning_rate": 2.4058999899449465e-06, "loss": 1.3376, "step": 66645 }, { "epoch": 1.9652071354857732, "grad_norm": 2.9729161403541258, "learning_rate": 2.405299473537628e-06, "loss": 1.2424, "step": 66650 }, { "epoch": 1.9653545628777827, "grad_norm": 2.8949829512671976, "learning_rate": 2.404698996699658e-06, "loss": 1.2843, "step": 66655 }, { "epoch": 1.965501990269792, "grad_norm": 2.8801818487299364, "learning_rate": 2.404098559448706e-06, "loss": 1.2838, "step": 66660 }, { "epoch": 1.9656494176618016, "grad_norm": 2.802418805547889, "learning_rate": 2.4034981618024396e-06, "loss": 1.2548, "step": 66665 }, { "epoch": 1.965796845053811, "grad_norm": 2.8082796277160558, "learning_rate": 2.402897803778529e-06, "loss": 1.2888, "step": 66670 }, { "epoch": 1.9659442724458205, "grad_norm": 2.8845165413499605, "learning_rate": 2.402297485394639e-06, "loss": 1.2705, "step": 66675 }, { "epoch": 1.96609169983783, "grad_norm": 3.044739633039177, "learning_rate": 2.4016972066684323e-06, "loss": 1.263, "step": 66680 }, { "epoch": 1.9662391272298394, "grad_norm": 2.8169271224432375, "learning_rate": 2.4010969676175767e-06, "loss": 1.2703, "step": 66685 }, { "epoch": 1.9663865546218489, "grad_norm": 2.7665039645240825, "learning_rate": 2.4004967682597328e-06, "loss": 1.2946, "step": 66690 }, { "epoch": 1.9665339820138583, "grad_norm": 2.916295965644209, "learning_rate": 2.3998966086125624e-06, "loss": 1.3193, "step": 66695 }, { "epoch": 1.9666814094058678, "grad_norm": 3.4795127477191747, "learning_rate": 2.3992964886937247e-06, "loss": 1.2773, "step": 66700 }, { "epoch": 1.966828836797877, "grad_norm": 3.020588732217859, "learning_rate": 2.398696408520882e-06, "loss": 1.2535, "step": 66705 }, { "epoch": 1.9669762641898865, "grad_norm": 2.753428671509482, "learning_rate": 2.3980963681116897e-06, "loss": 1.2532, "step": 66710 }, { "epoch": 1.967123691581896, "grad_norm": 2.886072185487443, "learning_rate": 2.3974963674838053e-06, "loss": 1.323, "step": 66715 }, { "epoch": 1.9672711189739054, "grad_norm": 2.8972638480215935, "learning_rate": 2.3968964066548845e-06, "loss": 1.2932, "step": 66720 }, { "epoch": 1.9674185463659146, "grad_norm": 2.7529393838699336, "learning_rate": 2.396296485642583e-06, "loss": 1.2191, "step": 66725 }, { "epoch": 1.967565973757924, "grad_norm": 2.80190190311354, "learning_rate": 2.395696604464551e-06, "loss": 1.2614, "step": 66730 }, { "epoch": 1.9677134011499335, "grad_norm": 2.8289630549997447, "learning_rate": 2.3950967631384454e-06, "loss": 1.2951, "step": 66735 }, { "epoch": 1.967860828541943, "grad_norm": 2.7496524100263313, "learning_rate": 2.394496961681914e-06, "loss": 1.3068, "step": 66740 }, { "epoch": 1.9680082559339525, "grad_norm": 2.847326590297118, "learning_rate": 2.3938972001126068e-06, "loss": 1.3052, "step": 66745 }, { "epoch": 1.968155683325962, "grad_norm": 2.79518417676964, "learning_rate": 2.3932974784481744e-06, "loss": 1.2878, "step": 66750 }, { "epoch": 1.9683031107179714, "grad_norm": 2.8685097589952897, "learning_rate": 2.3926977967062603e-06, "loss": 1.3098, "step": 66755 }, { "epoch": 1.9684505381099808, "grad_norm": 2.792743574493318, "learning_rate": 2.392098154904516e-06, "loss": 1.2275, "step": 66760 }, { "epoch": 1.9685979655019903, "grad_norm": 2.7813071226741086, "learning_rate": 2.3914985530605824e-06, "loss": 1.244, "step": 66765 }, { "epoch": 1.9687453928939997, "grad_norm": 2.688328594310336, "learning_rate": 2.3908989911921063e-06, "loss": 1.2767, "step": 66770 }, { "epoch": 1.9688928202860092, "grad_norm": 2.8403022200765395, "learning_rate": 2.3902994693167275e-06, "loss": 1.2845, "step": 66775 }, { "epoch": 1.9690402476780187, "grad_norm": 2.9235565476975007, "learning_rate": 2.389699987452091e-06, "loss": 1.3144, "step": 66780 }, { "epoch": 1.9691876750700281, "grad_norm": 2.860896798212553, "learning_rate": 2.3891005456158348e-06, "loss": 1.2991, "step": 66785 }, { "epoch": 1.9693351024620376, "grad_norm": 2.855784010059742, "learning_rate": 2.3885011438256e-06, "loss": 1.2513, "step": 66790 }, { "epoch": 1.969482529854047, "grad_norm": 2.8681679953579517, "learning_rate": 2.3879017820990223e-06, "loss": 1.2915, "step": 66795 }, { "epoch": 1.9696299572460563, "grad_norm": 2.7301938254595575, "learning_rate": 2.3873024604537416e-06, "loss": 1.2937, "step": 66800 }, { "epoch": 1.9697773846380657, "grad_norm": 2.7566748552841798, "learning_rate": 2.386703178907392e-06, "loss": 1.3227, "step": 66805 }, { "epoch": 1.9699248120300752, "grad_norm": 2.8110474822059848, "learning_rate": 2.386103937477607e-06, "loss": 1.3172, "step": 66810 }, { "epoch": 1.9700722394220846, "grad_norm": 2.894386050418679, "learning_rate": 2.385504736182021e-06, "loss": 1.3005, "step": 66815 }, { "epoch": 1.970219666814094, "grad_norm": 2.8744397331652856, "learning_rate": 2.3849055750382666e-06, "loss": 1.2688, "step": 66820 }, { "epoch": 1.9703670942061033, "grad_norm": 2.615065036750673, "learning_rate": 2.3843064540639752e-06, "loss": 1.2941, "step": 66825 }, { "epoch": 1.9705145215981128, "grad_norm": 2.99689632044169, "learning_rate": 2.3837073732767745e-06, "loss": 1.3049, "step": 66830 }, { "epoch": 1.9706619489901223, "grad_norm": 2.888517868039612, "learning_rate": 2.3831083326942954e-06, "loss": 1.299, "step": 66835 }, { "epoch": 1.9708093763821317, "grad_norm": 2.7733426384876116, "learning_rate": 2.3825093323341643e-06, "loss": 1.2613, "step": 66840 }, { "epoch": 1.9709568037741412, "grad_norm": 2.801228920804406, "learning_rate": 2.381910372214008e-06, "loss": 1.2956, "step": 66845 }, { "epoch": 1.9711042311661506, "grad_norm": 2.7129831702902085, "learning_rate": 2.3813114523514496e-06, "loss": 1.2951, "step": 66850 }, { "epoch": 1.97125165855816, "grad_norm": 2.8254160250033857, "learning_rate": 2.3807125727641163e-06, "loss": 1.2917, "step": 66855 }, { "epoch": 1.9713990859501695, "grad_norm": 2.9179118325534548, "learning_rate": 2.3801137334696273e-06, "loss": 1.2556, "step": 66860 }, { "epoch": 1.971546513342179, "grad_norm": 2.8846611411883187, "learning_rate": 2.379514934485608e-06, "loss": 1.2759, "step": 66865 }, { "epoch": 1.9716939407341885, "grad_norm": 2.8556544728465294, "learning_rate": 2.378916175829676e-06, "loss": 1.2433, "step": 66870 }, { "epoch": 1.971841368126198, "grad_norm": 2.707057595906164, "learning_rate": 2.378317457519451e-06, "loss": 1.2483, "step": 66875 }, { "epoch": 1.9719887955182074, "grad_norm": 2.8891046212299996, "learning_rate": 2.3777187795725512e-06, "loss": 1.2536, "step": 66880 }, { "epoch": 1.9721362229102168, "grad_norm": 2.756063999303918, "learning_rate": 2.3771201420065925e-06, "loss": 1.3045, "step": 66885 }, { "epoch": 1.9722836503022263, "grad_norm": 2.9753814866182218, "learning_rate": 2.3765215448391926e-06, "loss": 1.2462, "step": 66890 }, { "epoch": 1.9724310776942355, "grad_norm": 2.87423381823208, "learning_rate": 2.3759229880879635e-06, "loss": 1.243, "step": 66895 }, { "epoch": 1.972578505086245, "grad_norm": 2.738120608720924, "learning_rate": 2.3753244717705208e-06, "loss": 1.2953, "step": 66900 }, { "epoch": 1.9727259324782545, "grad_norm": 2.8419633619859734, "learning_rate": 2.3747259959044735e-06, "loss": 1.2578, "step": 66905 }, { "epoch": 1.972873359870264, "grad_norm": 2.9706182375600543, "learning_rate": 2.3741275605074353e-06, "loss": 1.2949, "step": 66910 }, { "epoch": 1.9730207872622734, "grad_norm": 2.939088033179954, "learning_rate": 2.3735291655970143e-06, "loss": 1.4101, "step": 66915 }, { "epoch": 1.9731682146542826, "grad_norm": 2.82357604260193, "learning_rate": 2.37293081119082e-06, "loss": 1.3004, "step": 66920 }, { "epoch": 1.973315642046292, "grad_norm": 2.8171930559243332, "learning_rate": 2.3723324973064575e-06, "loss": 1.3102, "step": 66925 }, { "epoch": 1.9734630694383015, "grad_norm": 2.7971454876733683, "learning_rate": 2.3717342239615364e-06, "loss": 1.2461, "step": 66930 }, { "epoch": 1.973610496830311, "grad_norm": 2.8142555387650385, "learning_rate": 2.371135991173659e-06, "loss": 1.2846, "step": 66935 }, { "epoch": 1.9737579242223204, "grad_norm": 2.7648550110209347, "learning_rate": 2.370537798960428e-06, "loss": 1.2673, "step": 66940 }, { "epoch": 1.97390535161433, "grad_norm": 2.6368146869928673, "learning_rate": 2.369939647339449e-06, "loss": 1.1741, "step": 66945 }, { "epoch": 1.9740527790063394, "grad_norm": 2.7575543232564437, "learning_rate": 2.3693415363283216e-06, "loss": 1.2197, "step": 66950 }, { "epoch": 1.9742002063983488, "grad_norm": 2.624252697409444, "learning_rate": 2.368743465944646e-06, "loss": 1.2557, "step": 66955 }, { "epoch": 1.9743476337903583, "grad_norm": 2.7207883576385203, "learning_rate": 2.36814543620602e-06, "loss": 1.2756, "step": 66960 }, { "epoch": 1.9744950611823677, "grad_norm": 2.79286401413435, "learning_rate": 2.367547447130044e-06, "loss": 1.2949, "step": 66965 }, { "epoch": 1.9746424885743772, "grad_norm": 2.7853729423358455, "learning_rate": 2.3669494987343123e-06, "loss": 1.3096, "step": 66970 }, { "epoch": 1.9747899159663866, "grad_norm": 2.7816444481290494, "learning_rate": 2.3663515910364217e-06, "loss": 1.2721, "step": 66975 }, { "epoch": 1.974937343358396, "grad_norm": 2.8271191828352213, "learning_rate": 2.3657537240539638e-06, "loss": 1.3202, "step": 66980 }, { "epoch": 1.9750847707504056, "grad_norm": 2.8614492926015576, "learning_rate": 2.365155897804534e-06, "loss": 1.2544, "step": 66985 }, { "epoch": 1.975232198142415, "grad_norm": 2.8394303856060383, "learning_rate": 2.364558112305723e-06, "loss": 1.259, "step": 66990 }, { "epoch": 1.9753796255344243, "grad_norm": 2.835293318057978, "learning_rate": 2.3639603675751223e-06, "loss": 1.2587, "step": 66995 }, { "epoch": 1.9755270529264337, "grad_norm": 2.6502879727701303, "learning_rate": 2.3633626636303203e-06, "loss": 1.2819, "step": 67000 }, { "epoch": 1.9755270529264337, "eval_loss": 1.0605965852737427, "eval_runtime": 4.1606, "eval_samples_per_second": 95.179, "eval_steps_per_second": 3.125, "step": 67000 }, { "epoch": 1.9756744803184432, "grad_norm": 2.811797247996619, "learning_rate": 2.362765000488905e-06, "loss": 1.3301, "step": 67005 }, { "epoch": 1.9758219077104526, "grad_norm": 2.9182703443193327, "learning_rate": 2.3621673781684647e-06, "loss": 1.2512, "step": 67010 }, { "epoch": 1.975969335102462, "grad_norm": 2.861988833830992, "learning_rate": 2.3615697966865817e-06, "loss": 1.3178, "step": 67015 }, { "epoch": 1.9761167624944713, "grad_norm": 2.927154603301907, "learning_rate": 2.360972256060845e-06, "loss": 1.2714, "step": 67020 }, { "epoch": 1.9762641898864808, "grad_norm": 2.7821275784025805, "learning_rate": 2.3603747563088348e-06, "loss": 1.1899, "step": 67025 }, { "epoch": 1.9764116172784902, "grad_norm": 2.810429595800393, "learning_rate": 2.3597772974481346e-06, "loss": 1.2775, "step": 67030 }, { "epoch": 1.9765590446704997, "grad_norm": 2.7866987727364885, "learning_rate": 2.3591798794963242e-06, "loss": 1.314, "step": 67035 }, { "epoch": 1.9767064720625092, "grad_norm": 2.813834327883716, "learning_rate": 2.3585825024709846e-06, "loss": 1.2693, "step": 67040 }, { "epoch": 1.9768538994545186, "grad_norm": 2.732664014515515, "learning_rate": 2.3579851663896935e-06, "loss": 1.2322, "step": 67045 }, { "epoch": 1.977001326846528, "grad_norm": 2.827051140499815, "learning_rate": 2.3573878712700293e-06, "loss": 1.3189, "step": 67050 }, { "epoch": 1.9771487542385375, "grad_norm": 2.7511668029887058, "learning_rate": 2.356790617129565e-06, "loss": 1.2858, "step": 67055 }, { "epoch": 1.977296181630547, "grad_norm": 2.818399232407926, "learning_rate": 2.3561934039858793e-06, "loss": 1.2985, "step": 67060 }, { "epoch": 1.9774436090225564, "grad_norm": 2.927135141324902, "learning_rate": 2.3555962318565446e-06, "loss": 1.2283, "step": 67065 }, { "epoch": 1.977591036414566, "grad_norm": 2.7361813605287035, "learning_rate": 2.3549991007591313e-06, "loss": 1.2681, "step": 67070 }, { "epoch": 1.9777384638065754, "grad_norm": 2.828768698426209, "learning_rate": 2.354402010711214e-06, "loss": 1.3099, "step": 67075 }, { "epoch": 1.9778858911985848, "grad_norm": 2.767822261145236, "learning_rate": 2.353804961730361e-06, "loss": 1.3091, "step": 67080 }, { "epoch": 1.9780333185905943, "grad_norm": 2.8641015851557423, "learning_rate": 2.353207953834141e-06, "loss": 1.2857, "step": 67085 }, { "epoch": 1.9781807459826035, "grad_norm": 2.786949801506282, "learning_rate": 2.352610987040121e-06, "loss": 1.2652, "step": 67090 }, { "epoch": 1.978328173374613, "grad_norm": 2.918911127630489, "learning_rate": 2.3520140613658697e-06, "loss": 1.3056, "step": 67095 }, { "epoch": 1.9784756007666224, "grad_norm": 2.6572238454053907, "learning_rate": 2.35141717682895e-06, "loss": 1.2183, "step": 67100 }, { "epoch": 1.978623028158632, "grad_norm": 2.819281228079205, "learning_rate": 2.350820333446928e-06, "loss": 1.2569, "step": 67105 }, { "epoch": 1.9787704555506413, "grad_norm": 2.8194488936139543, "learning_rate": 2.3502235312373635e-06, "loss": 1.253, "step": 67110 }, { "epoch": 1.9789178829426506, "grad_norm": 2.845725548810177, "learning_rate": 2.3496267702178215e-06, "loss": 1.2822, "step": 67115 }, { "epoch": 1.97906531033466, "grad_norm": 2.857038399691139, "learning_rate": 2.34903005040586e-06, "loss": 1.3099, "step": 67120 }, { "epoch": 1.9792127377266695, "grad_norm": 2.975339182764634, "learning_rate": 2.34843337181904e-06, "loss": 1.3202, "step": 67125 }, { "epoch": 1.979360165118679, "grad_norm": 2.7131789067928005, "learning_rate": 2.3478367344749185e-06, "loss": 1.289, "step": 67130 }, { "epoch": 1.9795075925106884, "grad_norm": 2.932459401483751, "learning_rate": 2.3472401383910517e-06, "loss": 1.2828, "step": 67135 }, { "epoch": 1.9796550199026979, "grad_norm": 2.8959755729727448, "learning_rate": 2.346643583584996e-06, "loss": 1.2965, "step": 67140 }, { "epoch": 1.9798024472947073, "grad_norm": 2.9301214710622476, "learning_rate": 2.3460470700743038e-06, "loss": 1.3512, "step": 67145 }, { "epoch": 1.9799498746867168, "grad_norm": 2.8208911033122956, "learning_rate": 2.345450597876531e-06, "loss": 1.3044, "step": 67150 }, { "epoch": 1.9800973020787263, "grad_norm": 2.7442670090297203, "learning_rate": 2.3448541670092285e-06, "loss": 1.3146, "step": 67155 }, { "epoch": 1.9802447294707357, "grad_norm": 2.7901074398652908, "learning_rate": 2.344257777489946e-06, "loss": 1.2927, "step": 67160 }, { "epoch": 1.9803921568627452, "grad_norm": 2.826227760155904, "learning_rate": 2.343661429336233e-06, "loss": 1.2891, "step": 67165 }, { "epoch": 1.9805395842547546, "grad_norm": 2.865528611524335, "learning_rate": 2.3430651225656397e-06, "loss": 1.3285, "step": 67170 }, { "epoch": 1.980687011646764, "grad_norm": 2.8256522482909263, "learning_rate": 2.342468857195711e-06, "loss": 1.2735, "step": 67175 }, { "epoch": 1.9808344390387735, "grad_norm": 2.9738711015183954, "learning_rate": 2.341872633243994e-06, "loss": 1.2982, "step": 67180 }, { "epoch": 1.980981866430783, "grad_norm": 2.709100002010751, "learning_rate": 2.341276450728031e-06, "loss": 1.2416, "step": 67185 }, { "epoch": 1.9811292938227922, "grad_norm": 2.7465198747846724, "learning_rate": 2.340680309665369e-06, "loss": 1.2297, "step": 67190 }, { "epoch": 1.9812767212148017, "grad_norm": 2.684567678572792, "learning_rate": 2.340084210073548e-06, "loss": 1.2568, "step": 67195 }, { "epoch": 1.9814241486068112, "grad_norm": 2.6852070680100377, "learning_rate": 2.339488151970107e-06, "loss": 1.2261, "step": 67200 }, { "epoch": 1.9815715759988206, "grad_norm": 2.72571681777372, "learning_rate": 2.3388921353725896e-06, "loss": 1.2487, "step": 67205 }, { "epoch": 1.98171900339083, "grad_norm": 2.804069833588457, "learning_rate": 2.3382961602985323e-06, "loss": 1.2637, "step": 67210 }, { "epoch": 1.9818664307828393, "grad_norm": 2.898338384961783, "learning_rate": 2.337700226765473e-06, "loss": 1.2658, "step": 67215 }, { "epoch": 1.9820138581748488, "grad_norm": 2.7314478972116665, "learning_rate": 2.3371043347909453e-06, "loss": 1.2585, "step": 67220 }, { "epoch": 1.9821612855668582, "grad_norm": 2.784483984675458, "learning_rate": 2.336508484392487e-06, "loss": 1.2107, "step": 67225 }, { "epoch": 1.9823087129588677, "grad_norm": 2.9909172905449206, "learning_rate": 2.3359126755876307e-06, "loss": 1.2799, "step": 67230 }, { "epoch": 1.9824561403508771, "grad_norm": 2.9196310201730498, "learning_rate": 2.3353169083939087e-06, "loss": 1.3034, "step": 67235 }, { "epoch": 1.9826035677428866, "grad_norm": 3.096971661360548, "learning_rate": 2.3347211828288508e-06, "loss": 1.2379, "step": 67240 }, { "epoch": 1.982750995134896, "grad_norm": 2.7778920627641295, "learning_rate": 2.33412549890999e-06, "loss": 1.2886, "step": 67245 }, { "epoch": 1.9828984225269055, "grad_norm": 2.8168203125618287, "learning_rate": 2.3335298566548514e-06, "loss": 1.2381, "step": 67250 }, { "epoch": 1.983045849918915, "grad_norm": 2.9327962442374886, "learning_rate": 2.3329342560809657e-06, "loss": 1.2752, "step": 67255 }, { "epoch": 1.9831932773109244, "grad_norm": 2.7539780302913823, "learning_rate": 2.332338697205858e-06, "loss": 1.2605, "step": 67260 }, { "epoch": 1.9833407047029339, "grad_norm": 2.82739207052843, "learning_rate": 2.331743180047052e-06, "loss": 1.2603, "step": 67265 }, { "epoch": 1.9834881320949433, "grad_norm": 2.8446951635098285, "learning_rate": 2.331147704622073e-06, "loss": 1.297, "step": 67270 }, { "epoch": 1.9836355594869528, "grad_norm": 3.058367049540776, "learning_rate": 2.330552270948441e-06, "loss": 1.2435, "step": 67275 }, { "epoch": 1.9837829868789623, "grad_norm": 2.789205907647317, "learning_rate": 2.3299568790436816e-06, "loss": 1.3244, "step": 67280 }, { "epoch": 1.9839304142709715, "grad_norm": 2.7930656839879213, "learning_rate": 2.329361528925312e-06, "loss": 1.2984, "step": 67285 }, { "epoch": 1.984077841662981, "grad_norm": 2.961435430886566, "learning_rate": 2.3287662206108513e-06, "loss": 1.3167, "step": 67290 }, { "epoch": 1.9842252690549904, "grad_norm": 2.7449546199732295, "learning_rate": 2.3281709541178167e-06, "loss": 1.2508, "step": 67295 }, { "epoch": 1.9843726964469999, "grad_norm": 3.075548748081558, "learning_rate": 2.3275757294637263e-06, "loss": 1.2893, "step": 67300 }, { "epoch": 1.9845201238390093, "grad_norm": 2.9010014739389955, "learning_rate": 2.326980546666094e-06, "loss": 1.2988, "step": 67305 }, { "epoch": 1.9846675512310186, "grad_norm": 2.738233133384714, "learning_rate": 2.326385405742434e-06, "loss": 1.3444, "step": 67310 }, { "epoch": 1.984814978623028, "grad_norm": 2.757084298638828, "learning_rate": 2.325790306710259e-06, "loss": 1.2707, "step": 67315 }, { "epoch": 1.9849624060150375, "grad_norm": 2.8407029441539478, "learning_rate": 2.3251952495870813e-06, "loss": 1.2906, "step": 67320 }, { "epoch": 1.985109833407047, "grad_norm": 2.8283507942737547, "learning_rate": 2.3246002343904097e-06, "loss": 1.26, "step": 67325 }, { "epoch": 1.9852572607990564, "grad_norm": 2.7357904682944616, "learning_rate": 2.3240052611377526e-06, "loss": 1.2651, "step": 67330 }, { "epoch": 1.9854046881910659, "grad_norm": 2.7289018245090215, "learning_rate": 2.3234103298466214e-06, "loss": 1.3141, "step": 67335 }, { "epoch": 1.9855521155830753, "grad_norm": 2.7473127974937466, "learning_rate": 2.3228154405345186e-06, "loss": 1.2379, "step": 67340 }, { "epoch": 1.9856995429750848, "grad_norm": 2.7267002384232417, "learning_rate": 2.322220593218952e-06, "loss": 1.2483, "step": 67345 }, { "epoch": 1.9858469703670942, "grad_norm": 2.892596325990848, "learning_rate": 2.321625787917424e-06, "loss": 1.2781, "step": 67350 }, { "epoch": 1.9859943977591037, "grad_norm": 2.8485779596099254, "learning_rate": 2.3210310246474387e-06, "loss": 1.2604, "step": 67355 }, { "epoch": 1.9861418251511131, "grad_norm": 2.8127985486502762, "learning_rate": 2.320436303426497e-06, "loss": 1.3057, "step": 67360 }, { "epoch": 1.9862892525431226, "grad_norm": 2.7961136473848915, "learning_rate": 2.3198416242721e-06, "loss": 1.2785, "step": 67365 }, { "epoch": 1.986436679935132, "grad_norm": 2.7933558040598143, "learning_rate": 2.3192469872017447e-06, "loss": 1.2397, "step": 67370 }, { "epoch": 1.9865841073271415, "grad_norm": 2.879816355559967, "learning_rate": 2.3186523922329324e-06, "loss": 1.3127, "step": 67375 }, { "epoch": 1.986731534719151, "grad_norm": 2.74869152010151, "learning_rate": 2.318057839383156e-06, "loss": 1.2891, "step": 67380 }, { "epoch": 1.9868789621111602, "grad_norm": 2.7900250710446777, "learning_rate": 2.3174633286699148e-06, "loss": 1.2744, "step": 67385 }, { "epoch": 1.9870263895031697, "grad_norm": 2.863644207614288, "learning_rate": 2.3168688601107008e-06, "loss": 1.3066, "step": 67390 }, { "epoch": 1.9871738168951791, "grad_norm": 2.7207344429049254, "learning_rate": 2.316274433723006e-06, "loss": 1.2712, "step": 67395 }, { "epoch": 1.9873212442871886, "grad_norm": 2.7812529130519854, "learning_rate": 2.3156800495243236e-06, "loss": 1.2871, "step": 67400 }, { "epoch": 1.9874686716791978, "grad_norm": 2.740182764133502, "learning_rate": 2.315085707532143e-06, "loss": 1.2973, "step": 67405 }, { "epoch": 1.9876160990712073, "grad_norm": 2.8052623040651428, "learning_rate": 2.314491407763955e-06, "loss": 1.2386, "step": 67410 }, { "epoch": 1.9877635264632167, "grad_norm": 2.7592183348045376, "learning_rate": 2.313897150237245e-06, "loss": 1.2682, "step": 67415 }, { "epoch": 1.9879109538552262, "grad_norm": 2.9020357221801114, "learning_rate": 2.313302934969503e-06, "loss": 1.2506, "step": 67420 }, { "epoch": 1.9880583812472357, "grad_norm": 2.901941595978027, "learning_rate": 2.312708761978211e-06, "loss": 1.3368, "step": 67425 }, { "epoch": 1.9882058086392451, "grad_norm": 2.8376999161881438, "learning_rate": 2.312114631280856e-06, "loss": 1.3277, "step": 67430 }, { "epoch": 1.9883532360312546, "grad_norm": 2.9535682755628354, "learning_rate": 2.3115205428949195e-06, "loss": 1.3225, "step": 67435 }, { "epoch": 1.988500663423264, "grad_norm": 2.8350915304182345, "learning_rate": 2.3109264968378835e-06, "loss": 1.2641, "step": 67440 }, { "epoch": 1.9886480908152735, "grad_norm": 2.9055447776322456, "learning_rate": 2.3103324931272287e-06, "loss": 1.283, "step": 67445 }, { "epoch": 1.988795518207283, "grad_norm": 2.7550767055318173, "learning_rate": 2.3097385317804348e-06, "loss": 1.2439, "step": 67450 }, { "epoch": 1.9889429455992924, "grad_norm": 2.74409316041548, "learning_rate": 2.3091446128149775e-06, "loss": 1.2102, "step": 67455 }, { "epoch": 1.9890903729913019, "grad_norm": 2.6225484630903586, "learning_rate": 2.308550736248337e-06, "loss": 1.2403, "step": 67460 }, { "epoch": 1.9892378003833113, "grad_norm": 2.75797009652723, "learning_rate": 2.307956902097987e-06, "loss": 1.2432, "step": 67465 }, { "epoch": 1.9893852277753208, "grad_norm": 2.7593968673064895, "learning_rate": 2.307363110381401e-06, "loss": 1.2883, "step": 67470 }, { "epoch": 1.9895326551673302, "grad_norm": 2.877932388241155, "learning_rate": 2.3067693611160535e-06, "loss": 1.2563, "step": 67475 }, { "epoch": 1.9896800825593395, "grad_norm": 2.8748076772184534, "learning_rate": 2.3061756543194145e-06, "loss": 1.2811, "step": 67480 }, { "epoch": 1.989827509951349, "grad_norm": 2.8315011763356615, "learning_rate": 2.3055819900089566e-06, "loss": 1.2745, "step": 67485 }, { "epoch": 1.9899749373433584, "grad_norm": 2.86098267284367, "learning_rate": 2.3049883682021475e-06, "loss": 1.3171, "step": 67490 }, { "epoch": 1.9901223647353679, "grad_norm": 2.8408547112384452, "learning_rate": 2.3043947889164563e-06, "loss": 1.2828, "step": 67495 }, { "epoch": 1.9902697921273773, "grad_norm": 2.7118663816974062, "learning_rate": 2.303801252169347e-06, "loss": 1.2331, "step": 67500 }, { "epoch": 1.9902697921273773, "eval_loss": 1.060346007347107, "eval_runtime": 4.2674, "eval_samples_per_second": 92.797, "eval_steps_per_second": 3.046, "step": 67500 }, { "epoch": 1.9904172195193865, "grad_norm": 2.851223636325769, "learning_rate": 2.30320775797829e-06, "loss": 1.2954, "step": 67505 }, { "epoch": 1.990564646911396, "grad_norm": 2.7525171070231256, "learning_rate": 2.3026143063607448e-06, "loss": 1.2847, "step": 67510 }, { "epoch": 1.9907120743034055, "grad_norm": 2.7991611438964483, "learning_rate": 2.3020208973341774e-06, "loss": 1.2674, "step": 67515 }, { "epoch": 1.990859501695415, "grad_norm": 2.8692814859012135, "learning_rate": 2.301427530916048e-06, "loss": 1.2715, "step": 67520 }, { "epoch": 1.9910069290874244, "grad_norm": 2.6801032449915327, "learning_rate": 2.3008342071238186e-06, "loss": 1.3221, "step": 67525 }, { "epoch": 1.9911543564794338, "grad_norm": 2.8460495942947706, "learning_rate": 2.3002409259749473e-06, "loss": 1.3237, "step": 67530 }, { "epoch": 1.9913017838714433, "grad_norm": 2.723711795969826, "learning_rate": 2.299647687486891e-06, "loss": 1.2868, "step": 67535 }, { "epoch": 1.9914492112634528, "grad_norm": 2.963780995145901, "learning_rate": 2.2990544916771083e-06, "loss": 1.3017, "step": 67540 }, { "epoch": 1.9915966386554622, "grad_norm": 2.654997567266006, "learning_rate": 2.298461338563054e-06, "loss": 1.2798, "step": 67545 }, { "epoch": 1.9917440660474717, "grad_norm": 2.854863053052354, "learning_rate": 2.2978682281621827e-06, "loss": 1.3316, "step": 67550 }, { "epoch": 1.9918914934394811, "grad_norm": 2.887030924251587, "learning_rate": 2.2972751604919454e-06, "loss": 1.2838, "step": 67555 }, { "epoch": 1.9920389208314906, "grad_norm": 2.7676985589897987, "learning_rate": 2.2966821355697974e-06, "loss": 1.2576, "step": 67560 }, { "epoch": 1.9921863482235, "grad_norm": 2.7582254003660296, "learning_rate": 2.2960891534131855e-06, "loss": 1.316, "step": 67565 }, { "epoch": 1.9923337756155095, "grad_norm": 2.7217435408121324, "learning_rate": 2.295496214039561e-06, "loss": 1.251, "step": 67570 }, { "epoch": 1.9924812030075187, "grad_norm": 2.7148004960744427, "learning_rate": 2.294903317466371e-06, "loss": 1.2969, "step": 67575 }, { "epoch": 1.9926286303995282, "grad_norm": 2.823840130176967, "learning_rate": 2.2943104637110636e-06, "loss": 1.2761, "step": 67580 }, { "epoch": 1.9927760577915377, "grad_norm": 2.8666322461063367, "learning_rate": 2.2937176527910807e-06, "loss": 1.2756, "step": 67585 }, { "epoch": 1.9929234851835471, "grad_norm": 2.9041851163135393, "learning_rate": 2.293124884723871e-06, "loss": 1.2825, "step": 67590 }, { "epoch": 1.9930709125755566, "grad_norm": 2.7075396700258216, "learning_rate": 2.2925321595268748e-06, "loss": 1.2622, "step": 67595 }, { "epoch": 1.9932183399675658, "grad_norm": 2.773705741048659, "learning_rate": 2.2919394772175333e-06, "loss": 1.2546, "step": 67600 }, { "epoch": 1.9933657673595753, "grad_norm": 2.814438703417794, "learning_rate": 2.2913468378132892e-06, "loss": 1.2698, "step": 67605 }, { "epoch": 1.9935131947515847, "grad_norm": 2.756705689435058, "learning_rate": 2.2907542413315775e-06, "loss": 1.2572, "step": 67610 }, { "epoch": 1.9936606221435942, "grad_norm": 2.795458566798593, "learning_rate": 2.2901616877898406e-06, "loss": 1.3194, "step": 67615 }, { "epoch": 1.9938080495356036, "grad_norm": 2.79390827653465, "learning_rate": 2.2895691772055118e-06, "loss": 1.2692, "step": 67620 }, { "epoch": 1.993955476927613, "grad_norm": 2.8362568101318377, "learning_rate": 2.288976709596028e-06, "loss": 1.2917, "step": 67625 }, { "epoch": 1.9941029043196226, "grad_norm": 2.755138571893284, "learning_rate": 2.2883842849788225e-06, "loss": 1.2112, "step": 67630 }, { "epoch": 1.994250331711632, "grad_norm": 2.7595378752004858, "learning_rate": 2.287791903371329e-06, "loss": 1.2697, "step": 67635 }, { "epoch": 1.9943977591036415, "grad_norm": 2.7617579213565873, "learning_rate": 2.287199564790977e-06, "loss": 1.2209, "step": 67640 }, { "epoch": 1.994545186495651, "grad_norm": 2.751095916804453, "learning_rate": 2.2866072692552003e-06, "loss": 1.2831, "step": 67645 }, { "epoch": 1.9946926138876604, "grad_norm": 2.7090403810530246, "learning_rate": 2.2860150167814244e-06, "loss": 1.2726, "step": 67650 }, { "epoch": 1.9948400412796699, "grad_norm": 2.8129812809457095, "learning_rate": 2.285422807387079e-06, "loss": 1.2792, "step": 67655 }, { "epoch": 1.9949874686716793, "grad_norm": 2.8150033548839515, "learning_rate": 2.28483064108959e-06, "loss": 1.2559, "step": 67660 }, { "epoch": 1.9951348960636888, "grad_norm": 2.7703491675586855, "learning_rate": 2.284238517906381e-06, "loss": 1.3128, "step": 67665 }, { "epoch": 1.9952823234556982, "grad_norm": 2.820606672372687, "learning_rate": 2.283646437854879e-06, "loss": 1.2877, "step": 67670 }, { "epoch": 1.9954297508477075, "grad_norm": 2.8643349147225456, "learning_rate": 2.283054400952504e-06, "loss": 1.2523, "step": 67675 }, { "epoch": 1.995577178239717, "grad_norm": 2.7910211406996392, "learning_rate": 2.2824624072166794e-06, "loss": 1.2543, "step": 67680 }, { "epoch": 1.9957246056317264, "grad_norm": 2.7279385196976853, "learning_rate": 2.2818704566648223e-06, "loss": 1.2325, "step": 67685 }, { "epoch": 1.9958720330237358, "grad_norm": 2.878389858045955, "learning_rate": 2.2812785493143548e-06, "loss": 1.3572, "step": 67690 }, { "epoch": 1.9960194604157453, "grad_norm": 2.7769236374547144, "learning_rate": 2.280686685182693e-06, "loss": 1.2844, "step": 67695 }, { "epoch": 1.9961668878077545, "grad_norm": 2.8789137419552557, "learning_rate": 2.2800948642872526e-06, "loss": 1.2883, "step": 67700 }, { "epoch": 1.996314315199764, "grad_norm": 2.733370077583121, "learning_rate": 2.2795030866454496e-06, "loss": 1.233, "step": 67705 }, { "epoch": 1.9964617425917734, "grad_norm": 2.826373511014775, "learning_rate": 2.2789113522746983e-06, "loss": 1.2631, "step": 67710 }, { "epoch": 1.996609169983783, "grad_norm": 2.687938251967589, "learning_rate": 2.2783196611924085e-06, "loss": 1.2731, "step": 67715 }, { "epoch": 1.9967565973757924, "grad_norm": 2.776746710680107, "learning_rate": 2.277728013415995e-06, "loss": 1.2818, "step": 67720 }, { "epoch": 1.9969040247678018, "grad_norm": 2.800595170689384, "learning_rate": 2.2771364089628652e-06, "loss": 1.2984, "step": 67725 }, { "epoch": 1.9970514521598113, "grad_norm": 2.8671667708444177, "learning_rate": 2.276544847850428e-06, "loss": 1.2591, "step": 67730 }, { "epoch": 1.9971988795518207, "grad_norm": 2.879030748362099, "learning_rate": 2.275953330096092e-06, "loss": 1.3462, "step": 67735 }, { "epoch": 1.9973463069438302, "grad_norm": 2.729810119017831, "learning_rate": 2.2753618557172612e-06, "loss": 1.2966, "step": 67740 }, { "epoch": 1.9974937343358397, "grad_norm": 2.9241522892853453, "learning_rate": 2.2747704247313427e-06, "loss": 1.2726, "step": 67745 }, { "epoch": 1.9976411617278491, "grad_norm": 2.8309372791841136, "learning_rate": 2.2741790371557387e-06, "loss": 1.2496, "step": 67750 }, { "epoch": 1.9977885891198586, "grad_norm": 2.97153066771903, "learning_rate": 2.2735876930078523e-06, "loss": 1.3153, "step": 67755 }, { "epoch": 1.997936016511868, "grad_norm": 2.7317231930946124, "learning_rate": 2.272996392305082e-06, "loss": 1.2679, "step": 67760 }, { "epoch": 1.9980834439038775, "grad_norm": 2.7238859718304766, "learning_rate": 2.2724051350648316e-06, "loss": 1.214, "step": 67765 }, { "epoch": 1.9982308712958867, "grad_norm": 2.729886521018894, "learning_rate": 2.271813921304495e-06, "loss": 1.2744, "step": 67770 }, { "epoch": 1.9983782986878962, "grad_norm": 2.7171093717851162, "learning_rate": 2.2712227510414733e-06, "loss": 1.2999, "step": 67775 }, { "epoch": 1.9985257260799056, "grad_norm": 2.9275475814605274, "learning_rate": 2.2706316242931605e-06, "loss": 1.258, "step": 67780 }, { "epoch": 1.998673153471915, "grad_norm": 2.941311212806831, "learning_rate": 2.270040541076952e-06, "loss": 1.3075, "step": 67785 }, { "epoch": 1.9988205808639246, "grad_norm": 2.8282194305853383, "learning_rate": 2.269449501410241e-06, "loss": 1.3325, "step": 67790 }, { "epoch": 1.9989680082559338, "grad_norm": 2.8956417001982953, "learning_rate": 2.2688585053104165e-06, "loss": 1.2632, "step": 67795 }, { "epoch": 1.9991154356479433, "grad_norm": 2.6898412996739816, "learning_rate": 2.2682675527948743e-06, "loss": 1.2179, "step": 67800 }, { "epoch": 1.9992628630399527, "grad_norm": 2.9505666665271217, "learning_rate": 2.2676766438810002e-06, "loss": 1.2606, "step": 67805 }, { "epoch": 1.9994102904319622, "grad_norm": 2.8300594288037138, "learning_rate": 2.267085778586184e-06, "loss": 1.3171, "step": 67810 }, { "epoch": 1.9995577178239716, "grad_norm": 2.802450238783491, "learning_rate": 2.2664949569278104e-06, "loss": 1.2833, "step": 67815 }, { "epoch": 1.999705145215981, "grad_norm": 2.80901398499398, "learning_rate": 2.2659041789232688e-06, "loss": 1.2942, "step": 67820 }, { "epoch": 1.9998525726079905, "grad_norm": 2.78391152465712, "learning_rate": 2.2653134445899396e-06, "loss": 1.2949, "step": 67825 }, { "epoch": 2.0, "grad_norm": 2.8380632155009353, "learning_rate": 2.264722753945209e-06, "loss": 1.2655, "step": 67830 }, { "epoch": 2.0001474273920095, "grad_norm": 3.402000042612853, "learning_rate": 2.2641321070064562e-06, "loss": 1.0635, "step": 67835 }, { "epoch": 2.000294854784019, "grad_norm": 3.8785280213518574, "learning_rate": 2.2635415037910643e-06, "loss": 1.0875, "step": 67840 }, { "epoch": 2.0004422821760284, "grad_norm": 3.1730713698527078, "learning_rate": 2.2629509443164097e-06, "loss": 1.0462, "step": 67845 }, { "epoch": 2.000589709568038, "grad_norm": 2.9682865118824573, "learning_rate": 2.2623604285998727e-06, "loss": 1.0406, "step": 67850 }, { "epoch": 2.0007371369600473, "grad_norm": 3.1699555588396313, "learning_rate": 2.2617699566588285e-06, "loss": 1.0747, "step": 67855 }, { "epoch": 2.0008845643520567, "grad_norm": 3.158981004129435, "learning_rate": 2.2611795285106528e-06, "loss": 1.0685, "step": 67860 }, { "epoch": 2.001031991744066, "grad_norm": 3.117958888836187, "learning_rate": 2.2605891441727193e-06, "loss": 1.0731, "step": 67865 }, { "epoch": 2.0011794191360757, "grad_norm": 3.243477148531734, "learning_rate": 2.2599988036624e-06, "loss": 1.1094, "step": 67870 }, { "epoch": 2.001326846528085, "grad_norm": 3.3604485813554574, "learning_rate": 2.259408506997069e-06, "loss": 1.0571, "step": 67875 }, { "epoch": 2.001474273920094, "grad_norm": 3.1642542435253307, "learning_rate": 2.2588182541940936e-06, "loss": 1.0097, "step": 67880 }, { "epoch": 2.0016217013121036, "grad_norm": 3.2670081050403814, "learning_rate": 2.2582280452708445e-06, "loss": 1.0961, "step": 67885 }, { "epoch": 2.001769128704113, "grad_norm": 3.122659327620947, "learning_rate": 2.257637880244687e-06, "loss": 1.029, "step": 67890 }, { "epoch": 2.0019165560961225, "grad_norm": 3.341492884533415, "learning_rate": 2.257047759132991e-06, "loss": 1.0614, "step": 67895 }, { "epoch": 2.002063983488132, "grad_norm": 3.1995350637726894, "learning_rate": 2.2564576819531175e-06, "loss": 1.0341, "step": 67900 }, { "epoch": 2.0022114108801414, "grad_norm": 3.058339420793374, "learning_rate": 2.255867648722434e-06, "loss": 1.0255, "step": 67905 }, { "epoch": 2.002358838272151, "grad_norm": 3.177940876683098, "learning_rate": 2.2552776594583e-06, "loss": 1.0821, "step": 67910 }, { "epoch": 2.0025062656641603, "grad_norm": 3.12758679051782, "learning_rate": 2.254687714178079e-06, "loss": 1.0741, "step": 67915 }, { "epoch": 2.00265369305617, "grad_norm": 3.08859787137413, "learning_rate": 2.254097812899129e-06, "loss": 1.0541, "step": 67920 }, { "epoch": 2.0028011204481793, "grad_norm": 3.3162504771412, "learning_rate": 2.253507955638807e-06, "loss": 1.0957, "step": 67925 }, { "epoch": 2.0029485478401887, "grad_norm": 3.165268919889141, "learning_rate": 2.252918142414475e-06, "loss": 1.0948, "step": 67930 }, { "epoch": 2.003095975232198, "grad_norm": 3.2238566930543286, "learning_rate": 2.252328373243484e-06, "loss": 1.0579, "step": 67935 }, { "epoch": 2.0032434026242076, "grad_norm": 3.2476451613385646, "learning_rate": 2.251738648143192e-06, "loss": 1.0385, "step": 67940 }, { "epoch": 2.003390830016217, "grad_norm": 3.2369881907755755, "learning_rate": 2.2511489671309502e-06, "loss": 1.0444, "step": 67945 }, { "epoch": 2.0035382574082266, "grad_norm": 3.1914064435155063, "learning_rate": 2.2505593302241132e-06, "loss": 1.0157, "step": 67950 }, { "epoch": 2.003685684800236, "grad_norm": 3.3251349715183403, "learning_rate": 2.2499697374400275e-06, "loss": 1.0567, "step": 67955 }, { "epoch": 2.0038331121922455, "grad_norm": 3.185254888979692, "learning_rate": 2.2493801887960476e-06, "loss": 1.0866, "step": 67960 }, { "epoch": 2.003980539584255, "grad_norm": 3.179738732347238, "learning_rate": 2.2487906843095178e-06, "loss": 1.0782, "step": 67965 }, { "epoch": 2.0041279669762644, "grad_norm": 3.22117014810191, "learning_rate": 2.2482012239977877e-06, "loss": 1.0815, "step": 67970 }, { "epoch": 2.004275394368274, "grad_norm": 3.24895579800393, "learning_rate": 2.247611807878199e-06, "loss": 1.1039, "step": 67975 }, { "epoch": 2.004422821760283, "grad_norm": 3.240120027363462, "learning_rate": 2.2470224359681006e-06, "loss": 1.0504, "step": 67980 }, { "epoch": 2.0045702491522923, "grad_norm": 3.31404284838246, "learning_rate": 2.246433108284833e-06, "loss": 1.0477, "step": 67985 }, { "epoch": 2.0047176765443018, "grad_norm": 3.17028318550871, "learning_rate": 2.2458438248457377e-06, "loss": 1.0486, "step": 67990 }, { "epoch": 2.0048651039363112, "grad_norm": 3.3586072672441434, "learning_rate": 2.2452545856681555e-06, "loss": 1.0922, "step": 67995 }, { "epoch": 2.0050125313283207, "grad_norm": 3.2405136747519188, "learning_rate": 2.2446653907694243e-06, "loss": 1.0402, "step": 68000 }, { "epoch": 2.0050125313283207, "eval_loss": 1.0836808681488037, "eval_runtime": 4.161, "eval_samples_per_second": 95.17, "eval_steps_per_second": 3.124, "step": 68000 }, { "epoch": 2.00515995872033, "grad_norm": 3.3974956810547834, "learning_rate": 2.2440762401668846e-06, "loss": 1.0416, "step": 68005 }, { "epoch": 2.0053073861123396, "grad_norm": 3.2276835359568055, "learning_rate": 2.24348713387787e-06, "loss": 1.0479, "step": 68010 }, { "epoch": 2.005454813504349, "grad_norm": 3.5363979592398596, "learning_rate": 2.2428980719197177e-06, "loss": 1.0907, "step": 68015 }, { "epoch": 2.0056022408963585, "grad_norm": 3.2794262696717644, "learning_rate": 2.2423090543097584e-06, "loss": 1.0654, "step": 68020 }, { "epoch": 2.005749668288368, "grad_norm": 3.337440391482492, "learning_rate": 2.241720081065329e-06, "loss": 1.0477, "step": 68025 }, { "epoch": 2.0058970956803774, "grad_norm": 3.27447456242082, "learning_rate": 2.241131152203757e-06, "loss": 1.0535, "step": 68030 }, { "epoch": 2.006044523072387, "grad_norm": 3.222940318930257, "learning_rate": 2.240542267742375e-06, "loss": 1.0869, "step": 68035 }, { "epoch": 2.0061919504643964, "grad_norm": 3.2907550848834886, "learning_rate": 2.23995342769851e-06, "loss": 1.0514, "step": 68040 }, { "epoch": 2.006339377856406, "grad_norm": 3.2529414088081547, "learning_rate": 2.23936463208949e-06, "loss": 1.0914, "step": 68045 }, { "epoch": 2.0064868052484153, "grad_norm": 3.297100096395825, "learning_rate": 2.238775880932641e-06, "loss": 1.0294, "step": 68050 }, { "epoch": 2.0066342326404247, "grad_norm": 3.2408393720941024, "learning_rate": 2.2381871742452854e-06, "loss": 1.0426, "step": 68055 }, { "epoch": 2.006781660032434, "grad_norm": 3.1735260139334933, "learning_rate": 2.2375985120447505e-06, "loss": 1.0654, "step": 68060 }, { "epoch": 2.0069290874244436, "grad_norm": 3.2296049648589604, "learning_rate": 2.2370098943483555e-06, "loss": 1.05, "step": 68065 }, { "epoch": 2.007076514816453, "grad_norm": 3.2296740974885942, "learning_rate": 2.2364213211734224e-06, "loss": 1.0725, "step": 68070 }, { "epoch": 2.007223942208462, "grad_norm": 3.101182655854648, "learning_rate": 2.2358327925372694e-06, "loss": 1.0464, "step": 68075 }, { "epoch": 2.0073713696004716, "grad_norm": 3.217032987264476, "learning_rate": 2.2352443084572165e-06, "loss": 1.0594, "step": 68080 }, { "epoch": 2.007518796992481, "grad_norm": 3.150346040329662, "learning_rate": 2.2346558689505776e-06, "loss": 1.0417, "step": 68085 }, { "epoch": 2.0076662243844905, "grad_norm": 3.1814257353033577, "learning_rate": 2.2340674740346728e-06, "loss": 1.0306, "step": 68090 }, { "epoch": 2.0078136517765, "grad_norm": 3.11668667774031, "learning_rate": 2.233479123726812e-06, "loss": 0.9936, "step": 68095 }, { "epoch": 2.0079610791685094, "grad_norm": 3.2207552529156165, "learning_rate": 2.2328908180443106e-06, "loss": 1.0658, "step": 68100 }, { "epoch": 2.008108506560519, "grad_norm": 3.0992718009123768, "learning_rate": 2.2323025570044775e-06, "loss": 1.0611, "step": 68105 }, { "epoch": 2.0082559339525283, "grad_norm": 3.089617480210097, "learning_rate": 2.2317143406246263e-06, "loss": 1.0534, "step": 68110 }, { "epoch": 2.008403361344538, "grad_norm": 3.2834239868891943, "learning_rate": 2.2311261689220646e-06, "loss": 1.065, "step": 68115 }, { "epoch": 2.0085507887365472, "grad_norm": 3.323673303702118, "learning_rate": 2.230538041914099e-06, "loss": 1.0714, "step": 68120 }, { "epoch": 2.0086982161285567, "grad_norm": 3.240298281163259, "learning_rate": 2.229949959618037e-06, "loss": 1.1088, "step": 68125 }, { "epoch": 2.008845643520566, "grad_norm": 3.233686433443702, "learning_rate": 2.229361922051182e-06, "loss": 1.0378, "step": 68130 }, { "epoch": 2.0089930709125756, "grad_norm": 3.4891718649817185, "learning_rate": 2.2287739292308397e-06, "loss": 1.038, "step": 68135 }, { "epoch": 2.009140498304585, "grad_norm": 3.1992075908216275, "learning_rate": 2.2281859811743113e-06, "loss": 1.0306, "step": 68140 }, { "epoch": 2.0092879256965945, "grad_norm": 3.264290652120574, "learning_rate": 2.2275980778988987e-06, "loss": 1.0477, "step": 68145 }, { "epoch": 2.009435353088604, "grad_norm": 3.2332825823246667, "learning_rate": 2.2270102194219e-06, "loss": 1.0254, "step": 68150 }, { "epoch": 2.0095827804806135, "grad_norm": 3.138589076865646, "learning_rate": 2.226422405760616e-06, "loss": 1.0489, "step": 68155 }, { "epoch": 2.009730207872623, "grad_norm": 3.1526579031759394, "learning_rate": 2.225834636932341e-06, "loss": 1.0767, "step": 68160 }, { "epoch": 2.0098776352646324, "grad_norm": 3.4189117001084637, "learning_rate": 2.225246912954373e-06, "loss": 1.0598, "step": 68165 }, { "epoch": 2.0100250626566414, "grad_norm": 3.2675711621456784, "learning_rate": 2.224659233844005e-06, "loss": 1.0485, "step": 68170 }, { "epoch": 2.010172490048651, "grad_norm": 3.18804644057262, "learning_rate": 2.2240715996185323e-06, "loss": 1.0876, "step": 68175 }, { "epoch": 2.0103199174406603, "grad_norm": 3.2382479871474357, "learning_rate": 2.223484010295245e-06, "loss": 1.0611, "step": 68180 }, { "epoch": 2.0104673448326698, "grad_norm": 3.401114423621638, "learning_rate": 2.2228964658914324e-06, "loss": 1.0897, "step": 68185 }, { "epoch": 2.010614772224679, "grad_norm": 3.281122193922984, "learning_rate": 2.222308966424386e-06, "loss": 1.0423, "step": 68190 }, { "epoch": 2.0107621996166887, "grad_norm": 3.198613321000945, "learning_rate": 2.2217215119113927e-06, "loss": 1.0465, "step": 68195 }, { "epoch": 2.010909627008698, "grad_norm": 3.3534180556078304, "learning_rate": 2.221134102369739e-06, "loss": 1.0386, "step": 68200 }, { "epoch": 2.0110570544007076, "grad_norm": 3.2267023575217224, "learning_rate": 2.2205467378167094e-06, "loss": 1.053, "step": 68205 }, { "epoch": 2.011204481792717, "grad_norm": 3.354426000787842, "learning_rate": 2.2199594182695893e-06, "loss": 1.0392, "step": 68210 }, { "epoch": 2.0113519091847265, "grad_norm": 3.364574664275042, "learning_rate": 2.219372143745659e-06, "loss": 1.0754, "step": 68215 }, { "epoch": 2.011499336576736, "grad_norm": 3.1175470933503604, "learning_rate": 2.2187849142622037e-06, "loss": 1.0091, "step": 68220 }, { "epoch": 2.0116467639687454, "grad_norm": 3.3192724082882674, "learning_rate": 2.2181977298364987e-06, "loss": 1.0707, "step": 68225 }, { "epoch": 2.011794191360755, "grad_norm": 3.3827962589118976, "learning_rate": 2.217610590485826e-06, "loss": 1.0751, "step": 68230 }, { "epoch": 2.0119416187527643, "grad_norm": 3.302384506757616, "learning_rate": 2.21702349622746e-06, "loss": 1.07, "step": 68235 }, { "epoch": 2.012089046144774, "grad_norm": 3.2015079324289784, "learning_rate": 2.2164364470786795e-06, "loss": 1.0393, "step": 68240 }, { "epoch": 2.0122364735367833, "grad_norm": 3.144350940658862, "learning_rate": 2.2158494430567575e-06, "loss": 1.067, "step": 68245 }, { "epoch": 2.0123839009287927, "grad_norm": 3.268742706747316, "learning_rate": 2.215262484178967e-06, "loss": 1.0702, "step": 68250 }, { "epoch": 2.012531328320802, "grad_norm": 3.1094163532007126, "learning_rate": 2.214675570462581e-06, "loss": 1.0398, "step": 68255 }, { "epoch": 2.0126787557128116, "grad_norm": 3.217145488858265, "learning_rate": 2.2140887019248678e-06, "loss": 1.0521, "step": 68260 }, { "epoch": 2.012826183104821, "grad_norm": 3.193763301721656, "learning_rate": 2.2135018785831003e-06, "loss": 1.0501, "step": 68265 }, { "epoch": 2.01297361049683, "grad_norm": 3.17228185978539, "learning_rate": 2.2129151004545435e-06, "loss": 1.077, "step": 68270 }, { "epoch": 2.0131210378888396, "grad_norm": 3.1500064068983744, "learning_rate": 2.212328367556466e-06, "loss": 1.0407, "step": 68275 }, { "epoch": 2.013268465280849, "grad_norm": 3.2138159711249665, "learning_rate": 2.2117416799061305e-06, "loss": 1.0529, "step": 68280 }, { "epoch": 2.0134158926728585, "grad_norm": 3.307461923344951, "learning_rate": 2.2111550375208037e-06, "loss": 1.0772, "step": 68285 }, { "epoch": 2.013563320064868, "grad_norm": 3.2574351185538917, "learning_rate": 2.210568440417746e-06, "loss": 1.044, "step": 68290 }, { "epoch": 2.0137107474568774, "grad_norm": 3.2399344514956208, "learning_rate": 2.2099818886142212e-06, "loss": 1.0369, "step": 68295 }, { "epoch": 2.013858174848887, "grad_norm": 3.421753872699723, "learning_rate": 2.2093953821274873e-06, "loss": 1.0795, "step": 68300 }, { "epoch": 2.0140056022408963, "grad_norm": 3.3333228485012047, "learning_rate": 2.208808920974804e-06, "loss": 1.0693, "step": 68305 }, { "epoch": 2.0141530296329058, "grad_norm": 3.0428861391726882, "learning_rate": 2.208222505173428e-06, "loss": 1.0278, "step": 68310 }, { "epoch": 2.0143004570249152, "grad_norm": 3.264673933552158, "learning_rate": 2.2076361347406137e-06, "loss": 1.0561, "step": 68315 }, { "epoch": 2.0144478844169247, "grad_norm": 3.353842488927946, "learning_rate": 2.207049809693619e-06, "loss": 1.0562, "step": 68320 }, { "epoch": 2.014595311808934, "grad_norm": 3.2829387954779414, "learning_rate": 2.206463530049695e-06, "loss": 1.0484, "step": 68325 }, { "epoch": 2.0147427392009436, "grad_norm": 3.11758344378314, "learning_rate": 2.2058772958260945e-06, "loss": 1.0485, "step": 68330 }, { "epoch": 2.014890166592953, "grad_norm": 3.4148911079255626, "learning_rate": 2.2052911070400667e-06, "loss": 1.0603, "step": 68335 }, { "epoch": 2.0150375939849625, "grad_norm": 3.29410585085009, "learning_rate": 2.204704963708863e-06, "loss": 1.0695, "step": 68340 }, { "epoch": 2.015185021376972, "grad_norm": 3.2030203980349787, "learning_rate": 2.2041188658497294e-06, "loss": 1.0444, "step": 68345 }, { "epoch": 2.0153324487689814, "grad_norm": 3.2122341233266942, "learning_rate": 2.2035328134799145e-06, "loss": 1.0757, "step": 68350 }, { "epoch": 2.015479876160991, "grad_norm": 3.461857597093626, "learning_rate": 2.2029468066166614e-06, "loss": 1.058, "step": 68355 }, { "epoch": 2.0156273035530003, "grad_norm": 3.377319241388079, "learning_rate": 2.202360845277216e-06, "loss": 1.0696, "step": 68360 }, { "epoch": 2.0157747309450094, "grad_norm": 3.176534502627973, "learning_rate": 2.2017749294788187e-06, "loss": 1.0437, "step": 68365 }, { "epoch": 2.015922158337019, "grad_norm": 3.129037290798058, "learning_rate": 2.2011890592387134e-06, "loss": 1.0524, "step": 68370 }, { "epoch": 2.0160695857290283, "grad_norm": 3.544271005571151, "learning_rate": 2.2006032345741382e-06, "loss": 1.0674, "step": 68375 }, { "epoch": 2.0162170131210377, "grad_norm": 3.4237249062845385, "learning_rate": 2.200017455502332e-06, "loss": 1.0062, "step": 68380 }, { "epoch": 2.016364440513047, "grad_norm": 3.2652224805663836, "learning_rate": 2.1994317220405324e-06, "loss": 1.0425, "step": 68385 }, { "epoch": 2.0165118679050567, "grad_norm": 3.1797758123785353, "learning_rate": 2.1988460342059734e-06, "loss": 1.0924, "step": 68390 }, { "epoch": 2.016659295297066, "grad_norm": 3.270750361843639, "learning_rate": 2.1982603920158925e-06, "loss": 1.0879, "step": 68395 }, { "epoch": 2.0168067226890756, "grad_norm": 3.428948027571656, "learning_rate": 2.197674795487521e-06, "loss": 1.0836, "step": 68400 }, { "epoch": 2.016954150081085, "grad_norm": 3.241824545433827, "learning_rate": 2.1970892446380916e-06, "loss": 1.0403, "step": 68405 }, { "epoch": 2.0171015774730945, "grad_norm": 3.3949475615488014, "learning_rate": 2.1965037394848334e-06, "loss": 1.0626, "step": 68410 }, { "epoch": 2.017249004865104, "grad_norm": 3.2513866016263937, "learning_rate": 2.195918280044978e-06, "loss": 0.969, "step": 68415 }, { "epoch": 2.0173964322571134, "grad_norm": 3.1865108547613374, "learning_rate": 2.19533286633575e-06, "loss": 1.0765, "step": 68420 }, { "epoch": 2.017543859649123, "grad_norm": 3.411577691460936, "learning_rate": 2.194747498374379e-06, "loss": 1.0842, "step": 68425 }, { "epoch": 2.0176912870411323, "grad_norm": 3.1928480816193052, "learning_rate": 2.1941621761780884e-06, "loss": 1.0569, "step": 68430 }, { "epoch": 2.017838714433142, "grad_norm": 3.136460966372188, "learning_rate": 2.193576899764103e-06, "loss": 1.0075, "step": 68435 }, { "epoch": 2.0179861418251512, "grad_norm": 3.4065334386702584, "learning_rate": 2.1929916691496446e-06, "loss": 1.052, "step": 68440 }, { "epoch": 2.0181335692171607, "grad_norm": 3.258747811947046, "learning_rate": 2.1924064843519326e-06, "loss": 1.0467, "step": 68445 }, { "epoch": 2.01828099660917, "grad_norm": 3.3598502850738527, "learning_rate": 2.19182134538819e-06, "loss": 1.0607, "step": 68450 }, { "epoch": 2.0184284240011796, "grad_norm": 3.286919007075537, "learning_rate": 2.191236252275633e-06, "loss": 1.0511, "step": 68455 }, { "epoch": 2.018575851393189, "grad_norm": 3.3027874583472827, "learning_rate": 2.1906512050314795e-06, "loss": 1.0553, "step": 68460 }, { "epoch": 2.018723278785198, "grad_norm": 3.3216303689219853, "learning_rate": 2.190066203672944e-06, "loss": 1.0744, "step": 68465 }, { "epoch": 2.0188707061772075, "grad_norm": 3.3124418328099736, "learning_rate": 2.1894812482172425e-06, "loss": 1.0458, "step": 68470 }, { "epoch": 2.019018133569217, "grad_norm": 3.254377875790627, "learning_rate": 2.188896338681586e-06, "loss": 1.0214, "step": 68475 }, { "epoch": 2.0191655609612265, "grad_norm": 3.2964616491449155, "learning_rate": 2.1883114750831897e-06, "loss": 1.0916, "step": 68480 }, { "epoch": 2.019312988353236, "grad_norm": 3.290693544515214, "learning_rate": 2.187726657439259e-06, "loss": 1.0467, "step": 68485 }, { "epoch": 2.0194604157452454, "grad_norm": 3.440892322511445, "learning_rate": 2.1871418857670074e-06, "loss": 1.0382, "step": 68490 }, { "epoch": 2.019607843137255, "grad_norm": 3.1781226033045193, "learning_rate": 2.186557160083639e-06, "loss": 1.0741, "step": 68495 }, { "epoch": 2.0197552705292643, "grad_norm": 3.2939197627953405, "learning_rate": 2.185972480406362e-06, "loss": 1.0995, "step": 68500 }, { "epoch": 2.0197552705292643, "eval_loss": 1.0853028297424316, "eval_runtime": 4.2625, "eval_samples_per_second": 92.903, "eval_steps_per_second": 3.05, "step": 68500 }, { "epoch": 2.0199026979212737, "grad_norm": 3.4100337196266484, "learning_rate": 2.1853878467523817e-06, "loss": 1.0269, "step": 68505 }, { "epoch": 2.020050125313283, "grad_norm": 3.5170028155088673, "learning_rate": 2.184803259138898e-06, "loss": 1.0613, "step": 68510 }, { "epoch": 2.0201975527052927, "grad_norm": 3.3261891714451632, "learning_rate": 2.1842187175831185e-06, "loss": 1.0571, "step": 68515 }, { "epoch": 2.020344980097302, "grad_norm": 3.288187712916523, "learning_rate": 2.1836342221022394e-06, "loss": 1.0743, "step": 68520 }, { "epoch": 2.0204924074893116, "grad_norm": 3.1743298335585077, "learning_rate": 2.1830497727134624e-06, "loss": 1.0087, "step": 68525 }, { "epoch": 2.020639834881321, "grad_norm": 3.3295846864921548, "learning_rate": 2.182465369433985e-06, "loss": 1.0291, "step": 68530 }, { "epoch": 2.0207872622733305, "grad_norm": 3.239215867393445, "learning_rate": 2.1818810122810036e-06, "loss": 1.0416, "step": 68535 }, { "epoch": 2.02093468966534, "grad_norm": 3.2527812793068898, "learning_rate": 2.1812967012717146e-06, "loss": 1.0366, "step": 68540 }, { "epoch": 2.0210821170573494, "grad_norm": 3.2654055619635307, "learning_rate": 2.180712436423311e-06, "loss": 1.023, "step": 68545 }, { "epoch": 2.021229544449359, "grad_norm": 3.2829104966483738, "learning_rate": 2.180128217752986e-06, "loss": 1.0391, "step": 68550 }, { "epoch": 2.0213769718413683, "grad_norm": 3.2492143274683127, "learning_rate": 2.1795440452779323e-06, "loss": 1.0723, "step": 68555 }, { "epoch": 2.0215243992333773, "grad_norm": 3.163838425284601, "learning_rate": 2.1789599190153353e-06, "loss": 1.0457, "step": 68560 }, { "epoch": 2.021671826625387, "grad_norm": 3.478215584311213, "learning_rate": 2.1783758389823897e-06, "loss": 1.0341, "step": 68565 }, { "epoch": 2.0218192540173963, "grad_norm": 3.2848937909640914, "learning_rate": 2.1777918051962782e-06, "loss": 1.0653, "step": 68570 }, { "epoch": 2.0219666814094057, "grad_norm": 3.2409743837002734, "learning_rate": 2.1772078176741882e-06, "loss": 1.0781, "step": 68575 }, { "epoch": 2.022114108801415, "grad_norm": 3.2193725444301493, "learning_rate": 2.1766238764333055e-06, "loss": 1.0542, "step": 68580 }, { "epoch": 2.0222615361934246, "grad_norm": 3.347804125993529, "learning_rate": 2.1760399814908087e-06, "loss": 1.0713, "step": 68585 }, { "epoch": 2.022408963585434, "grad_norm": 3.246214357729553, "learning_rate": 2.175456132863886e-06, "loss": 1.0685, "step": 68590 }, { "epoch": 2.0225563909774436, "grad_norm": 3.2877271424925274, "learning_rate": 2.1748723305697126e-06, "loss": 1.0556, "step": 68595 }, { "epoch": 2.022703818369453, "grad_norm": 3.3021080506961935, "learning_rate": 2.1742885746254702e-06, "loss": 1.0647, "step": 68600 }, { "epoch": 2.0228512457614625, "grad_norm": 3.267937047565773, "learning_rate": 2.1737048650483353e-06, "loss": 1.034, "step": 68605 }, { "epoch": 2.022998673153472, "grad_norm": 3.3422425340979496, "learning_rate": 2.173121201855484e-06, "loss": 1.0595, "step": 68610 }, { "epoch": 2.0231461005454814, "grad_norm": 3.575684555030549, "learning_rate": 2.172537585064093e-06, "loss": 1.0948, "step": 68615 }, { "epoch": 2.023293527937491, "grad_norm": 3.480195482375923, "learning_rate": 2.1719540146913347e-06, "loss": 1.0937, "step": 68620 }, { "epoch": 2.0234409553295003, "grad_norm": 3.3187026916875855, "learning_rate": 2.171370490754381e-06, "loss": 1.0517, "step": 68625 }, { "epoch": 2.0235883827215098, "grad_norm": 3.4482160114437455, "learning_rate": 2.170787013270405e-06, "loss": 1.0686, "step": 68630 }, { "epoch": 2.023735810113519, "grad_norm": 3.0851750441561117, "learning_rate": 2.1702035822565727e-06, "loss": 0.9995, "step": 68635 }, { "epoch": 2.0238832375055287, "grad_norm": 3.274600887265043, "learning_rate": 2.169620197730054e-06, "loss": 1.0586, "step": 68640 }, { "epoch": 2.024030664897538, "grad_norm": 3.2705313410690318, "learning_rate": 2.169036859708016e-06, "loss": 1.0042, "step": 68645 }, { "epoch": 2.0241780922895476, "grad_norm": 3.20201871302567, "learning_rate": 2.1684535682076232e-06, "loss": 1.0326, "step": 68650 }, { "epoch": 2.024325519681557, "grad_norm": 3.365580324204669, "learning_rate": 2.1678703232460415e-06, "loss": 1.0976, "step": 68655 }, { "epoch": 2.024472947073566, "grad_norm": 3.3852729186127806, "learning_rate": 2.1672871248404298e-06, "loss": 1.0419, "step": 68660 }, { "epoch": 2.0246203744655755, "grad_norm": 3.175744589178088, "learning_rate": 2.166703973007954e-06, "loss": 1.0537, "step": 68665 }, { "epoch": 2.024767801857585, "grad_norm": 3.280291134122203, "learning_rate": 2.166120867765769e-06, "loss": 1.0631, "step": 68670 }, { "epoch": 2.0249152292495944, "grad_norm": 3.229945516924208, "learning_rate": 2.165537809131039e-06, "loss": 1.0528, "step": 68675 }, { "epoch": 2.025062656641604, "grad_norm": 3.0449966096136674, "learning_rate": 2.164954797120917e-06, "loss": 0.9978, "step": 68680 }, { "epoch": 2.0252100840336134, "grad_norm": 3.2406231840968154, "learning_rate": 2.1643718317525595e-06, "loss": 1.0648, "step": 68685 }, { "epoch": 2.025357511425623, "grad_norm": 3.3754143796308194, "learning_rate": 2.163788913043122e-06, "loss": 1.0219, "step": 68690 }, { "epoch": 2.0255049388176323, "grad_norm": 3.36214818984622, "learning_rate": 2.163206041009757e-06, "loss": 1.0647, "step": 68695 }, { "epoch": 2.0256523662096417, "grad_norm": 3.41679918493395, "learning_rate": 2.1626232156696175e-06, "loss": 1.0227, "step": 68700 }, { "epoch": 2.025799793601651, "grad_norm": 3.310060096519927, "learning_rate": 2.1620404370398506e-06, "loss": 1.0494, "step": 68705 }, { "epoch": 2.0259472209936606, "grad_norm": 3.217092870206649, "learning_rate": 2.161457705137608e-06, "loss": 1.0303, "step": 68710 }, { "epoch": 2.02609464838567, "grad_norm": 3.395376838400806, "learning_rate": 2.1608750199800354e-06, "loss": 1.0442, "step": 68715 }, { "epoch": 2.0262420757776796, "grad_norm": 3.3695499734949848, "learning_rate": 2.1602923815842805e-06, "loss": 1.0861, "step": 68720 }, { "epoch": 2.026389503169689, "grad_norm": 3.373948411316422, "learning_rate": 2.159709789967487e-06, "loss": 1.0451, "step": 68725 }, { "epoch": 2.0265369305616985, "grad_norm": 3.3585512107475943, "learning_rate": 2.159127245146801e-06, "loss": 1.0475, "step": 68730 }, { "epoch": 2.026684357953708, "grad_norm": 3.4210501488834564, "learning_rate": 2.158544747139359e-06, "loss": 1.0606, "step": 68735 }, { "epoch": 2.0268317853457174, "grad_norm": 3.3760647400145225, "learning_rate": 2.1579622959623083e-06, "loss": 1.1096, "step": 68740 }, { "epoch": 2.026979212737727, "grad_norm": 3.236304629216482, "learning_rate": 2.1573798916327818e-06, "loss": 1.0838, "step": 68745 }, { "epoch": 2.0271266401297363, "grad_norm": 3.360369216915078, "learning_rate": 2.1567975341679234e-06, "loss": 1.0773, "step": 68750 }, { "epoch": 2.0272740675217453, "grad_norm": 3.203354827491303, "learning_rate": 2.1562152235848657e-06, "loss": 0.995, "step": 68755 }, { "epoch": 2.027421494913755, "grad_norm": 3.33753500096053, "learning_rate": 2.155632959900744e-06, "loss": 1.0561, "step": 68760 }, { "epoch": 2.0275689223057642, "grad_norm": 3.3007092966183698, "learning_rate": 2.155050743132696e-06, "loss": 1.0484, "step": 68765 }, { "epoch": 2.0277163496977737, "grad_norm": 3.2877623575636252, "learning_rate": 2.154468573297847e-06, "loss": 1.0703, "step": 68770 }, { "epoch": 2.027863777089783, "grad_norm": 3.479994767997442, "learning_rate": 2.153886450413336e-06, "loss": 1.0687, "step": 68775 }, { "epoch": 2.0280112044817926, "grad_norm": 3.3250671891367536, "learning_rate": 2.1533043744962865e-06, "loss": 1.0747, "step": 68780 }, { "epoch": 2.028158631873802, "grad_norm": 3.3021880123181084, "learning_rate": 2.1527223455638294e-06, "loss": 1.0674, "step": 68785 }, { "epoch": 2.0283060592658115, "grad_norm": 3.290337301938233, "learning_rate": 2.152140363633091e-06, "loss": 1.0383, "step": 68790 }, { "epoch": 2.028453486657821, "grad_norm": 3.2592160950056637, "learning_rate": 2.151558428721197e-06, "loss": 1.0414, "step": 68795 }, { "epoch": 2.0286009140498305, "grad_norm": 3.325457801745984, "learning_rate": 2.150976540845272e-06, "loss": 1.0497, "step": 68800 }, { "epoch": 2.02874834144184, "grad_norm": 3.284994314971305, "learning_rate": 2.150394700022437e-06, "loss": 1.0531, "step": 68805 }, { "epoch": 2.0288957688338494, "grad_norm": 3.390191473131954, "learning_rate": 2.1498129062698153e-06, "loss": 1.0514, "step": 68810 }, { "epoch": 2.029043196225859, "grad_norm": 3.3491238919912334, "learning_rate": 2.1492311596045276e-06, "loss": 1.0303, "step": 68815 }, { "epoch": 2.0291906236178683, "grad_norm": 3.2517874787371297, "learning_rate": 2.1486494600436877e-06, "loss": 1.0577, "step": 68820 }, { "epoch": 2.0293380510098777, "grad_norm": 3.3367388854503095, "learning_rate": 2.1480678076044194e-06, "loss": 1.0591, "step": 68825 }, { "epoch": 2.029485478401887, "grad_norm": 3.4896746309303226, "learning_rate": 2.147486202303833e-06, "loss": 1.0995, "step": 68830 }, { "epoch": 2.0296329057938967, "grad_norm": 3.3396856237834585, "learning_rate": 2.1469046441590453e-06, "loss": 1.0631, "step": 68835 }, { "epoch": 2.029780333185906, "grad_norm": 3.290335357334856, "learning_rate": 2.1463231331871706e-06, "loss": 1.0766, "step": 68840 }, { "epoch": 2.0299277605779156, "grad_norm": 3.3093955379867044, "learning_rate": 2.1457416694053158e-06, "loss": 1.0175, "step": 68845 }, { "epoch": 2.030075187969925, "grad_norm": 3.2976402251685553, "learning_rate": 2.145160252830597e-06, "loss": 1.072, "step": 68850 }, { "epoch": 2.030222615361934, "grad_norm": 3.3693370978194954, "learning_rate": 2.144578883480119e-06, "loss": 1.0636, "step": 68855 }, { "epoch": 2.0303700427539435, "grad_norm": 3.2671008460923887, "learning_rate": 2.1439975613709908e-06, "loss": 1.0779, "step": 68860 }, { "epoch": 2.030517470145953, "grad_norm": 3.38093638942178, "learning_rate": 2.1434162865203176e-06, "loss": 1.0439, "step": 68865 }, { "epoch": 2.0306648975379624, "grad_norm": 3.1315837903525705, "learning_rate": 2.1428350589452053e-06, "loss": 1.0006, "step": 68870 }, { "epoch": 2.030812324929972, "grad_norm": 3.314787939950769, "learning_rate": 2.1422538786627563e-06, "loss": 1.073, "step": 68875 }, { "epoch": 2.0309597523219813, "grad_norm": 3.3217968891471004, "learning_rate": 2.1416727456900725e-06, "loss": 1.0381, "step": 68880 }, { "epoch": 2.031107179713991, "grad_norm": 3.4709629534057034, "learning_rate": 2.1410916600442553e-06, "loss": 1.0588, "step": 68885 }, { "epoch": 2.0312546071060003, "grad_norm": 3.4164458526859556, "learning_rate": 2.140510621742404e-06, "loss": 1.0647, "step": 68890 }, { "epoch": 2.0314020344980097, "grad_norm": 3.2936787814759465, "learning_rate": 2.1399296308016148e-06, "loss": 1.0818, "step": 68895 }, { "epoch": 2.031549461890019, "grad_norm": 3.2626473703225045, "learning_rate": 2.139348687238984e-06, "loss": 1.0593, "step": 68900 }, { "epoch": 2.0316968892820286, "grad_norm": 3.249327763477123, "learning_rate": 2.1387677910716076e-06, "loss": 1.0078, "step": 68905 }, { "epoch": 2.031844316674038, "grad_norm": 3.236152253828322, "learning_rate": 2.1381869423165785e-06, "loss": 1.0621, "step": 68910 }, { "epoch": 2.0319917440660475, "grad_norm": 3.2440091604369363, "learning_rate": 2.1376061409909908e-06, "loss": 1.0798, "step": 68915 }, { "epoch": 2.032139171458057, "grad_norm": 3.504365085054937, "learning_rate": 2.1370253871119307e-06, "loss": 1.0943, "step": 68920 }, { "epoch": 2.0322865988500665, "grad_norm": 3.1696865286401574, "learning_rate": 2.136444680696493e-06, "loss": 1.02, "step": 68925 }, { "epoch": 2.032434026242076, "grad_norm": 3.4842597756411013, "learning_rate": 2.135864021761761e-06, "loss": 1.0696, "step": 68930 }, { "epoch": 2.0325814536340854, "grad_norm": 3.238969453892221, "learning_rate": 2.1352834103248256e-06, "loss": 1.0134, "step": 68935 }, { "epoch": 2.032728881026095, "grad_norm": 3.470520116087111, "learning_rate": 2.134702846402768e-06, "loss": 1.0647, "step": 68940 }, { "epoch": 2.0328763084181043, "grad_norm": 3.244033521059376, "learning_rate": 2.1341223300126737e-06, "loss": 1.0427, "step": 68945 }, { "epoch": 2.0330237358101133, "grad_norm": 3.2703714225519165, "learning_rate": 2.133541861171625e-06, "loss": 1.0665, "step": 68950 }, { "epoch": 2.0331711632021228, "grad_norm": 3.3953264246906847, "learning_rate": 2.1329614398967027e-06, "loss": 1.0268, "step": 68955 }, { "epoch": 2.0333185905941322, "grad_norm": 3.094424162657514, "learning_rate": 2.1323810662049886e-06, "loss": 1.0316, "step": 68960 }, { "epoch": 2.0334660179861417, "grad_norm": 3.149034351979597, "learning_rate": 2.1318007401135573e-06, "loss": 1.079, "step": 68965 }, { "epoch": 2.033613445378151, "grad_norm": 3.2736738317075296, "learning_rate": 2.131220461639486e-06, "loss": 1.0266, "step": 68970 }, { "epoch": 2.0337608727701606, "grad_norm": 3.4057013985912787, "learning_rate": 2.1306402307998524e-06, "loss": 1.0723, "step": 68975 }, { "epoch": 2.03390830016217, "grad_norm": 3.3398622209382887, "learning_rate": 2.1300600476117282e-06, "loss": 1.0924, "step": 68980 }, { "epoch": 2.0340557275541795, "grad_norm": 3.4596410946937475, "learning_rate": 2.129479912092187e-06, "loss": 1.0559, "step": 68985 }, { "epoch": 2.034203154946189, "grad_norm": 3.257304989203643, "learning_rate": 2.1288998242583013e-06, "loss": 1.0472, "step": 68990 }, { "epoch": 2.0343505823381984, "grad_norm": 3.273870535258819, "learning_rate": 2.1283197841271365e-06, "loss": 1.0456, "step": 68995 }, { "epoch": 2.034498009730208, "grad_norm": 3.227232097687979, "learning_rate": 2.1277397917157666e-06, "loss": 1.063, "step": 69000 }, { "epoch": 2.034498009730208, "eval_loss": 1.0859278440475464, "eval_runtime": 4.169, "eval_samples_per_second": 94.988, "eval_steps_per_second": 3.118, "step": 69000 }, { "epoch": 2.0346454371222173, "grad_norm": 3.3451420684743614, "learning_rate": 2.1271598470412533e-06, "loss": 1.0601, "step": 69005 }, { "epoch": 2.034792864514227, "grad_norm": 3.4691574299670394, "learning_rate": 2.126579950120667e-06, "loss": 1.0889, "step": 69010 }, { "epoch": 2.0349402919062363, "grad_norm": 3.147172406935149, "learning_rate": 2.1260001009710686e-06, "loss": 1.0484, "step": 69015 }, { "epoch": 2.0350877192982457, "grad_norm": 3.2898973379834486, "learning_rate": 2.1254202996095207e-06, "loss": 1.0792, "step": 69020 }, { "epoch": 2.035235146690255, "grad_norm": 3.311887190715968, "learning_rate": 2.1248405460530877e-06, "loss": 1.0272, "step": 69025 }, { "epoch": 2.0353825740822646, "grad_norm": 3.277115951035665, "learning_rate": 2.1242608403188244e-06, "loss": 1.0723, "step": 69030 }, { "epoch": 2.035530001474274, "grad_norm": 3.272670845624608, "learning_rate": 2.123681182423795e-06, "loss": 1.0574, "step": 69035 }, { "epoch": 2.0356774288662836, "grad_norm": 3.3024672306450564, "learning_rate": 2.1231015723850525e-06, "loss": 1.0445, "step": 69040 }, { "epoch": 2.0358248562582926, "grad_norm": 3.2426969554246003, "learning_rate": 2.122522010219654e-06, "loss": 1.0393, "step": 69045 }, { "epoch": 2.035972283650302, "grad_norm": 3.345493860414146, "learning_rate": 2.1219424959446536e-06, "loss": 1.0455, "step": 69050 }, { "epoch": 2.0361197110423115, "grad_norm": 3.2381142128997586, "learning_rate": 2.1213630295771045e-06, "loss": 1.0795, "step": 69055 }, { "epoch": 2.036267138434321, "grad_norm": 3.3270483307655, "learning_rate": 2.1207836111340582e-06, "loss": 1.0676, "step": 69060 }, { "epoch": 2.0364145658263304, "grad_norm": 3.33935406860625, "learning_rate": 2.1202042406325635e-06, "loss": 1.0097, "step": 69065 }, { "epoch": 2.03656199321834, "grad_norm": 3.372632492959214, "learning_rate": 2.119624918089671e-06, "loss": 1.069, "step": 69070 }, { "epoch": 2.0367094206103493, "grad_norm": 3.328599302068072, "learning_rate": 2.119045643522428e-06, "loss": 1.0823, "step": 69075 }, { "epoch": 2.0368568480023588, "grad_norm": 3.2725109777850454, "learning_rate": 2.1184664169478763e-06, "loss": 1.0471, "step": 69080 }, { "epoch": 2.0370042753943682, "grad_norm": 3.332159659578329, "learning_rate": 2.117887238383067e-06, "loss": 1.0625, "step": 69085 }, { "epoch": 2.0371517027863777, "grad_norm": 3.4147244217988306, "learning_rate": 2.1173081078450372e-06, "loss": 1.0702, "step": 69090 }, { "epoch": 2.037299130178387, "grad_norm": 3.345963222626381, "learning_rate": 2.116729025350831e-06, "loss": 1.0566, "step": 69095 }, { "epoch": 2.0374465575703966, "grad_norm": 3.2603628653335033, "learning_rate": 2.1161499909174894e-06, "loss": 1.0116, "step": 69100 }, { "epoch": 2.037593984962406, "grad_norm": 3.368123995515117, "learning_rate": 2.115571004562047e-06, "loss": 1.1181, "step": 69105 }, { "epoch": 2.0377414123544155, "grad_norm": 3.2866660472136764, "learning_rate": 2.114992066301547e-06, "loss": 1.0379, "step": 69110 }, { "epoch": 2.037888839746425, "grad_norm": 3.2437009293116343, "learning_rate": 2.1144131761530216e-06, "loss": 1.0434, "step": 69115 }, { "epoch": 2.0380362671384344, "grad_norm": 3.4098370356576044, "learning_rate": 2.1138343341335053e-06, "loss": 1.0295, "step": 69120 }, { "epoch": 2.038183694530444, "grad_norm": 3.3079901907912963, "learning_rate": 2.113255540260033e-06, "loss": 1.0854, "step": 69125 }, { "epoch": 2.0383311219224534, "grad_norm": 3.2890417145926016, "learning_rate": 2.1126767945496346e-06, "loss": 1.0438, "step": 69130 }, { "epoch": 2.038478549314463, "grad_norm": 3.403351954119557, "learning_rate": 2.1120980970193414e-06, "loss": 1.0478, "step": 69135 }, { "epoch": 2.0386259767064723, "grad_norm": 3.4342305453139645, "learning_rate": 2.111519447686182e-06, "loss": 1.0586, "step": 69140 }, { "epoch": 2.0387734040984813, "grad_norm": 3.2754720663763677, "learning_rate": 2.1109408465671836e-06, "loss": 1.0252, "step": 69145 }, { "epoch": 2.0389208314904907, "grad_norm": 3.3083381203735795, "learning_rate": 2.1103622936793743e-06, "loss": 1.0633, "step": 69150 }, { "epoch": 2.0390682588825, "grad_norm": 3.101937562813606, "learning_rate": 2.109783789039775e-06, "loss": 1.0553, "step": 69155 }, { "epoch": 2.0392156862745097, "grad_norm": 3.4320408941234453, "learning_rate": 2.109205332665411e-06, "loss": 1.065, "step": 69160 }, { "epoch": 2.039363113666519, "grad_norm": 3.4437614283179125, "learning_rate": 2.108626924573303e-06, "loss": 1.0183, "step": 69165 }, { "epoch": 2.0395105410585286, "grad_norm": 3.379940183908759, "learning_rate": 2.108048564780472e-06, "loss": 1.0809, "step": 69170 }, { "epoch": 2.039657968450538, "grad_norm": 3.3267181479225303, "learning_rate": 2.107470253303939e-06, "loss": 1.0548, "step": 69175 }, { "epoch": 2.0398053958425475, "grad_norm": 3.3308172591171137, "learning_rate": 2.106891990160716e-06, "loss": 1.0396, "step": 69180 }, { "epoch": 2.039952823234557, "grad_norm": 3.2654493783038134, "learning_rate": 2.1063137753678255e-06, "loss": 1.0563, "step": 69185 }, { "epoch": 2.0401002506265664, "grad_norm": 3.4524167074244017, "learning_rate": 2.1057356089422763e-06, "loss": 1.0751, "step": 69190 }, { "epoch": 2.040247678018576, "grad_norm": 3.1205890327714036, "learning_rate": 2.105157490901087e-06, "loss": 1.0369, "step": 69195 }, { "epoch": 2.0403951054105853, "grad_norm": 3.2988812101495717, "learning_rate": 2.1045794212612655e-06, "loss": 1.0464, "step": 69200 }, { "epoch": 2.040542532802595, "grad_norm": 3.2192923446981174, "learning_rate": 2.104001400039823e-06, "loss": 1.063, "step": 69205 }, { "epoch": 2.0406899601946042, "grad_norm": 3.2771325092742702, "learning_rate": 2.1034234272537694e-06, "loss": 1.0287, "step": 69210 }, { "epoch": 2.0408373875866137, "grad_norm": 3.1352638688334356, "learning_rate": 2.1028455029201112e-06, "loss": 1.0551, "step": 69215 }, { "epoch": 2.040984814978623, "grad_norm": 3.4788745457076757, "learning_rate": 2.1022676270558565e-06, "loss": 1.0797, "step": 69220 }, { "epoch": 2.0411322423706326, "grad_norm": 3.4532372226707637, "learning_rate": 2.101689799678007e-06, "loss": 1.0863, "step": 69225 }, { "epoch": 2.041279669762642, "grad_norm": 3.301016431446618, "learning_rate": 2.1011120208035675e-06, "loss": 1.0939, "step": 69230 }, { "epoch": 2.0414270971546515, "grad_norm": 3.3897378920760577, "learning_rate": 2.1005342904495396e-06, "loss": 1.0399, "step": 69235 }, { "epoch": 2.0415745245466606, "grad_norm": 3.335984660684549, "learning_rate": 2.0999566086329233e-06, "loss": 1.0319, "step": 69240 }, { "epoch": 2.04172195193867, "grad_norm": 3.303562591775547, "learning_rate": 2.099378975370718e-06, "loss": 1.0875, "step": 69245 }, { "epoch": 2.0418693793306795, "grad_norm": 3.432507316911765, "learning_rate": 2.0988013906799234e-06, "loss": 1.07, "step": 69250 }, { "epoch": 2.042016806722689, "grad_norm": 3.2286109373861716, "learning_rate": 2.0982238545775295e-06, "loss": 1.0638, "step": 69255 }, { "epoch": 2.0421642341146984, "grad_norm": 3.5272463326506487, "learning_rate": 2.097646367080538e-06, "loss": 1.0846, "step": 69260 }, { "epoch": 2.042311661506708, "grad_norm": 3.2603909965137388, "learning_rate": 2.097068928205936e-06, "loss": 1.042, "step": 69265 }, { "epoch": 2.0424590888987173, "grad_norm": 3.3336164970439413, "learning_rate": 2.0964915379707216e-06, "loss": 1.0612, "step": 69270 }, { "epoch": 2.0426065162907268, "grad_norm": 3.311367059250554, "learning_rate": 2.0959141963918797e-06, "loss": 1.08, "step": 69275 }, { "epoch": 2.042753943682736, "grad_norm": 3.3136061317050522, "learning_rate": 2.095336903486402e-06, "loss": 1.0648, "step": 69280 }, { "epoch": 2.0429013710747457, "grad_norm": 3.4666275909209068, "learning_rate": 2.094759659271276e-06, "loss": 1.0582, "step": 69285 }, { "epoch": 2.043048798466755, "grad_norm": 3.350045697237515, "learning_rate": 2.094182463763485e-06, "loss": 1.0441, "step": 69290 }, { "epoch": 2.0431962258587646, "grad_norm": 3.29265195704189, "learning_rate": 2.093605316980018e-06, "loss": 1.037, "step": 69295 }, { "epoch": 2.043343653250774, "grad_norm": 3.1810561956703434, "learning_rate": 2.093028218937855e-06, "loss": 1.0172, "step": 69300 }, { "epoch": 2.0434910806427835, "grad_norm": 3.2695008549796056, "learning_rate": 2.0924511696539786e-06, "loss": 1.0605, "step": 69305 }, { "epoch": 2.043638508034793, "grad_norm": 3.4074623337067838, "learning_rate": 2.0918741691453687e-06, "loss": 1.1017, "step": 69310 }, { "epoch": 2.0437859354268024, "grad_norm": 3.5317753908947283, "learning_rate": 2.0912972174290054e-06, "loss": 1.0723, "step": 69315 }, { "epoch": 2.043933362818812, "grad_norm": 3.2858835620058757, "learning_rate": 2.0907203145218654e-06, "loss": 1.06, "step": 69320 }, { "epoch": 2.0440807902108213, "grad_norm": 3.442461128872114, "learning_rate": 2.090143460440925e-06, "loss": 1.0613, "step": 69325 }, { "epoch": 2.044228217602831, "grad_norm": 3.3486269846590115, "learning_rate": 2.089566655203158e-06, "loss": 1.0404, "step": 69330 }, { "epoch": 2.0443756449948403, "grad_norm": 3.280683379047726, "learning_rate": 2.08898989882554e-06, "loss": 0.9829, "step": 69335 }, { "epoch": 2.0445230723868493, "grad_norm": 3.3801415854120607, "learning_rate": 2.088413191325038e-06, "loss": 1.0754, "step": 69340 }, { "epoch": 2.0446704997788587, "grad_norm": 3.435347917913494, "learning_rate": 2.087836532718628e-06, "loss": 1.0416, "step": 69345 }, { "epoch": 2.044817927170868, "grad_norm": 3.3550916385023, "learning_rate": 2.0872599230232747e-06, "loss": 1.084, "step": 69350 }, { "epoch": 2.0449653545628776, "grad_norm": 3.4147618451922095, "learning_rate": 2.0866833622559474e-06, "loss": 1.0979, "step": 69355 }, { "epoch": 2.045112781954887, "grad_norm": 3.454389327961588, "learning_rate": 2.086106850433612e-06, "loss": 1.08, "step": 69360 }, { "epoch": 2.0452602093468966, "grad_norm": 3.1668859838421364, "learning_rate": 2.0855303875732295e-06, "loss": 1.0792, "step": 69365 }, { "epoch": 2.045407636738906, "grad_norm": 3.3468604648241724, "learning_rate": 2.08495397369177e-06, "loss": 1.0556, "step": 69370 }, { "epoch": 2.0455550641309155, "grad_norm": 3.4596324674262506, "learning_rate": 2.0843776088061886e-06, "loss": 1.1118, "step": 69375 }, { "epoch": 2.045702491522925, "grad_norm": 3.3420653493688395, "learning_rate": 2.0838012929334484e-06, "loss": 1.0556, "step": 69380 }, { "epoch": 2.0458499189149344, "grad_norm": 3.0771813784766002, "learning_rate": 2.0832250260905073e-06, "loss": 1.0166, "step": 69385 }, { "epoch": 2.045997346306944, "grad_norm": 3.2178831043687994, "learning_rate": 2.0826488082943234e-06, "loss": 1.0355, "step": 69390 }, { "epoch": 2.0461447736989533, "grad_norm": 3.326237584962653, "learning_rate": 2.082072639561853e-06, "loss": 1.0474, "step": 69395 }, { "epoch": 2.0462922010909628, "grad_norm": 3.3368775586142267, "learning_rate": 2.0814965199100485e-06, "loss": 1.0453, "step": 69400 }, { "epoch": 2.0464396284829722, "grad_norm": 3.5017508208572425, "learning_rate": 2.0809204493558646e-06, "loss": 1.0443, "step": 69405 }, { "epoch": 2.0465870558749817, "grad_norm": 3.5532341390572153, "learning_rate": 2.0803444279162543e-06, "loss": 1.0792, "step": 69410 }, { "epoch": 2.046734483266991, "grad_norm": 3.2058123234787406, "learning_rate": 2.0797684556081637e-06, "loss": 1.0365, "step": 69415 }, { "epoch": 2.0468819106590006, "grad_norm": 3.1876503763241493, "learning_rate": 2.0791925324485437e-06, "loss": 1.0095, "step": 69420 }, { "epoch": 2.04702933805101, "grad_norm": 3.3738853929011468, "learning_rate": 2.0786166584543412e-06, "loss": 1.0664, "step": 69425 }, { "epoch": 2.0471767654430195, "grad_norm": 3.322196681979673, "learning_rate": 2.0780408336425025e-06, "loss": 1.0441, "step": 69430 }, { "epoch": 2.0473241928350285, "grad_norm": 3.3627866904999095, "learning_rate": 2.077465058029972e-06, "loss": 1.0442, "step": 69435 }, { "epoch": 2.047471620227038, "grad_norm": 3.58913717736573, "learning_rate": 2.0768893316336902e-06, "loss": 1.0956, "step": 69440 }, { "epoch": 2.0476190476190474, "grad_norm": 3.328457941424868, "learning_rate": 2.0763136544706023e-06, "loss": 1.0456, "step": 69445 }, { "epoch": 2.047766475011057, "grad_norm": 3.377423658047555, "learning_rate": 2.0757380265576437e-06, "loss": 1.1043, "step": 69450 }, { "epoch": 2.0479139024030664, "grad_norm": 3.231448172520557, "learning_rate": 2.075162447911759e-06, "loss": 1.0644, "step": 69455 }, { "epoch": 2.048061329795076, "grad_norm": 3.2719120058112816, "learning_rate": 2.0745869185498787e-06, "loss": 1.0415, "step": 69460 }, { "epoch": 2.0482087571870853, "grad_norm": 3.3862473262341286, "learning_rate": 2.0740114384889427e-06, "loss": 1.072, "step": 69465 }, { "epoch": 2.0483561845790947, "grad_norm": 3.2632149008948943, "learning_rate": 2.073436007745884e-06, "loss": 1.0488, "step": 69470 }, { "epoch": 2.048503611971104, "grad_norm": 3.4421310567925545, "learning_rate": 2.0728606263376347e-06, "loss": 1.0416, "step": 69475 }, { "epoch": 2.0486510393631137, "grad_norm": 3.299138534742052, "learning_rate": 2.072285294281128e-06, "loss": 1.0737, "step": 69480 }, { "epoch": 2.048798466755123, "grad_norm": 3.4424460981854197, "learning_rate": 2.0717100115932907e-06, "loss": 1.0632, "step": 69485 }, { "epoch": 2.0489458941471326, "grad_norm": 3.309612977958748, "learning_rate": 2.0711347782910533e-06, "loss": 1.0262, "step": 69490 }, { "epoch": 2.049093321539142, "grad_norm": 3.2965162630242126, "learning_rate": 2.070559594391342e-06, "loss": 1.047, "step": 69495 }, { "epoch": 2.0492407489311515, "grad_norm": 3.3224392160665355, "learning_rate": 2.069984459911082e-06, "loss": 1.0377, "step": 69500 }, { "epoch": 2.0492407489311515, "eval_loss": 1.086931824684143, "eval_runtime": 4.2811, "eval_samples_per_second": 92.5, "eval_steps_per_second": 3.037, "step": 69500 }, { "epoch": 2.049388176323161, "grad_norm": 3.3215640782968854, "learning_rate": 2.069409374867198e-06, "loss": 1.0715, "step": 69505 }, { "epoch": 2.0495356037151704, "grad_norm": 3.2670209327903286, "learning_rate": 2.0688343392766127e-06, "loss": 1.0551, "step": 69510 }, { "epoch": 2.04968303110718, "grad_norm": 3.393975488788303, "learning_rate": 2.068259353156245e-06, "loss": 1.0471, "step": 69515 }, { "epoch": 2.0498304584991893, "grad_norm": 3.3184172014164974, "learning_rate": 2.0676844165230178e-06, "loss": 1.0495, "step": 69520 }, { "epoch": 2.049977885891199, "grad_norm": 3.4147854332443734, "learning_rate": 2.067109529393846e-06, "loss": 1.046, "step": 69525 }, { "epoch": 2.050125313283208, "grad_norm": 3.4643749987940233, "learning_rate": 2.0665346917856504e-06, "loss": 1.0847, "step": 69530 }, { "epoch": 2.0502727406752173, "grad_norm": 3.471988011111532, "learning_rate": 2.0659599037153423e-06, "loss": 1.0561, "step": 69535 }, { "epoch": 2.0504201680672267, "grad_norm": 3.5257192799664394, "learning_rate": 2.0653851651998367e-06, "loss": 1.0706, "step": 69540 }, { "epoch": 2.050567595459236, "grad_norm": 3.4158141871625554, "learning_rate": 2.0648104762560466e-06, "loss": 1.0554, "step": 69545 }, { "epoch": 2.0507150228512456, "grad_norm": 3.4635119903893705, "learning_rate": 2.0642358369008823e-06, "loss": 1.0252, "step": 69550 }, { "epoch": 2.050862450243255, "grad_norm": 3.3182988190205998, "learning_rate": 2.0636612471512555e-06, "loss": 1.0341, "step": 69555 }, { "epoch": 2.0510098776352645, "grad_norm": 3.346028649819127, "learning_rate": 2.06308670702407e-06, "loss": 1.0451, "step": 69560 }, { "epoch": 2.051157305027274, "grad_norm": 3.298411514934974, "learning_rate": 2.0625122165362345e-06, "loss": 1.0773, "step": 69565 }, { "epoch": 2.0513047324192835, "grad_norm": 3.496095270161652, "learning_rate": 2.061937775704654e-06, "loss": 1.0804, "step": 69570 }, { "epoch": 2.051452159811293, "grad_norm": 3.3282250945807537, "learning_rate": 2.0613633845462325e-06, "loss": 1.0488, "step": 69575 }, { "epoch": 2.0515995872033024, "grad_norm": 3.3833798915805735, "learning_rate": 2.060789043077871e-06, "loss": 1.0515, "step": 69580 }, { "epoch": 2.051747014595312, "grad_norm": 3.3405535788843275, "learning_rate": 2.060214751316471e-06, "loss": 1.0314, "step": 69585 }, { "epoch": 2.0518944419873213, "grad_norm": 3.3253130539079865, "learning_rate": 2.059640509278932e-06, "loss": 1.0762, "step": 69590 }, { "epoch": 2.0520418693793308, "grad_norm": 3.3157655415379508, "learning_rate": 2.0590663169821513e-06, "loss": 1.0673, "step": 69595 }, { "epoch": 2.05218929677134, "grad_norm": 3.515127459515221, "learning_rate": 2.0584921744430233e-06, "loss": 1.0655, "step": 69600 }, { "epoch": 2.0523367241633497, "grad_norm": 3.508298998584328, "learning_rate": 2.057918081678447e-06, "loss": 1.0276, "step": 69605 }, { "epoch": 2.052484151555359, "grad_norm": 3.3555716912413933, "learning_rate": 2.057344038705312e-06, "loss": 1.0045, "step": 69610 }, { "epoch": 2.0526315789473686, "grad_norm": 3.1987825527382654, "learning_rate": 2.0567700455405116e-06, "loss": 1.0017, "step": 69615 }, { "epoch": 2.052779006339378, "grad_norm": 3.4305310027936895, "learning_rate": 2.056196102200937e-06, "loss": 1.045, "step": 69620 }, { "epoch": 2.0529264337313875, "grad_norm": 3.3477899038317287, "learning_rate": 2.055622208703474e-06, "loss": 1.0841, "step": 69625 }, { "epoch": 2.0530738611233965, "grad_norm": 3.452863177989308, "learning_rate": 2.0550483650650147e-06, "loss": 1.0589, "step": 69630 }, { "epoch": 2.053221288515406, "grad_norm": 3.4399377131225006, "learning_rate": 2.0544745713024418e-06, "loss": 1.0614, "step": 69635 }, { "epoch": 2.0533687159074154, "grad_norm": 3.2378025592443076, "learning_rate": 2.0539008274326404e-06, "loss": 1.0756, "step": 69640 }, { "epoch": 2.053516143299425, "grad_norm": 3.399616211720732, "learning_rate": 2.0533271334724936e-06, "loss": 1.0411, "step": 69645 }, { "epoch": 2.0536635706914343, "grad_norm": 3.4949131570419083, "learning_rate": 2.052753489438883e-06, "loss": 1.0225, "step": 69650 }, { "epoch": 2.053810998083444, "grad_norm": 3.4554370175784515, "learning_rate": 2.0521798953486896e-06, "loss": 1.0869, "step": 69655 }, { "epoch": 2.0539584254754533, "grad_norm": 3.45296179567069, "learning_rate": 2.0516063512187922e-06, "loss": 1.0566, "step": 69660 }, { "epoch": 2.0541058528674627, "grad_norm": 3.329653005785365, "learning_rate": 2.051032857066066e-06, "loss": 1.0665, "step": 69665 }, { "epoch": 2.054253280259472, "grad_norm": 3.3426299752624278, "learning_rate": 2.0504594129073902e-06, "loss": 1.0493, "step": 69670 }, { "epoch": 2.0544007076514816, "grad_norm": 3.368361991110816, "learning_rate": 2.049886018759634e-06, "loss": 1.0847, "step": 69675 }, { "epoch": 2.054548135043491, "grad_norm": 3.3816116755514187, "learning_rate": 2.0493126746396755e-06, "loss": 1.0564, "step": 69680 }, { "epoch": 2.0546955624355006, "grad_norm": 3.389882707729191, "learning_rate": 2.048739380564382e-06, "loss": 1.0201, "step": 69685 }, { "epoch": 2.05484298982751, "grad_norm": 3.2204783044423526, "learning_rate": 2.0481661365506253e-06, "loss": 1.0546, "step": 69690 }, { "epoch": 2.0549904172195195, "grad_norm": 3.3626655636734064, "learning_rate": 2.0475929426152743e-06, "loss": 1.0493, "step": 69695 }, { "epoch": 2.055137844611529, "grad_norm": 3.2428439879250366, "learning_rate": 2.0470197987751924e-06, "loss": 1.0626, "step": 69700 }, { "epoch": 2.0552852720035384, "grad_norm": 3.456436024068012, "learning_rate": 2.0464467050472504e-06, "loss": 1.0596, "step": 69705 }, { "epoch": 2.055432699395548, "grad_norm": 3.3191255844078467, "learning_rate": 2.0458736614483064e-06, "loss": 1.0431, "step": 69710 }, { "epoch": 2.0555801267875573, "grad_norm": 3.278712429761889, "learning_rate": 2.0453006679952283e-06, "loss": 1.0376, "step": 69715 }, { "epoch": 2.0557275541795668, "grad_norm": 3.4945102743061045, "learning_rate": 2.0447277247048736e-06, "loss": 1.0798, "step": 69720 }, { "epoch": 2.055874981571576, "grad_norm": 3.3555857021446145, "learning_rate": 2.044154831594103e-06, "loss": 1.0687, "step": 69725 }, { "epoch": 2.0560224089635852, "grad_norm": 3.425563484394801, "learning_rate": 2.043581988679774e-06, "loss": 1.0663, "step": 69730 }, { "epoch": 2.0561698363555947, "grad_norm": 3.3537840195153503, "learning_rate": 2.0430091959787434e-06, "loss": 1.1013, "step": 69735 }, { "epoch": 2.056317263747604, "grad_norm": 3.4070654088215053, "learning_rate": 2.0424364535078667e-06, "loss": 1.029, "step": 69740 }, { "epoch": 2.0564646911396136, "grad_norm": 3.4452791898662873, "learning_rate": 2.0418637612839987e-06, "loss": 1.0757, "step": 69745 }, { "epoch": 2.056612118531623, "grad_norm": 3.374299443166215, "learning_rate": 2.0412911193239883e-06, "loss": 1.0494, "step": 69750 }, { "epoch": 2.0567595459236325, "grad_norm": 3.2005815656409675, "learning_rate": 2.040718527644688e-06, "loss": 1.0251, "step": 69755 }, { "epoch": 2.056906973315642, "grad_norm": 3.4188537350903414, "learning_rate": 2.040145986262947e-06, "loss": 1.0589, "step": 69760 }, { "epoch": 2.0570544007076514, "grad_norm": 3.3951632606046918, "learning_rate": 2.039573495195612e-06, "loss": 1.093, "step": 69765 }, { "epoch": 2.057201828099661, "grad_norm": 3.5457868183833514, "learning_rate": 2.0390010544595324e-06, "loss": 1.0566, "step": 69770 }, { "epoch": 2.0573492554916704, "grad_norm": 3.3360998436281815, "learning_rate": 2.038428664071547e-06, "loss": 1.0468, "step": 69775 }, { "epoch": 2.05749668288368, "grad_norm": 3.21316746946531, "learning_rate": 2.037856324048506e-06, "loss": 1.0246, "step": 69780 }, { "epoch": 2.0576441102756893, "grad_norm": 3.3736878881029795, "learning_rate": 2.037284034407245e-06, "loss": 1.0414, "step": 69785 }, { "epoch": 2.0577915376676987, "grad_norm": 3.3918990267247695, "learning_rate": 2.0367117951646086e-06, "loss": 1.0955, "step": 69790 }, { "epoch": 2.057938965059708, "grad_norm": 3.1451731774205447, "learning_rate": 2.0361396063374335e-06, "loss": 1.038, "step": 69795 }, { "epoch": 2.0580863924517176, "grad_norm": 3.2260746386686643, "learning_rate": 2.035567467942558e-06, "loss": 1.0353, "step": 69800 }, { "epoch": 2.058233819843727, "grad_norm": 3.320990387061516, "learning_rate": 2.0349953799968166e-06, "loss": 1.067, "step": 69805 }, { "epoch": 2.0583812472357366, "grad_norm": 3.430134384041786, "learning_rate": 2.0344233425170443e-06, "loss": 1.0529, "step": 69810 }, { "epoch": 2.058528674627746, "grad_norm": 3.2938270904030005, "learning_rate": 2.033851355520076e-06, "loss": 1.0144, "step": 69815 }, { "epoch": 2.0586761020197555, "grad_norm": 3.6041437508127667, "learning_rate": 2.0332794190227395e-06, "loss": 1.055, "step": 69820 }, { "epoch": 2.0588235294117645, "grad_norm": 3.511541227646144, "learning_rate": 2.032707533041867e-06, "loss": 1.0631, "step": 69825 }, { "epoch": 2.058970956803774, "grad_norm": 3.38307246422828, "learning_rate": 2.0321356975942857e-06, "loss": 1.0526, "step": 69830 }, { "epoch": 2.0591183841957834, "grad_norm": 3.5391177411150534, "learning_rate": 2.0315639126968236e-06, "loss": 1.0633, "step": 69835 }, { "epoch": 2.059265811587793, "grad_norm": 3.3091108886890477, "learning_rate": 2.0309921783663057e-06, "loss": 1.0424, "step": 69840 }, { "epoch": 2.0594132389798023, "grad_norm": 3.3694861946982337, "learning_rate": 2.030420494619555e-06, "loss": 1.071, "step": 69845 }, { "epoch": 2.059560666371812, "grad_norm": 3.34166157635754, "learning_rate": 2.0298488614733954e-06, "loss": 1.0608, "step": 69850 }, { "epoch": 2.0597080937638212, "grad_norm": 3.3387414739208685, "learning_rate": 2.0292772789446486e-06, "loss": 1.0604, "step": 69855 }, { "epoch": 2.0598555211558307, "grad_norm": 3.5302475810664466, "learning_rate": 2.0287057470501295e-06, "loss": 1.0344, "step": 69860 }, { "epoch": 2.06000294854784, "grad_norm": 3.3552433834621285, "learning_rate": 2.0281342658066627e-06, "loss": 1.0435, "step": 69865 }, { "epoch": 2.0601503759398496, "grad_norm": 3.4048843605868258, "learning_rate": 2.02756283523106e-06, "loss": 1.0704, "step": 69870 }, { "epoch": 2.060297803331859, "grad_norm": 3.4981291791717464, "learning_rate": 2.026991455340137e-06, "loss": 1.111, "step": 69875 }, { "epoch": 2.0604452307238685, "grad_norm": 3.437197143883403, "learning_rate": 2.0264201261507093e-06, "loss": 1.0481, "step": 69880 }, { "epoch": 2.060592658115878, "grad_norm": 3.3431854167846415, "learning_rate": 2.0258488476795855e-06, "loss": 1.0653, "step": 69885 }, { "epoch": 2.0607400855078875, "grad_norm": 3.396918068014988, "learning_rate": 2.0252776199435804e-06, "loss": 1.0436, "step": 69890 }, { "epoch": 2.060887512899897, "grad_norm": 3.3103350856090468, "learning_rate": 2.024706442959499e-06, "loss": 1.0436, "step": 69895 }, { "epoch": 2.0610349402919064, "grad_norm": 3.394139273200654, "learning_rate": 2.024135316744152e-06, "loss": 1.1085, "step": 69900 }, { "epoch": 2.061182367683916, "grad_norm": 3.2555646293845415, "learning_rate": 2.0235642413143426e-06, "loss": 1.0597, "step": 69905 }, { "epoch": 2.0613297950759253, "grad_norm": 3.3396190925777427, "learning_rate": 2.022993216686877e-06, "loss": 1.055, "step": 69910 }, { "epoch": 2.0614772224679347, "grad_norm": 3.150259344083833, "learning_rate": 2.0224222428785586e-06, "loss": 1.0446, "step": 69915 }, { "epoch": 2.0616246498599438, "grad_norm": 3.3428893605889995, "learning_rate": 2.0218513199061882e-06, "loss": 1.0656, "step": 69920 }, { "epoch": 2.061772077251953, "grad_norm": 3.225749680319671, "learning_rate": 2.0212804477865657e-06, "loss": 1.0457, "step": 69925 }, { "epoch": 2.0619195046439627, "grad_norm": 3.424058157108215, "learning_rate": 2.0207096265364916e-06, "loss": 1.044, "step": 69930 }, { "epoch": 2.062066932035972, "grad_norm": 3.1760345106035115, "learning_rate": 2.0201388561727586e-06, "loss": 1.0326, "step": 69935 }, { "epoch": 2.0622143594279816, "grad_norm": 3.281700691601361, "learning_rate": 2.019568136712168e-06, "loss": 1.0341, "step": 69940 }, { "epoch": 2.062361786819991, "grad_norm": 3.2902718501426165, "learning_rate": 2.0189974681715095e-06, "loss": 1.0092, "step": 69945 }, { "epoch": 2.0625092142120005, "grad_norm": 3.3731825582516617, "learning_rate": 2.018426850567577e-06, "loss": 1.1118, "step": 69950 }, { "epoch": 2.06265664160401, "grad_norm": 3.296199154881913, "learning_rate": 2.017856283917163e-06, "loss": 1.0442, "step": 69955 }, { "epoch": 2.0628040689960194, "grad_norm": 3.1803587179147375, "learning_rate": 2.017285768237053e-06, "loss": 1.0003, "step": 69960 }, { "epoch": 2.062951496388029, "grad_norm": 3.239466066236586, "learning_rate": 2.0167153035440406e-06, "loss": 1.0411, "step": 69965 }, { "epoch": 2.0630989237800383, "grad_norm": 3.461128187256106, "learning_rate": 2.0161448898549067e-06, "loss": 1.0697, "step": 69970 }, { "epoch": 2.063246351172048, "grad_norm": 3.315554738743651, "learning_rate": 2.0155745271864424e-06, "loss": 1.0503, "step": 69975 }, { "epoch": 2.0633937785640573, "grad_norm": 3.321848710118285, "learning_rate": 2.0150042155554265e-06, "loss": 1.048, "step": 69980 }, { "epoch": 2.0635412059560667, "grad_norm": 3.4040351505712487, "learning_rate": 2.0144339549786427e-06, "loss": 1.0521, "step": 69985 }, { "epoch": 2.063688633348076, "grad_norm": 3.4865541762737573, "learning_rate": 2.0138637454728714e-06, "loss": 1.0871, "step": 69990 }, { "epoch": 2.0638360607400856, "grad_norm": 3.3784581724961975, "learning_rate": 2.0132935870548924e-06, "loss": 1.0784, "step": 69995 }, { "epoch": 2.063983488132095, "grad_norm": 3.2402555605669603, "learning_rate": 2.0127234797414824e-06, "loss": 1.0493, "step": 70000 }, { "epoch": 2.063983488132095, "eval_loss": 1.0864062309265137, "eval_runtime": 4.189, "eval_samples_per_second": 94.533, "eval_steps_per_second": 3.103, "step": 70000 }, { "epoch": 2.0641309155241045, "grad_norm": 3.431022738383874, "learning_rate": 2.012153423549419e-06, "loss": 1.0663, "step": 70005 }, { "epoch": 2.064278342916114, "grad_norm": 3.4172282776235123, "learning_rate": 2.0115834184954746e-06, "loss": 1.0617, "step": 70010 }, { "epoch": 2.064425770308123, "grad_norm": 3.353324457821921, "learning_rate": 2.011013464596423e-06, "loss": 1.0702, "step": 70015 }, { "epoch": 2.0645731977001325, "grad_norm": 3.4508420237409547, "learning_rate": 2.0104435618690357e-06, "loss": 1.043, "step": 70020 }, { "epoch": 2.064720625092142, "grad_norm": 3.309712366686592, "learning_rate": 2.0098737103300825e-06, "loss": 1.0507, "step": 70025 }, { "epoch": 2.0648680524841514, "grad_norm": 3.1246251450205276, "learning_rate": 2.009303909996335e-06, "loss": 1.0128, "step": 70030 }, { "epoch": 2.065015479876161, "grad_norm": 3.342622813262161, "learning_rate": 2.0087341608845537e-06, "loss": 1.0138, "step": 70035 }, { "epoch": 2.0651629072681703, "grad_norm": 3.326556965550311, "learning_rate": 2.0081644630115116e-06, "loss": 1.0731, "step": 70040 }, { "epoch": 2.0653103346601798, "grad_norm": 3.4925581915502946, "learning_rate": 2.007594816393966e-06, "loss": 1.0727, "step": 70045 }, { "epoch": 2.0654577620521892, "grad_norm": 3.35510166572178, "learning_rate": 2.007025221048685e-06, "loss": 1.0098, "step": 70050 }, { "epoch": 2.0656051894441987, "grad_norm": 3.38337611995369, "learning_rate": 2.0064556769924265e-06, "loss": 1.0486, "step": 70055 }, { "epoch": 2.065752616836208, "grad_norm": 3.3692877461401745, "learning_rate": 2.00588618424195e-06, "loss": 1.0211, "step": 70060 }, { "epoch": 2.0659000442282176, "grad_norm": 3.422552210388866, "learning_rate": 2.0053167428140144e-06, "loss": 1.0158, "step": 70065 }, { "epoch": 2.066047471620227, "grad_norm": 3.2766077200461727, "learning_rate": 2.004747352725376e-06, "loss": 1.0296, "step": 70070 }, { "epoch": 2.0661948990122365, "grad_norm": 3.441934044527495, "learning_rate": 2.0041780139927908e-06, "loss": 1.0759, "step": 70075 }, { "epoch": 2.066342326404246, "grad_norm": 3.370095247296358, "learning_rate": 2.0036087266330096e-06, "loss": 1.0941, "step": 70080 }, { "epoch": 2.0664897537962554, "grad_norm": 3.656508682808422, "learning_rate": 2.003039490662786e-06, "loss": 1.0829, "step": 70085 }, { "epoch": 2.066637181188265, "grad_norm": 3.2472053055601435, "learning_rate": 2.00247030609887e-06, "loss": 1.0141, "step": 70090 }, { "epoch": 2.0667846085802744, "grad_norm": 3.358199768456287, "learning_rate": 2.0019011729580106e-06, "loss": 1.0578, "step": 70095 }, { "epoch": 2.066932035972284, "grad_norm": 3.339696709240737, "learning_rate": 2.0013320912569558e-06, "loss": 1.0298, "step": 70100 }, { "epoch": 2.0670794633642933, "grad_norm": 3.383283190801111, "learning_rate": 2.0007630610124502e-06, "loss": 1.0441, "step": 70105 }, { "epoch": 2.0672268907563027, "grad_norm": 3.309923731356119, "learning_rate": 2.0001940822412395e-06, "loss": 1.0849, "step": 70110 }, { "epoch": 2.067374318148312, "grad_norm": 3.5375144515393684, "learning_rate": 1.9996251549600677e-06, "loss": 1.0472, "step": 70115 }, { "epoch": 2.067521745540321, "grad_norm": 3.2428588951777457, "learning_rate": 1.999056279185671e-06, "loss": 1.0571, "step": 70120 }, { "epoch": 2.0676691729323307, "grad_norm": 3.3218772278594333, "learning_rate": 1.9984874549347958e-06, "loss": 1.0426, "step": 70125 }, { "epoch": 2.06781660032434, "grad_norm": 3.4043290008040175, "learning_rate": 1.9979186822241755e-06, "loss": 1.0452, "step": 70130 }, { "epoch": 2.0679640277163496, "grad_norm": 3.2229107152208933, "learning_rate": 1.997349961070549e-06, "loss": 1.0607, "step": 70135 }, { "epoch": 2.068111455108359, "grad_norm": 3.3319765364558287, "learning_rate": 1.996781291490653e-06, "loss": 1.0651, "step": 70140 }, { "epoch": 2.0682588825003685, "grad_norm": 3.349393788362367, "learning_rate": 1.996212673501216e-06, "loss": 0.9754, "step": 70145 }, { "epoch": 2.068406309892378, "grad_norm": 3.3682266875715507, "learning_rate": 1.9956441071189764e-06, "loss": 0.9994, "step": 70150 }, { "epoch": 2.0685537372843874, "grad_norm": 3.403543445661146, "learning_rate": 1.9950755923606613e-06, "loss": 1.0809, "step": 70155 }, { "epoch": 2.068701164676397, "grad_norm": 3.2483705219093406, "learning_rate": 1.9945071292430003e-06, "loss": 1.0442, "step": 70160 }, { "epoch": 2.0688485920684063, "grad_norm": 3.2970538438697825, "learning_rate": 1.9939387177827222e-06, "loss": 1.0238, "step": 70165 }, { "epoch": 2.068996019460416, "grad_norm": 3.4019534858280185, "learning_rate": 1.9933703579965522e-06, "loss": 1.0753, "step": 70170 }, { "epoch": 2.0691434468524252, "grad_norm": 3.292979910204609, "learning_rate": 1.9928020499012154e-06, "loss": 1.034, "step": 70175 }, { "epoch": 2.0692908742444347, "grad_norm": 3.461354190452239, "learning_rate": 1.9922337935134344e-06, "loss": 1.0501, "step": 70180 }, { "epoch": 2.069438301636444, "grad_norm": 3.515862591709059, "learning_rate": 1.9916655888499315e-06, "loss": 1.0492, "step": 70185 }, { "epoch": 2.0695857290284536, "grad_norm": 3.306430300386202, "learning_rate": 1.991097435927428e-06, "loss": 1.0348, "step": 70190 }, { "epoch": 2.069733156420463, "grad_norm": 3.178886916569, "learning_rate": 1.990529334762638e-06, "loss": 1.0521, "step": 70195 }, { "epoch": 2.0698805838124725, "grad_norm": 3.5159695457779887, "learning_rate": 1.9899612853722838e-06, "loss": 1.0488, "step": 70200 }, { "epoch": 2.070028011204482, "grad_norm": 3.265811091247162, "learning_rate": 1.9893932877730775e-06, "loss": 1.0353, "step": 70205 }, { "epoch": 2.0701754385964914, "grad_norm": 3.507388941796904, "learning_rate": 1.9888253419817343e-06, "loss": 1.0556, "step": 70210 }, { "epoch": 2.0703228659885005, "grad_norm": 3.495951471689176, "learning_rate": 1.9882574480149674e-06, "loss": 1.0602, "step": 70215 }, { "epoch": 2.07047029338051, "grad_norm": 3.4518448070706045, "learning_rate": 1.987689605889484e-06, "loss": 1.0831, "step": 70220 }, { "epoch": 2.0706177207725194, "grad_norm": 3.396918159864593, "learning_rate": 1.9871218156219996e-06, "loss": 1.0612, "step": 70225 }, { "epoch": 2.070765148164529, "grad_norm": 3.370289633003551, "learning_rate": 1.9865540772292155e-06, "loss": 1.0401, "step": 70230 }, { "epoch": 2.0709125755565383, "grad_norm": 3.436514737810689, "learning_rate": 1.985986390727845e-06, "loss": 1.0593, "step": 70235 }, { "epoch": 2.0710600029485478, "grad_norm": 3.277833992219236, "learning_rate": 1.9854187561345867e-06, "loss": 1.0334, "step": 70240 }, { "epoch": 2.071207430340557, "grad_norm": 3.4072471142354415, "learning_rate": 1.984851173466147e-06, "loss": 1.0538, "step": 70245 }, { "epoch": 2.0713548577325667, "grad_norm": 3.290081012299578, "learning_rate": 1.9842836427392276e-06, "loss": 1.0561, "step": 70250 }, { "epoch": 2.071502285124576, "grad_norm": 3.623249891792139, "learning_rate": 1.9837161639705274e-06, "loss": 1.0588, "step": 70255 }, { "epoch": 2.0716497125165856, "grad_norm": 3.233906053191371, "learning_rate": 1.983148737176746e-06, "loss": 1.011, "step": 70260 }, { "epoch": 2.071797139908595, "grad_norm": 3.1747983590481583, "learning_rate": 1.9825813623745823e-06, "loss": 1.0057, "step": 70265 }, { "epoch": 2.0719445673006045, "grad_norm": 3.413033109178517, "learning_rate": 1.982014039580729e-06, "loss": 1.0641, "step": 70270 }, { "epoch": 2.072091994692614, "grad_norm": 3.5448089740114237, "learning_rate": 1.981446768811881e-06, "loss": 1.098, "step": 70275 }, { "epoch": 2.0722394220846234, "grad_norm": 3.298915258488972, "learning_rate": 1.9808795500847313e-06, "loss": 1.0499, "step": 70280 }, { "epoch": 2.072386849476633, "grad_norm": 3.4951102192744576, "learning_rate": 1.980312383415971e-06, "loss": 1.0415, "step": 70285 }, { "epoch": 2.0725342768686423, "grad_norm": 3.361552458266202, "learning_rate": 1.9797452688222913e-06, "loss": 1.0863, "step": 70290 }, { "epoch": 2.072681704260652, "grad_norm": 3.2108056730067465, "learning_rate": 1.9791782063203754e-06, "loss": 1.0693, "step": 70295 }, { "epoch": 2.0728291316526612, "grad_norm": 3.3905668580146444, "learning_rate": 1.978611195926916e-06, "loss": 1.0766, "step": 70300 }, { "epoch": 2.0729765590446707, "grad_norm": 3.438509543683611, "learning_rate": 1.978044237658591e-06, "loss": 1.04, "step": 70305 }, { "epoch": 2.0731239864366797, "grad_norm": 3.5086824084456167, "learning_rate": 1.9774773315320914e-06, "loss": 1.0456, "step": 70310 }, { "epoch": 2.073271413828689, "grad_norm": 3.467487528312058, "learning_rate": 1.9769104775640937e-06, "loss": 1.0679, "step": 70315 }, { "epoch": 2.0734188412206986, "grad_norm": 3.3527091365141812, "learning_rate": 1.97634367577128e-06, "loss": 1.0351, "step": 70320 }, { "epoch": 2.073566268612708, "grad_norm": 3.6862751352914085, "learning_rate": 1.975776926170329e-06, "loss": 1.0763, "step": 70325 }, { "epoch": 2.0737136960047176, "grad_norm": 3.388036615308413, "learning_rate": 1.9752102287779176e-06, "loss": 1.0813, "step": 70330 }, { "epoch": 2.073861123396727, "grad_norm": 3.258857811234583, "learning_rate": 1.9746435836107234e-06, "loss": 1.0184, "step": 70335 }, { "epoch": 2.0740085507887365, "grad_norm": 3.3846126086816293, "learning_rate": 1.974076990685418e-06, "loss": 1.0239, "step": 70340 }, { "epoch": 2.074155978180746, "grad_norm": 3.4673174131679407, "learning_rate": 1.9735104500186744e-06, "loss": 1.0532, "step": 70345 }, { "epoch": 2.0743034055727554, "grad_norm": 3.3440365334848163, "learning_rate": 1.9729439616271643e-06, "loss": 1.0448, "step": 70350 }, { "epoch": 2.074450832964765, "grad_norm": 3.510780612946346, "learning_rate": 1.972377525527557e-06, "loss": 1.1064, "step": 70355 }, { "epoch": 2.0745982603567743, "grad_norm": 3.1972254283431774, "learning_rate": 1.971811141736522e-06, "loss": 1.012, "step": 70360 }, { "epoch": 2.0747456877487838, "grad_norm": 3.353353776475648, "learning_rate": 1.971244810270724e-06, "loss": 1.0559, "step": 70365 }, { "epoch": 2.074893115140793, "grad_norm": 3.351151286660876, "learning_rate": 1.9706785311468287e-06, "loss": 1.0418, "step": 70370 }, { "epoch": 2.0750405425328027, "grad_norm": 3.507675025401953, "learning_rate": 1.9701123043815004e-06, "loss": 1.0823, "step": 70375 }, { "epoch": 2.075187969924812, "grad_norm": 3.2376211314181003, "learning_rate": 1.9695461299913977e-06, "loss": 1.0673, "step": 70380 }, { "epoch": 2.0753353973168216, "grad_norm": 3.320184354807253, "learning_rate": 1.9689800079931856e-06, "loss": 1.0565, "step": 70385 }, { "epoch": 2.075482824708831, "grad_norm": 3.270883465472327, "learning_rate": 1.9684139384035197e-06, "loss": 1.0202, "step": 70390 }, { "epoch": 2.0756302521008405, "grad_norm": 3.2130687209684394, "learning_rate": 1.967847921239057e-06, "loss": 1.0609, "step": 70395 }, { "epoch": 2.07577767949285, "grad_norm": 3.414690900868495, "learning_rate": 1.9672819565164563e-06, "loss": 1.0766, "step": 70400 }, { "epoch": 2.075925106884859, "grad_norm": 3.536194893070148, "learning_rate": 1.966716044252367e-06, "loss": 1.0999, "step": 70405 }, { "epoch": 2.0760725342768684, "grad_norm": 3.485939216758057, "learning_rate": 1.966150184463447e-06, "loss": 1.094, "step": 70410 }, { "epoch": 2.076219961668878, "grad_norm": 3.3165751015943266, "learning_rate": 1.965584377166343e-06, "loss": 1.0156, "step": 70415 }, { "epoch": 2.0763673890608874, "grad_norm": 3.3565350797640945, "learning_rate": 1.9650186223777066e-06, "loss": 1.0718, "step": 70420 }, { "epoch": 2.076514816452897, "grad_norm": 3.372354366280785, "learning_rate": 1.9644529201141854e-06, "loss": 1.0348, "step": 70425 }, { "epoch": 2.0766622438449063, "grad_norm": 3.347195017689407, "learning_rate": 1.9638872703924263e-06, "loss": 1.0365, "step": 70430 }, { "epoch": 2.0768096712369157, "grad_norm": 3.25601034203589, "learning_rate": 1.9633216732290734e-06, "loss": 1.0196, "step": 70435 }, { "epoch": 2.076957098628925, "grad_norm": 3.4309937548873517, "learning_rate": 1.9627561286407704e-06, "loss": 1.058, "step": 70440 }, { "epoch": 2.0771045260209346, "grad_norm": 3.4373151678030722, "learning_rate": 1.9621906366441596e-06, "loss": 1.0688, "step": 70445 }, { "epoch": 2.077251953412944, "grad_norm": 3.2449727064013025, "learning_rate": 1.9616251972558817e-06, "loss": 1.0422, "step": 70450 }, { "epoch": 2.0773993808049536, "grad_norm": 3.2487753458480766, "learning_rate": 1.9610598104925715e-06, "loss": 1.0445, "step": 70455 }, { "epoch": 2.077546808196963, "grad_norm": 3.3646750555852294, "learning_rate": 1.9604944763708725e-06, "loss": 1.0552, "step": 70460 }, { "epoch": 2.0776942355889725, "grad_norm": 3.448897362811302, "learning_rate": 1.959929194907415e-06, "loss": 1.0288, "step": 70465 }, { "epoch": 2.077841662980982, "grad_norm": 3.145232616458839, "learning_rate": 1.9593639661188355e-06, "loss": 1.0494, "step": 70470 }, { "epoch": 2.0779890903729914, "grad_norm": 3.2269099803330668, "learning_rate": 1.9587987900217673e-06, "loss": 1.015, "step": 70475 }, { "epoch": 2.078136517765001, "grad_norm": 3.1467168860905907, "learning_rate": 1.958233666632837e-06, "loss": 1.0688, "step": 70480 }, { "epoch": 2.0782839451570103, "grad_norm": 3.3210187616871143, "learning_rate": 1.95766859596868e-06, "loss": 1.0517, "step": 70485 }, { "epoch": 2.0784313725490198, "grad_norm": 3.278052465514213, "learning_rate": 1.957103578045918e-06, "loss": 1.0045, "step": 70490 }, { "epoch": 2.0785787999410292, "grad_norm": 3.2749437716191405, "learning_rate": 1.9565386128811837e-06, "loss": 1.0503, "step": 70495 }, { "epoch": 2.0787262273330387, "grad_norm": 3.455244950647114, "learning_rate": 1.9559737004910973e-06, "loss": 1.0835, "step": 70500 }, { "epoch": 2.0787262273330387, "eval_loss": 1.0868641138076782, "eval_runtime": 4.2429, "eval_samples_per_second": 93.332, "eval_steps_per_second": 3.064, "step": 70500 }, { "epoch": 2.0788736547250477, "grad_norm": 3.3602836371285014, "learning_rate": 1.955408840892283e-06, "loss": 1.0717, "step": 70505 }, { "epoch": 2.079021082117057, "grad_norm": 3.338905506613433, "learning_rate": 1.954844034101363e-06, "loss": 1.082, "step": 70510 }, { "epoch": 2.0791685095090666, "grad_norm": 3.4162006606997486, "learning_rate": 1.9542792801349572e-06, "loss": 0.9992, "step": 70515 }, { "epoch": 2.079315936901076, "grad_norm": 3.5313167373240053, "learning_rate": 1.9537145790096838e-06, "loss": 1.0763, "step": 70520 }, { "epoch": 2.0794633642930855, "grad_norm": 3.3619118564835455, "learning_rate": 1.9531499307421623e-06, "loss": 1.0391, "step": 70525 }, { "epoch": 2.079610791685095, "grad_norm": 3.3539337577772272, "learning_rate": 1.952585335349004e-06, "loss": 1.0582, "step": 70530 }, { "epoch": 2.0797582190771045, "grad_norm": 3.4138751795307716, "learning_rate": 1.952020792846824e-06, "loss": 1.0277, "step": 70535 }, { "epoch": 2.079905646469114, "grad_norm": 3.2834059731659697, "learning_rate": 1.9514563032522357e-06, "loss": 1.0345, "step": 70540 }, { "epoch": 2.0800530738611234, "grad_norm": 3.4255348597998374, "learning_rate": 1.950891866581849e-06, "loss": 1.0807, "step": 70545 }, { "epoch": 2.080200501253133, "grad_norm": 3.438847608527526, "learning_rate": 1.950327482852276e-06, "loss": 1.0633, "step": 70550 }, { "epoch": 2.0803479286451423, "grad_norm": 3.3817606471085897, "learning_rate": 1.9497631520801176e-06, "loss": 1.0386, "step": 70555 }, { "epoch": 2.0804953560371517, "grad_norm": 3.482337187386872, "learning_rate": 1.9491988742819877e-06, "loss": 1.077, "step": 70560 }, { "epoch": 2.080642783429161, "grad_norm": 3.4096340768616344, "learning_rate": 1.9486346494744842e-06, "loss": 1.0514, "step": 70565 }, { "epoch": 2.0807902108211707, "grad_norm": 3.3113349961502982, "learning_rate": 1.948070477674216e-06, "loss": 1.0433, "step": 70570 }, { "epoch": 2.08093763821318, "grad_norm": 3.321288397172966, "learning_rate": 1.9475063588977804e-06, "loss": 1.0819, "step": 70575 }, { "epoch": 2.0810850656051896, "grad_norm": 3.394935201272338, "learning_rate": 1.946942293161778e-06, "loss": 1.0834, "step": 70580 }, { "epoch": 2.081232492997199, "grad_norm": 3.4158081225283814, "learning_rate": 1.946378280482808e-06, "loss": 1.0619, "step": 70585 }, { "epoch": 2.0813799203892085, "grad_norm": 3.520731958204436, "learning_rate": 1.9458143208774663e-06, "loss": 1.0627, "step": 70590 }, { "epoch": 2.081527347781218, "grad_norm": 3.179719278679903, "learning_rate": 1.9452504143623503e-06, "loss": 1.0279, "step": 70595 }, { "epoch": 2.0816747751732274, "grad_norm": 3.3919516676477017, "learning_rate": 1.9446865609540494e-06, "loss": 1.0424, "step": 70600 }, { "epoch": 2.0818222025652364, "grad_norm": 3.484341565228483, "learning_rate": 1.9441227606691583e-06, "loss": 1.0727, "step": 70605 }, { "epoch": 2.081969629957246, "grad_norm": 3.3653798162870525, "learning_rate": 1.943559013524267e-06, "loss": 1.0695, "step": 70610 }, { "epoch": 2.0821170573492553, "grad_norm": 3.358457444629967, "learning_rate": 1.9429953195359647e-06, "loss": 1.0491, "step": 70615 }, { "epoch": 2.082264484741265, "grad_norm": 3.391842526291558, "learning_rate": 1.942431678720838e-06, "loss": 1.0797, "step": 70620 }, { "epoch": 2.0824119121332743, "grad_norm": 3.406762594079158, "learning_rate": 1.9418680910954733e-06, "loss": 1.0171, "step": 70625 }, { "epoch": 2.0825593395252837, "grad_norm": 3.1792452814001813, "learning_rate": 1.941304556676455e-06, "loss": 1.0592, "step": 70630 }, { "epoch": 2.082706766917293, "grad_norm": 3.31181735730089, "learning_rate": 1.940741075480367e-06, "loss": 1.0456, "step": 70635 }, { "epoch": 2.0828541943093026, "grad_norm": 3.4353734561910185, "learning_rate": 1.9401776475237857e-06, "loss": 1.0843, "step": 70640 }, { "epoch": 2.083001621701312, "grad_norm": 3.3825057021009846, "learning_rate": 1.9396142728232963e-06, "loss": 1.0738, "step": 70645 }, { "epoch": 2.0831490490933215, "grad_norm": 3.232009193529943, "learning_rate": 1.939050951395473e-06, "loss": 1.0515, "step": 70650 }, { "epoch": 2.083296476485331, "grad_norm": 3.4156383986556116, "learning_rate": 1.9384876832568933e-06, "loss": 1.0138, "step": 70655 }, { "epoch": 2.0834439038773405, "grad_norm": 3.5648388959007438, "learning_rate": 1.9379244684241335e-06, "loss": 1.0951, "step": 70660 }, { "epoch": 2.08359133126935, "grad_norm": 3.3269077561907205, "learning_rate": 1.9373613069137625e-06, "loss": 1.0294, "step": 70665 }, { "epoch": 2.0837387586613594, "grad_norm": 3.3616247560177457, "learning_rate": 1.936798198742358e-06, "loss": 0.9895, "step": 70670 }, { "epoch": 2.083886186053369, "grad_norm": 3.548077733090315, "learning_rate": 1.9362351439264854e-06, "loss": 1.0779, "step": 70675 }, { "epoch": 2.0840336134453783, "grad_norm": 3.4024519713676917, "learning_rate": 1.9356721424827147e-06, "loss": 1.0174, "step": 70680 }, { "epoch": 2.0841810408373878, "grad_norm": 5.8208787460105835, "learning_rate": 1.9351091944276127e-06, "loss": 0.999, "step": 70685 }, { "epoch": 2.084328468229397, "grad_norm": 3.468016050084249, "learning_rate": 1.9345462997777454e-06, "loss": 1.0722, "step": 70690 }, { "epoch": 2.0844758956214067, "grad_norm": 3.3729952983714733, "learning_rate": 1.933983458549676e-06, "loss": 1.0386, "step": 70695 }, { "epoch": 2.0846233230134157, "grad_norm": 3.441109765700638, "learning_rate": 1.9334206707599675e-06, "loss": 1.0995, "step": 70700 }, { "epoch": 2.084770750405425, "grad_norm": 3.5372872047230572, "learning_rate": 1.9328579364251797e-06, "loss": 1.0713, "step": 70705 }, { "epoch": 2.0849181777974346, "grad_norm": 3.2892759127812656, "learning_rate": 1.9322952555618733e-06, "loss": 1.06, "step": 70710 }, { "epoch": 2.085065605189444, "grad_norm": 3.316392131035863, "learning_rate": 1.931732628186602e-06, "loss": 1.0503, "step": 70715 }, { "epoch": 2.0852130325814535, "grad_norm": 3.625402619940994, "learning_rate": 1.9311700543159274e-06, "loss": 1.0879, "step": 70720 }, { "epoch": 2.085360459973463, "grad_norm": 3.5036907644201696, "learning_rate": 1.9306075339663997e-06, "loss": 1.0996, "step": 70725 }, { "epoch": 2.0855078873654724, "grad_norm": 3.479213826307781, "learning_rate": 1.9300450671545723e-06, "loss": 1.0543, "step": 70730 }, { "epoch": 2.085655314757482, "grad_norm": 3.483563901926104, "learning_rate": 1.929482653896998e-06, "loss": 1.0232, "step": 70735 }, { "epoch": 2.0858027421494914, "grad_norm": 3.309372431858128, "learning_rate": 1.9289202942102234e-06, "loss": 1.0368, "step": 70740 }, { "epoch": 2.085950169541501, "grad_norm": 3.432273966399155, "learning_rate": 1.928357988110801e-06, "loss": 1.0521, "step": 70745 }, { "epoch": 2.0860975969335103, "grad_norm": 3.407515467095712, "learning_rate": 1.927795735615272e-06, "loss": 1.044, "step": 70750 }, { "epoch": 2.0862450243255197, "grad_norm": 3.252951054175211, "learning_rate": 1.9272335367401874e-06, "loss": 1.0378, "step": 70755 }, { "epoch": 2.086392451717529, "grad_norm": 3.4650066844857688, "learning_rate": 1.9266713915020853e-06, "loss": 1.0328, "step": 70760 }, { "epoch": 2.0865398791095386, "grad_norm": 3.508446820888764, "learning_rate": 1.9261092999175104e-06, "loss": 1.001, "step": 70765 }, { "epoch": 2.086687306501548, "grad_norm": 3.4153021826271246, "learning_rate": 1.9255472620030016e-06, "loss": 1.0422, "step": 70770 }, { "epoch": 2.0868347338935576, "grad_norm": 3.4307206803088666, "learning_rate": 1.9249852777750975e-06, "loss": 1.0888, "step": 70775 }, { "epoch": 2.086982161285567, "grad_norm": 3.305784071949976, "learning_rate": 1.924423347250336e-06, "loss": 1.0126, "step": 70780 }, { "epoch": 2.0871295886775765, "grad_norm": 3.236148710567839, "learning_rate": 1.9238614704452536e-06, "loss": 1.0182, "step": 70785 }, { "epoch": 2.087277016069586, "grad_norm": 3.315055446957324, "learning_rate": 1.923299647376381e-06, "loss": 1.0077, "step": 70790 }, { "epoch": 2.087424443461595, "grad_norm": 3.3843924538668566, "learning_rate": 1.9227378780602518e-06, "loss": 1.059, "step": 70795 }, { "epoch": 2.0875718708536044, "grad_norm": 3.4759111436458876, "learning_rate": 1.9221761625133982e-06, "loss": 1.0081, "step": 70800 }, { "epoch": 2.087719298245614, "grad_norm": 3.4710554439085195, "learning_rate": 1.921614500752347e-06, "loss": 1.0539, "step": 70805 }, { "epoch": 2.0878667256376233, "grad_norm": 3.383652152579702, "learning_rate": 1.921052892793629e-06, "loss": 1.0659, "step": 70810 }, { "epoch": 2.088014153029633, "grad_norm": 3.234603344953185, "learning_rate": 1.920491338653765e-06, "loss": 1.0323, "step": 70815 }, { "epoch": 2.0881615804216422, "grad_norm": 3.20114415146981, "learning_rate": 1.9199298383492855e-06, "loss": 1.0339, "step": 70820 }, { "epoch": 2.0883090078136517, "grad_norm": 3.378270978424811, "learning_rate": 1.919368391896707e-06, "loss": 1.0651, "step": 70825 }, { "epoch": 2.088456435205661, "grad_norm": 3.5618027538931467, "learning_rate": 1.918806999312557e-06, "loss": 1.0981, "step": 70830 }, { "epoch": 2.0886038625976706, "grad_norm": 3.60534773124979, "learning_rate": 1.9182456606133507e-06, "loss": 1.1018, "step": 70835 }, { "epoch": 2.08875128998968, "grad_norm": 3.2292699611539786, "learning_rate": 1.9176843758156076e-06, "loss": 1.032, "step": 70840 }, { "epoch": 2.0888987173816895, "grad_norm": 3.6255849523527175, "learning_rate": 1.9171231449358434e-06, "loss": 1.1158, "step": 70845 }, { "epoch": 2.089046144773699, "grad_norm": 3.335136170558069, "learning_rate": 1.9165619679905743e-06, "loss": 1.0434, "step": 70850 }, { "epoch": 2.0891935721657084, "grad_norm": 3.48846515350584, "learning_rate": 1.9160008449963137e-06, "loss": 1.0533, "step": 70855 }, { "epoch": 2.089340999557718, "grad_norm": 3.4897285810742504, "learning_rate": 1.9154397759695714e-06, "loss": 1.0653, "step": 70860 }, { "epoch": 2.0894884269497274, "grad_norm": 3.348760318876051, "learning_rate": 1.9148787609268587e-06, "loss": 1.0436, "step": 70865 }, { "epoch": 2.089635854341737, "grad_norm": 3.42067525337708, "learning_rate": 1.9143177998846835e-06, "loss": 1.0552, "step": 70870 }, { "epoch": 2.0897832817337463, "grad_norm": 3.356323262208626, "learning_rate": 1.9137568928595535e-06, "loss": 1.0042, "step": 70875 }, { "epoch": 2.0899307091257557, "grad_norm": 3.2018103735230583, "learning_rate": 1.913196039867973e-06, "loss": 1.0479, "step": 70880 }, { "epoch": 2.090078136517765, "grad_norm": 3.3729810676961507, "learning_rate": 1.9126352409264474e-06, "loss": 1.0466, "step": 70885 }, { "epoch": 2.090225563909774, "grad_norm": 3.732911478382552, "learning_rate": 1.9120744960514777e-06, "loss": 1.0619, "step": 70890 }, { "epoch": 2.0903729913017837, "grad_norm": 3.4351227501429626, "learning_rate": 1.911513805259566e-06, "loss": 1.0677, "step": 70895 }, { "epoch": 2.090520418693793, "grad_norm": 3.307393678528065, "learning_rate": 1.9109531685672067e-06, "loss": 1.026, "step": 70900 }, { "epoch": 2.0906678460858026, "grad_norm": 3.330761756366923, "learning_rate": 1.9103925859909032e-06, "loss": 1.0845, "step": 70905 }, { "epoch": 2.090815273477812, "grad_norm": 3.44705567698431, "learning_rate": 1.9098320575471478e-06, "loss": 1.0748, "step": 70910 }, { "epoch": 2.0909627008698215, "grad_norm": 3.522031188073926, "learning_rate": 1.9092715832524345e-06, "loss": 1.0442, "step": 70915 }, { "epoch": 2.091110128261831, "grad_norm": 3.2750822994760016, "learning_rate": 1.9087111631232585e-06, "loss": 1.0423, "step": 70920 }, { "epoch": 2.0912575556538404, "grad_norm": 3.3190668576688114, "learning_rate": 1.908150797176106e-06, "loss": 1.024, "step": 70925 }, { "epoch": 2.09140498304585, "grad_norm": 3.5033388939654473, "learning_rate": 1.9075904854274732e-06, "loss": 1.0972, "step": 70930 }, { "epoch": 2.0915524104378593, "grad_norm": 3.4640402957931338, "learning_rate": 1.9070302278938417e-06, "loss": 1.0658, "step": 70935 }, { "epoch": 2.091699837829869, "grad_norm": 3.475928104616367, "learning_rate": 1.9064700245917006e-06, "loss": 1.052, "step": 70940 }, { "epoch": 2.0918472652218782, "grad_norm": 3.473192424371864, "learning_rate": 1.905909875537534e-06, "loss": 1.0384, "step": 70945 }, { "epoch": 2.0919946926138877, "grad_norm": 3.409825475479683, "learning_rate": 1.9053497807478252e-06, "loss": 1.037, "step": 70950 }, { "epoch": 2.092142120005897, "grad_norm": 3.2990329960078943, "learning_rate": 1.9047897402390554e-06, "loss": 1.0617, "step": 70955 }, { "epoch": 2.0922895473979066, "grad_norm": 3.2409938186277065, "learning_rate": 1.9042297540277048e-06, "loss": 1.0058, "step": 70960 }, { "epoch": 2.092436974789916, "grad_norm": 3.4115218965824683, "learning_rate": 1.9036698221302509e-06, "loss": 1.0351, "step": 70965 }, { "epoch": 2.0925844021819255, "grad_norm": 3.3684046436657087, "learning_rate": 1.9031099445631722e-06, "loss": 1.0493, "step": 70970 }, { "epoch": 2.092731829573935, "grad_norm": 3.322340859376333, "learning_rate": 1.9025501213429397e-06, "loss": 1.0082, "step": 70975 }, { "epoch": 2.0928792569659445, "grad_norm": 3.5207303809248134, "learning_rate": 1.9019903524860323e-06, "loss": 1.0603, "step": 70980 }, { "epoch": 2.093026684357954, "grad_norm": 3.3915571573026138, "learning_rate": 1.9014306380089172e-06, "loss": 1.0606, "step": 70985 }, { "epoch": 2.093174111749963, "grad_norm": 3.347635025374349, "learning_rate": 1.9008709779280666e-06, "loss": 1.0714, "step": 70990 }, { "epoch": 2.0933215391419724, "grad_norm": 3.248766562131126, "learning_rate": 1.9003113722599508e-06, "loss": 1.0594, "step": 70995 }, { "epoch": 2.093468966533982, "grad_norm": 3.2360381551095716, "learning_rate": 1.899751821021032e-06, "loss": 1.0013, "step": 71000 }, { "epoch": 2.093468966533982, "eval_loss": 1.087652325630188, "eval_runtime": 4.1921, "eval_samples_per_second": 94.463, "eval_steps_per_second": 3.101, "step": 71000 }, { "epoch": 2.0936163939259913, "grad_norm": 3.409960849461808, "learning_rate": 1.8991923242277808e-06, "loss": 1.0164, "step": 71005 }, { "epoch": 2.0937638213180008, "grad_norm": 3.4767951079741257, "learning_rate": 1.8986328818966569e-06, "loss": 1.0648, "step": 71010 }, { "epoch": 2.09391124871001, "grad_norm": 3.3594666784786513, "learning_rate": 1.898073494044127e-06, "loss": 1.0585, "step": 71015 }, { "epoch": 2.0940586761020197, "grad_norm": 3.2865401897861926, "learning_rate": 1.8975141606866474e-06, "loss": 1.0418, "step": 71020 }, { "epoch": 2.094206103494029, "grad_norm": 3.3775217316462016, "learning_rate": 1.89695488184068e-06, "loss": 1.1139, "step": 71025 }, { "epoch": 2.0943535308860386, "grad_norm": 3.222208655026888, "learning_rate": 1.89639565752268e-06, "loss": 1.0308, "step": 71030 }, { "epoch": 2.094500958278048, "grad_norm": 3.148168610037647, "learning_rate": 1.8958364877491047e-06, "loss": 1.049, "step": 71035 }, { "epoch": 2.0946483856700575, "grad_norm": 3.3881612832210943, "learning_rate": 1.8952773725364081e-06, "loss": 1.0498, "step": 71040 }, { "epoch": 2.094795813062067, "grad_norm": 3.340402576833339, "learning_rate": 1.8947183119010439e-06, "loss": 1.0156, "step": 71045 }, { "epoch": 2.0949432404540764, "grad_norm": 3.3693964136450627, "learning_rate": 1.8941593058594611e-06, "loss": 1.0456, "step": 71050 }, { "epoch": 2.095090667846086, "grad_norm": 3.4156825562513737, "learning_rate": 1.8936003544281093e-06, "loss": 1.041, "step": 71055 }, { "epoch": 2.0952380952380953, "grad_norm": 3.448460942351323, "learning_rate": 1.8930414576234368e-06, "loss": 1.0504, "step": 71060 }, { "epoch": 2.095385522630105, "grad_norm": 3.465119342494603, "learning_rate": 1.8924826154618898e-06, "loss": 1.1019, "step": 71065 }, { "epoch": 2.0955329500221143, "grad_norm": 3.231693293924225, "learning_rate": 1.891923827959915e-06, "loss": 1.066, "step": 71070 }, { "epoch": 2.0956803774141237, "grad_norm": 3.2708785769680175, "learning_rate": 1.8913650951339501e-06, "loss": 1.0904, "step": 71075 }, { "epoch": 2.095827804806133, "grad_norm": 3.5220049706358294, "learning_rate": 1.8908064170004422e-06, "loss": 1.0628, "step": 71080 }, { "epoch": 2.0959752321981426, "grad_norm": 3.403768313689546, "learning_rate": 1.8902477935758259e-06, "loss": 1.0506, "step": 71085 }, { "epoch": 2.0961226595901516, "grad_norm": 3.4783274161333213, "learning_rate": 1.8896892248765442e-06, "loss": 1.0848, "step": 71090 }, { "epoch": 2.096270086982161, "grad_norm": 3.2705343737356607, "learning_rate": 1.8891307109190304e-06, "loss": 1.0376, "step": 71095 }, { "epoch": 2.0964175143741706, "grad_norm": 3.499261590609052, "learning_rate": 1.88857225171972e-06, "loss": 1.0017, "step": 71100 }, { "epoch": 2.09656494176618, "grad_norm": 3.37462065806656, "learning_rate": 1.8880138472950467e-06, "loss": 1.0431, "step": 71105 }, { "epoch": 2.0967123691581895, "grad_norm": 3.3667856141802037, "learning_rate": 1.8874554976614423e-06, "loss": 1.0415, "step": 71110 }, { "epoch": 2.096859796550199, "grad_norm": 3.4376239049679977, "learning_rate": 1.886897202835338e-06, "loss": 1.0579, "step": 71115 }, { "epoch": 2.0970072239422084, "grad_norm": 3.3167053460186313, "learning_rate": 1.8863389628331597e-06, "loss": 1.0564, "step": 71120 }, { "epoch": 2.097154651334218, "grad_norm": 3.346469549777573, "learning_rate": 1.8857807776713353e-06, "loss": 1.0269, "step": 71125 }, { "epoch": 2.0973020787262273, "grad_norm": 3.324893739201576, "learning_rate": 1.8852226473662901e-06, "loss": 1.0259, "step": 71130 }, { "epoch": 2.0974495061182368, "grad_norm": 3.365993215799295, "learning_rate": 1.8846645719344483e-06, "loss": 1.0465, "step": 71135 }, { "epoch": 2.0975969335102462, "grad_norm": 3.530037054076324, "learning_rate": 1.884106551392231e-06, "loss": 1.0426, "step": 71140 }, { "epoch": 2.0977443609022557, "grad_norm": 3.3576084947346367, "learning_rate": 1.883548585756059e-06, "loss": 1.0304, "step": 71145 }, { "epoch": 2.097891788294265, "grad_norm": 3.3303660481810744, "learning_rate": 1.8829906750423514e-06, "loss": 1.0679, "step": 71150 }, { "epoch": 2.0980392156862746, "grad_norm": 3.3794654554553953, "learning_rate": 1.8824328192675268e-06, "loss": 1.0395, "step": 71155 }, { "epoch": 2.098186643078284, "grad_norm": 3.2526846695760057, "learning_rate": 1.8818750184479957e-06, "loss": 1.0374, "step": 71160 }, { "epoch": 2.0983340704702935, "grad_norm": 3.437933266702077, "learning_rate": 1.8813172726001782e-06, "loss": 1.0812, "step": 71165 }, { "epoch": 2.098481497862303, "grad_norm": 3.567575759507257, "learning_rate": 1.880759581740483e-06, "loss": 1.0804, "step": 71170 }, { "epoch": 2.0986289252543124, "grad_norm": 3.3959860805255886, "learning_rate": 1.8802019458853207e-06, "loss": 1.0589, "step": 71175 }, { "epoch": 2.098776352646322, "grad_norm": 3.383130571696989, "learning_rate": 1.879644365051103e-06, "loss": 1.0322, "step": 71180 }, { "epoch": 2.098923780038331, "grad_norm": 3.4299970111814178, "learning_rate": 1.8790868392542329e-06, "loss": 1.0626, "step": 71185 }, { "epoch": 2.0990712074303404, "grad_norm": 3.4154059778780943, "learning_rate": 1.8785293685111218e-06, "loss": 1.066, "step": 71190 }, { "epoch": 2.09921863482235, "grad_norm": 3.268266542736776, "learning_rate": 1.8779719528381691e-06, "loss": 1.0234, "step": 71195 }, { "epoch": 2.0993660622143593, "grad_norm": 3.2591064759685637, "learning_rate": 1.8774145922517793e-06, "loss": 1.0351, "step": 71200 }, { "epoch": 2.0995134896063687, "grad_norm": 3.295881773895617, "learning_rate": 1.8768572867683542e-06, "loss": 1.0443, "step": 71205 }, { "epoch": 2.099660916998378, "grad_norm": 3.3941676383121817, "learning_rate": 1.8763000364042914e-06, "loss": 1.0341, "step": 71210 }, { "epoch": 2.0998083443903877, "grad_norm": 3.3422297717029723, "learning_rate": 1.8757428411759905e-06, "loss": 1.074, "step": 71215 }, { "epoch": 2.099955771782397, "grad_norm": 3.383944860544674, "learning_rate": 1.8751857010998457e-06, "loss": 1.0638, "step": 71220 }, { "epoch": 2.1001031991744066, "grad_norm": 3.242582496509312, "learning_rate": 1.8746286161922532e-06, "loss": 1.0382, "step": 71225 }, { "epoch": 2.100250626566416, "grad_norm": 3.4475798445443178, "learning_rate": 1.8740715864696065e-06, "loss": 1.0295, "step": 71230 }, { "epoch": 2.1003980539584255, "grad_norm": 3.3002206362929063, "learning_rate": 1.8735146119482925e-06, "loss": 1.0563, "step": 71235 }, { "epoch": 2.100545481350435, "grad_norm": 3.329146419392388, "learning_rate": 1.8729576926447062e-06, "loss": 1.0504, "step": 71240 }, { "epoch": 2.1006929087424444, "grad_norm": 3.284170114027446, "learning_rate": 1.8724008285752322e-06, "loss": 1.0534, "step": 71245 }, { "epoch": 2.100840336134454, "grad_norm": 3.402752332660805, "learning_rate": 1.8718440197562576e-06, "loss": 1.0877, "step": 71250 }, { "epoch": 2.1009877635264633, "grad_norm": 3.287444029012863, "learning_rate": 1.8712872662041688e-06, "loss": 1.0568, "step": 71255 }, { "epoch": 2.101135190918473, "grad_norm": 3.2566474415591165, "learning_rate": 1.8707305679353448e-06, "loss": 1.058, "step": 71260 }, { "epoch": 2.1012826183104822, "grad_norm": 3.29608822367336, "learning_rate": 1.8701739249661723e-06, "loss": 1.0118, "step": 71265 }, { "epoch": 2.1014300457024917, "grad_norm": 3.337093677352493, "learning_rate": 1.8696173373130256e-06, "loss": 1.0391, "step": 71270 }, { "epoch": 2.101577473094501, "grad_norm": 3.3540064223534953, "learning_rate": 1.8690608049922895e-06, "loss": 1.0681, "step": 71275 }, { "epoch": 2.10172490048651, "grad_norm": 3.4775870380325133, "learning_rate": 1.868504328020335e-06, "loss": 1.0435, "step": 71280 }, { "epoch": 2.1018723278785196, "grad_norm": 3.459785594927166, "learning_rate": 1.8679479064135391e-06, "loss": 1.0763, "step": 71285 }, { "epoch": 2.102019755270529, "grad_norm": 3.3297371824833095, "learning_rate": 1.8673915401882752e-06, "loss": 1.0754, "step": 71290 }, { "epoch": 2.1021671826625385, "grad_norm": 3.528261236156059, "learning_rate": 1.8668352293609153e-06, "loss": 1.0676, "step": 71295 }, { "epoch": 2.102314610054548, "grad_norm": 3.46388172963101, "learning_rate": 1.8662789739478287e-06, "loss": 1.0785, "step": 71300 }, { "epoch": 2.1024620374465575, "grad_norm": 3.198116803687407, "learning_rate": 1.865722773965386e-06, "loss": 1.0311, "step": 71305 }, { "epoch": 2.102609464838567, "grad_norm": 3.3722247460702, "learning_rate": 1.8651666294299507e-06, "loss": 1.0128, "step": 71310 }, { "epoch": 2.1027568922305764, "grad_norm": 3.459975229571224, "learning_rate": 1.8646105403578897e-06, "loss": 1.0465, "step": 71315 }, { "epoch": 2.102904319622586, "grad_norm": 3.4001437985403182, "learning_rate": 1.8640545067655662e-06, "loss": 1.0133, "step": 71320 }, { "epoch": 2.1030517470145953, "grad_norm": 3.4860764093383434, "learning_rate": 1.8634985286693424e-06, "loss": 1.0418, "step": 71325 }, { "epoch": 2.1031991744066048, "grad_norm": 3.2893085201436603, "learning_rate": 1.86294260608558e-06, "loss": 1.0615, "step": 71330 }, { "epoch": 2.103346601798614, "grad_norm": 3.395642541087702, "learning_rate": 1.8623867390306336e-06, "loss": 1.1106, "step": 71335 }, { "epoch": 2.1034940291906237, "grad_norm": 3.416001723077916, "learning_rate": 1.8618309275208653e-06, "loss": 1.0747, "step": 71340 }, { "epoch": 2.103641456582633, "grad_norm": 3.284105212878211, "learning_rate": 1.8612751715726253e-06, "loss": 1.0435, "step": 71345 }, { "epoch": 2.1037888839746426, "grad_norm": 3.5930739398718536, "learning_rate": 1.860719471202273e-06, "loss": 1.0633, "step": 71350 }, { "epoch": 2.103936311366652, "grad_norm": 3.348463500281121, "learning_rate": 1.8601638264261563e-06, "loss": 1.0207, "step": 71355 }, { "epoch": 2.1040837387586615, "grad_norm": 3.5519245825124885, "learning_rate": 1.8596082372606268e-06, "loss": 1.0451, "step": 71360 }, { "epoch": 2.104231166150671, "grad_norm": 3.490048116687625, "learning_rate": 1.8590527037220333e-06, "loss": 1.067, "step": 71365 }, { "epoch": 2.1043785935426804, "grad_norm": 3.303562447796519, "learning_rate": 1.8584972258267237e-06, "loss": 1.0555, "step": 71370 }, { "epoch": 2.10452602093469, "grad_norm": 3.401823212743886, "learning_rate": 1.8579418035910443e-06, "loss": 1.0716, "step": 71375 }, { "epoch": 2.104673448326699, "grad_norm": 3.489704383363052, "learning_rate": 1.8573864370313364e-06, "loss": 1.0456, "step": 71380 }, { "epoch": 2.1048208757187084, "grad_norm": 3.3980291241524463, "learning_rate": 1.8568311261639436e-06, "loss": 1.0715, "step": 71385 }, { "epoch": 2.104968303110718, "grad_norm": 3.352955192448281, "learning_rate": 1.8562758710052069e-06, "loss": 1.0716, "step": 71390 }, { "epoch": 2.1051157305027273, "grad_norm": 3.304540547564835, "learning_rate": 1.8557206715714652e-06, "loss": 1.0764, "step": 71395 }, { "epoch": 2.1052631578947367, "grad_norm": 3.32543043712386, "learning_rate": 1.8551655278790557e-06, "loss": 1.0554, "step": 71400 }, { "epoch": 2.105410585286746, "grad_norm": 3.38583516297198, "learning_rate": 1.8546104399443141e-06, "loss": 1.0879, "step": 71405 }, { "epoch": 2.1055580126787556, "grad_norm": 3.343027431847517, "learning_rate": 1.8540554077835744e-06, "loss": 1.0417, "step": 71410 }, { "epoch": 2.105705440070765, "grad_norm": 3.328797028399354, "learning_rate": 1.853500431413171e-06, "loss": 1.0863, "step": 71415 }, { "epoch": 2.1058528674627746, "grad_norm": 3.3107201136541438, "learning_rate": 1.85294551084943e-06, "loss": 1.0118, "step": 71420 }, { "epoch": 2.106000294854784, "grad_norm": 3.4246856286800496, "learning_rate": 1.8523906461086866e-06, "loss": 1.09, "step": 71425 }, { "epoch": 2.1061477222467935, "grad_norm": 3.236511433372394, "learning_rate": 1.8518358372072638e-06, "loss": 1.0121, "step": 71430 }, { "epoch": 2.106295149638803, "grad_norm": 3.652451826271617, "learning_rate": 1.8512810841614887e-06, "loss": 1.1008, "step": 71435 }, { "epoch": 2.1064425770308124, "grad_norm": 3.4308846231453387, "learning_rate": 1.8507263869876878e-06, "loss": 1.0298, "step": 71440 }, { "epoch": 2.106590004422822, "grad_norm": 3.454071615048582, "learning_rate": 1.8501717457021782e-06, "loss": 1.0591, "step": 71445 }, { "epoch": 2.1067374318148313, "grad_norm": 3.377428133608404, "learning_rate": 1.8496171603212875e-06, "loss": 1.0473, "step": 71450 }, { "epoch": 2.1068848592068408, "grad_norm": 3.3202274896242545, "learning_rate": 1.8490626308613304e-06, "loss": 1.0276, "step": 71455 }, { "epoch": 2.1070322865988502, "grad_norm": 3.381842623398561, "learning_rate": 1.8485081573386262e-06, "loss": 1.0706, "step": 71460 }, { "epoch": 2.1071797139908597, "grad_norm": 3.3136939994438115, "learning_rate": 1.8479537397694906e-06, "loss": 1.0521, "step": 71465 }, { "epoch": 2.107327141382869, "grad_norm": 3.420209111324743, "learning_rate": 1.8473993781702381e-06, "loss": 1.0576, "step": 71470 }, { "epoch": 2.1074745687748786, "grad_norm": 3.316095162921283, "learning_rate": 1.8468450725571813e-06, "loss": 1.0766, "step": 71475 }, { "epoch": 2.1076219961668876, "grad_norm": 3.276311227536722, "learning_rate": 1.8462908229466317e-06, "loss": 1.0388, "step": 71480 }, { "epoch": 2.107769423558897, "grad_norm": 3.482755328211367, "learning_rate": 1.8457366293548985e-06, "loss": 1.0755, "step": 71485 }, { "epoch": 2.1079168509509065, "grad_norm": 3.378714429934702, "learning_rate": 1.8451824917982905e-06, "loss": 1.0508, "step": 71490 }, { "epoch": 2.108064278342916, "grad_norm": 3.304870410449659, "learning_rate": 1.84462841029311e-06, "loss": 1.0889, "step": 71495 }, { "epoch": 2.1082117057349254, "grad_norm": 3.3436868175354006, "learning_rate": 1.8440743848556673e-06, "loss": 1.0327, "step": 71500 }, { "epoch": 2.1082117057349254, "eval_loss": 1.0870481729507446, "eval_runtime": 4.2867, "eval_samples_per_second": 92.379, "eval_steps_per_second": 3.033, "step": 71500 }, { "epoch": 2.108359133126935, "grad_norm": 3.3983034836045016, "learning_rate": 1.8435204155022605e-06, "loss": 1.0488, "step": 71505 }, { "epoch": 2.1085065605189444, "grad_norm": 3.511689903758968, "learning_rate": 1.8429665022491924e-06, "loss": 1.0597, "step": 71510 }, { "epoch": 2.108653987910954, "grad_norm": 3.1892601734519284, "learning_rate": 1.8424126451127644e-06, "loss": 1.0636, "step": 71515 }, { "epoch": 2.1088014153029633, "grad_norm": 3.1260683878767552, "learning_rate": 1.8418588441092693e-06, "loss": 0.9766, "step": 71520 }, { "epoch": 2.1089488426949727, "grad_norm": 3.3744459703332073, "learning_rate": 1.8413050992550095e-06, "loss": 1.0024, "step": 71525 }, { "epoch": 2.109096270086982, "grad_norm": 3.4496985787120376, "learning_rate": 1.8407514105662739e-06, "loss": 1.0483, "step": 71530 }, { "epoch": 2.1092436974789917, "grad_norm": 3.4693170636686204, "learning_rate": 1.8401977780593613e-06, "loss": 1.0427, "step": 71535 }, { "epoch": 2.109391124871001, "grad_norm": 3.4046756737279225, "learning_rate": 1.8396442017505577e-06, "loss": 1.0577, "step": 71540 }, { "epoch": 2.1095385522630106, "grad_norm": 3.379457684467593, "learning_rate": 1.8390906816561556e-06, "loss": 1.0376, "step": 71545 }, { "epoch": 2.10968597965502, "grad_norm": 3.258726265382426, "learning_rate": 1.8385372177924415e-06, "loss": 1.0714, "step": 71550 }, { "epoch": 2.1098334070470295, "grad_norm": 3.4431210942423345, "learning_rate": 1.837983810175703e-06, "loss": 1.1119, "step": 71555 }, { "epoch": 2.109980834439039, "grad_norm": 3.2732099613693397, "learning_rate": 1.8374304588222241e-06, "loss": 1.0239, "step": 71560 }, { "epoch": 2.1101282618310484, "grad_norm": 3.3768042270225207, "learning_rate": 1.8368771637482894e-06, "loss": 1.0869, "step": 71565 }, { "epoch": 2.110275689223058, "grad_norm": 3.3914505483287414, "learning_rate": 1.8363239249701755e-06, "loss": 1.0876, "step": 71570 }, { "epoch": 2.110423116615067, "grad_norm": 3.3861207519539316, "learning_rate": 1.8357707425041682e-06, "loss": 1.0499, "step": 71575 }, { "epoch": 2.1105705440070763, "grad_norm": 3.295579956227015, "learning_rate": 1.8352176163665412e-06, "loss": 1.0043, "step": 71580 }, { "epoch": 2.110717971399086, "grad_norm": 3.333093248062017, "learning_rate": 1.834664546573573e-06, "loss": 1.0573, "step": 71585 }, { "epoch": 2.1108653987910952, "grad_norm": 3.229888764747773, "learning_rate": 1.8341115331415383e-06, "loss": 1.0212, "step": 71590 }, { "epoch": 2.1110128261831047, "grad_norm": 3.4418924574947503, "learning_rate": 1.8335585760867065e-06, "loss": 1.0348, "step": 71595 }, { "epoch": 2.111160253575114, "grad_norm": 3.504685043460609, "learning_rate": 1.8330056754253552e-06, "loss": 1.023, "step": 71600 }, { "epoch": 2.1113076809671236, "grad_norm": 3.2333845682999143, "learning_rate": 1.8324528311737478e-06, "loss": 1.0422, "step": 71605 }, { "epoch": 2.111455108359133, "grad_norm": 3.395034550568782, "learning_rate": 1.8319000433481583e-06, "loss": 1.0692, "step": 71610 }, { "epoch": 2.1116025357511425, "grad_norm": 3.282653861448305, "learning_rate": 1.831347311964849e-06, "loss": 1.0274, "step": 71615 }, { "epoch": 2.111749963143152, "grad_norm": 3.3351223085175548, "learning_rate": 1.8307946370400857e-06, "loss": 1.064, "step": 71620 }, { "epoch": 2.1118973905351615, "grad_norm": 3.4943605353060128, "learning_rate": 1.830242018590132e-06, "loss": 1.077, "step": 71625 }, { "epoch": 2.112044817927171, "grad_norm": 3.3521256955688883, "learning_rate": 1.8296894566312487e-06, "loss": 1.0644, "step": 71630 }, { "epoch": 2.1121922453191804, "grad_norm": 3.48682630593499, "learning_rate": 1.829136951179696e-06, "loss": 1.0796, "step": 71635 }, { "epoch": 2.11233967271119, "grad_norm": 3.4221873139885806, "learning_rate": 1.8285845022517336e-06, "loss": 1.0579, "step": 71640 }, { "epoch": 2.1124871001031993, "grad_norm": 3.3680436107381255, "learning_rate": 1.8280321098636145e-06, "loss": 1.0154, "step": 71645 }, { "epoch": 2.1126345274952087, "grad_norm": 3.513844551370976, "learning_rate": 1.8274797740315956e-06, "loss": 1.0864, "step": 71650 }, { "epoch": 2.112781954887218, "grad_norm": 3.2964568380064994, "learning_rate": 1.8269274947719293e-06, "loss": 1.0612, "step": 71655 }, { "epoch": 2.1129293822792277, "grad_norm": 3.5075896224608214, "learning_rate": 1.8263752721008673e-06, "loss": 1.0669, "step": 71660 }, { "epoch": 2.113076809671237, "grad_norm": 3.271650267694754, "learning_rate": 1.8258231060346597e-06, "loss": 1.0482, "step": 71665 }, { "epoch": 2.113224237063246, "grad_norm": 3.28728682866401, "learning_rate": 1.8252709965895541e-06, "loss": 1.0181, "step": 71670 }, { "epoch": 2.1133716644552556, "grad_norm": 3.2791735529849872, "learning_rate": 1.824718943781799e-06, "loss": 1.0267, "step": 71675 }, { "epoch": 2.113519091847265, "grad_norm": 3.261579072207044, "learning_rate": 1.8241669476276346e-06, "loss": 1.0382, "step": 71680 }, { "epoch": 2.1136665192392745, "grad_norm": 3.4337082206834793, "learning_rate": 1.8236150081433095e-06, "loss": 1.042, "step": 71685 }, { "epoch": 2.113813946631284, "grad_norm": 3.4083128237281706, "learning_rate": 1.823063125345061e-06, "loss": 1.013, "step": 71690 }, { "epoch": 2.1139613740232934, "grad_norm": 3.5961813390891524, "learning_rate": 1.8225112992491305e-06, "loss": 1.0999, "step": 71695 }, { "epoch": 2.114108801415303, "grad_norm": 3.3645214965170016, "learning_rate": 1.821959529871756e-06, "loss": 1.0627, "step": 71700 }, { "epoch": 2.1142562288073123, "grad_norm": 3.501163792049472, "learning_rate": 1.8214078172291738e-06, "loss": 1.0955, "step": 71705 }, { "epoch": 2.114403656199322, "grad_norm": 3.5853987747817664, "learning_rate": 1.8208561613376205e-06, "loss": 1.0556, "step": 71710 }, { "epoch": 2.1145510835913313, "grad_norm": 3.3816510485890316, "learning_rate": 1.8203045622133257e-06, "loss": 1.0415, "step": 71715 }, { "epoch": 2.1146985109833407, "grad_norm": 3.3461896755070515, "learning_rate": 1.8197530198725228e-06, "loss": 1.0437, "step": 71720 }, { "epoch": 2.11484593837535, "grad_norm": 3.336416260499494, "learning_rate": 1.8192015343314416e-06, "loss": 1.0119, "step": 71725 }, { "epoch": 2.1149933657673596, "grad_norm": 3.4683518197149383, "learning_rate": 1.8186501056063098e-06, "loss": 1.0509, "step": 71730 }, { "epoch": 2.115140793159369, "grad_norm": 3.381044138587335, "learning_rate": 1.818098733713354e-06, "loss": 1.0685, "step": 71735 }, { "epoch": 2.1152882205513786, "grad_norm": 3.3470426508426208, "learning_rate": 1.8175474186687988e-06, "loss": 1.0468, "step": 71740 }, { "epoch": 2.115435647943388, "grad_norm": 3.4172827644139914, "learning_rate": 1.8169961604888678e-06, "loss": 1.0066, "step": 71745 }, { "epoch": 2.1155830753353975, "grad_norm": 3.3051189600025057, "learning_rate": 1.8164449591897833e-06, "loss": 1.0403, "step": 71750 }, { "epoch": 2.115730502727407, "grad_norm": 3.364198113587561, "learning_rate": 1.8158938147877606e-06, "loss": 0.9996, "step": 71755 }, { "epoch": 2.1158779301194164, "grad_norm": 3.3177066918117357, "learning_rate": 1.8153427272990241e-06, "loss": 1.0523, "step": 71760 }, { "epoch": 2.1160253575114254, "grad_norm": 3.6763036245465517, "learning_rate": 1.8147916967397856e-06, "loss": 1.0684, "step": 71765 }, { "epoch": 2.116172784903435, "grad_norm": 3.4833547158585016, "learning_rate": 1.8142407231262612e-06, "loss": 1.1474, "step": 71770 }, { "epoch": 2.1163202122954443, "grad_norm": 3.4844246961687353, "learning_rate": 1.8136898064746652e-06, "loss": 1.0497, "step": 71775 }, { "epoch": 2.1164676396874538, "grad_norm": 3.590392156663109, "learning_rate": 1.813138946801205e-06, "loss": 1.05, "step": 71780 }, { "epoch": 2.1166150670794632, "grad_norm": 3.5270168611711274, "learning_rate": 1.8125881441220958e-06, "loss": 1.0526, "step": 71785 }, { "epoch": 2.1167624944714727, "grad_norm": 3.5315918649520635, "learning_rate": 1.8120373984535397e-06, "loss": 1.0462, "step": 71790 }, { "epoch": 2.116909921863482, "grad_norm": 3.319205095464418, "learning_rate": 1.8114867098117493e-06, "loss": 1.079, "step": 71795 }, { "epoch": 2.1170573492554916, "grad_norm": 3.416681610241435, "learning_rate": 1.810936078212924e-06, "loss": 1.0929, "step": 71800 }, { "epoch": 2.117204776647501, "grad_norm": 3.382666979463264, "learning_rate": 1.8103855036732691e-06, "loss": 1.0698, "step": 71805 }, { "epoch": 2.1173522040395105, "grad_norm": 3.636629968449565, "learning_rate": 1.8098349862089858e-06, "loss": 1.0964, "step": 71810 }, { "epoch": 2.11749963143152, "grad_norm": 3.4971347456316795, "learning_rate": 1.8092845258362732e-06, "loss": 1.0417, "step": 71815 }, { "epoch": 2.1176470588235294, "grad_norm": 3.2844746330497987, "learning_rate": 1.8087341225713293e-06, "loss": 1.0476, "step": 71820 }, { "epoch": 2.117794486215539, "grad_norm": 3.4467279378852864, "learning_rate": 1.8081837764303524e-06, "loss": 1.0217, "step": 71825 }, { "epoch": 2.1179419136075484, "grad_norm": 3.50919033401817, "learning_rate": 1.807633487429532e-06, "loss": 1.0643, "step": 71830 }, { "epoch": 2.118089340999558, "grad_norm": 3.4308947977069533, "learning_rate": 1.8070832555850674e-06, "loss": 1.0493, "step": 71835 }, { "epoch": 2.1182367683915673, "grad_norm": 3.3506824633170367, "learning_rate": 1.8065330809131446e-06, "loss": 1.0071, "step": 71840 }, { "epoch": 2.1183841957835767, "grad_norm": 3.3668916842658256, "learning_rate": 1.8059829634299555e-06, "loss": 1.0555, "step": 71845 }, { "epoch": 2.118531623175586, "grad_norm": 3.391943684211787, "learning_rate": 1.8054329031516891e-06, "loss": 1.0664, "step": 71850 }, { "epoch": 2.1186790505675956, "grad_norm": 3.2856527794035086, "learning_rate": 1.8048829000945268e-06, "loss": 1.0252, "step": 71855 }, { "epoch": 2.118826477959605, "grad_norm": 3.368305702757756, "learning_rate": 1.8043329542746593e-06, "loss": 1.0547, "step": 71860 }, { "epoch": 2.118973905351614, "grad_norm": 3.3917825930216203, "learning_rate": 1.8037830657082636e-06, "loss": 1.0155, "step": 71865 }, { "epoch": 2.1191213327436236, "grad_norm": 3.3826528120698915, "learning_rate": 1.8032332344115259e-06, "loss": 1.022, "step": 71870 }, { "epoch": 2.119268760135633, "grad_norm": 3.5035085764173552, "learning_rate": 1.8026834604006223e-06, "loss": 1.0697, "step": 71875 }, { "epoch": 2.1194161875276425, "grad_norm": 3.5024390932786424, "learning_rate": 1.8021337436917311e-06, "loss": 1.033, "step": 71880 }, { "epoch": 2.119563614919652, "grad_norm": 3.4384790730124535, "learning_rate": 1.8015840843010288e-06, "loss": 1.0614, "step": 71885 }, { "epoch": 2.1197110423116614, "grad_norm": 3.586325814175564, "learning_rate": 1.80103448224469e-06, "loss": 1.0591, "step": 71890 }, { "epoch": 2.119858469703671, "grad_norm": 3.4944891609441884, "learning_rate": 1.8004849375388869e-06, "loss": 1.0693, "step": 71895 }, { "epoch": 2.1200058970956803, "grad_norm": 3.4723098546115425, "learning_rate": 1.7999354501997922e-06, "loss": 1.0737, "step": 71900 }, { "epoch": 2.12015332448769, "grad_norm": 3.402459218529437, "learning_rate": 1.799386020243572e-06, "loss": 1.1021, "step": 71905 }, { "epoch": 2.1203007518796992, "grad_norm": 3.3355982883068083, "learning_rate": 1.7988366476863952e-06, "loss": 1.0585, "step": 71910 }, { "epoch": 2.1204481792717087, "grad_norm": 3.2835707321769068, "learning_rate": 1.7982873325444285e-06, "loss": 1.0589, "step": 71915 }, { "epoch": 2.120595606663718, "grad_norm": 3.4388989380680175, "learning_rate": 1.7977380748338358e-06, "loss": 1.0464, "step": 71920 }, { "epoch": 2.1207430340557276, "grad_norm": 3.436560070755069, "learning_rate": 1.7971888745707787e-06, "loss": 1.0539, "step": 71925 }, { "epoch": 2.120890461447737, "grad_norm": 3.48843105129383, "learning_rate": 1.7966397317714197e-06, "loss": 1.0952, "step": 71930 }, { "epoch": 2.1210378888397465, "grad_norm": 3.303878109808957, "learning_rate": 1.796090646451918e-06, "loss": 1.0194, "step": 71935 }, { "epoch": 2.121185316231756, "grad_norm": 3.60348692504514, "learning_rate": 1.7955416186284273e-06, "loss": 1.0091, "step": 71940 }, { "epoch": 2.1213327436237654, "grad_norm": 3.4326717760461274, "learning_rate": 1.7949926483171094e-06, "loss": 1.0817, "step": 71945 }, { "epoch": 2.121480171015775, "grad_norm": 3.426874571258283, "learning_rate": 1.7944437355341138e-06, "loss": 1.049, "step": 71950 }, { "epoch": 2.1216275984077844, "grad_norm": 3.3920266522098284, "learning_rate": 1.7938948802955941e-06, "loss": 1.0583, "step": 71955 }, { "epoch": 2.121775025799794, "grad_norm": 3.4656313498043163, "learning_rate": 1.7933460826177012e-06, "loss": 1.0749, "step": 71960 }, { "epoch": 2.121922453191803, "grad_norm": 3.185267521414723, "learning_rate": 1.7927973425165841e-06, "loss": 1.0552, "step": 71965 }, { "epoch": 2.1220698805838123, "grad_norm": 3.6675432279804197, "learning_rate": 1.7922486600083914e-06, "loss": 1.0692, "step": 71970 }, { "epoch": 2.1222173079758218, "grad_norm": 3.6053397211791642, "learning_rate": 1.7917000351092666e-06, "loss": 1.0556, "step": 71975 }, { "epoch": 2.122364735367831, "grad_norm": 3.468873951357367, "learning_rate": 1.7911514678353535e-06, "loss": 1.04, "step": 71980 }, { "epoch": 2.1225121627598407, "grad_norm": 3.471125717408369, "learning_rate": 1.7906029582027956e-06, "loss": 1.0488, "step": 71985 }, { "epoch": 2.12265959015185, "grad_norm": 3.442214651863389, "learning_rate": 1.7900545062277332e-06, "loss": 1.0328, "step": 71990 }, { "epoch": 2.1228070175438596, "grad_norm": 3.3453601213518076, "learning_rate": 1.7895061119263049e-06, "loss": 1.0041, "step": 71995 }, { "epoch": 2.122954444935869, "grad_norm": 3.435741014468278, "learning_rate": 1.788957775314648e-06, "loss": 1.0615, "step": 72000 }, { "epoch": 2.122954444935869, "eval_loss": 1.0854955911636353, "eval_runtime": 4.1633, "eval_samples_per_second": 95.117, "eval_steps_per_second": 3.123, "step": 72000 }, { "epoch": 2.1231018723278785, "grad_norm": 3.4896801995556195, "learning_rate": 1.7884094964088978e-06, "loss": 1.0334, "step": 72005 }, { "epoch": 2.123249299719888, "grad_norm": 3.337401809465095, "learning_rate": 1.7878612752251893e-06, "loss": 1.0488, "step": 72010 }, { "epoch": 2.1233967271118974, "grad_norm": 3.331516708542494, "learning_rate": 1.7873131117796505e-06, "loss": 1.0309, "step": 72015 }, { "epoch": 2.123544154503907, "grad_norm": 3.4329605220567236, "learning_rate": 1.7867650060884174e-06, "loss": 1.0647, "step": 72020 }, { "epoch": 2.1236915818959163, "grad_norm": 3.3900432034298498, "learning_rate": 1.7862169581676143e-06, "loss": 1.0436, "step": 72025 }, { "epoch": 2.123839009287926, "grad_norm": 3.4943879733643035, "learning_rate": 1.78566896803337e-06, "loss": 1.0668, "step": 72030 }, { "epoch": 2.1239864366799353, "grad_norm": 3.47367767551757, "learning_rate": 1.7851210357018102e-06, "loss": 1.0582, "step": 72035 }, { "epoch": 2.1241338640719447, "grad_norm": 3.3212405567510506, "learning_rate": 1.784573161189055e-06, "loss": 1.0314, "step": 72040 }, { "epoch": 2.124281291463954, "grad_norm": 3.15631114056377, "learning_rate": 1.7840253445112318e-06, "loss": 1.0285, "step": 72045 }, { "epoch": 2.1244287188559636, "grad_norm": 3.4242685462304805, "learning_rate": 1.7834775856844548e-06, "loss": 1.0303, "step": 72050 }, { "epoch": 2.124576146247973, "grad_norm": 3.370641341766427, "learning_rate": 1.7829298847248481e-06, "loss": 1.0479, "step": 72055 }, { "epoch": 2.124723573639982, "grad_norm": 3.350555575160546, "learning_rate": 1.7823822416485243e-06, "loss": 1.0228, "step": 72060 }, { "epoch": 2.1248710010319916, "grad_norm": 3.4210169912692443, "learning_rate": 1.7818346564715997e-06, "loss": 1.0264, "step": 72065 }, { "epoch": 2.125018428424001, "grad_norm": 3.3524007253237147, "learning_rate": 1.781287129210188e-06, "loss": 1.0286, "step": 72070 }, { "epoch": 2.1251658558160105, "grad_norm": 3.478501835803319, "learning_rate": 1.7807396598804007e-06, "loss": 1.05, "step": 72075 }, { "epoch": 2.12531328320802, "grad_norm": 3.417476071080981, "learning_rate": 1.7801922484983476e-06, "loss": 1.0642, "step": 72080 }, { "epoch": 2.1254607106000294, "grad_norm": 3.4195732120562026, "learning_rate": 1.7796448950801376e-06, "loss": 1.0498, "step": 72085 }, { "epoch": 2.125608137992039, "grad_norm": 3.3484640690882244, "learning_rate": 1.7790975996418745e-06, "loss": 1.0279, "step": 72090 }, { "epoch": 2.1257555653840483, "grad_norm": 3.326585375714114, "learning_rate": 1.7785503621996672e-06, "loss": 0.9899, "step": 72095 }, { "epoch": 2.1259029927760578, "grad_norm": 3.404650904370614, "learning_rate": 1.7780031827696157e-06, "loss": 1.062, "step": 72100 }, { "epoch": 2.1260504201680672, "grad_norm": 3.4731097297413815, "learning_rate": 1.7774560613678218e-06, "loss": 1.0939, "step": 72105 }, { "epoch": 2.1261978475600767, "grad_norm": 3.4686386898626265, "learning_rate": 1.7769089980103877e-06, "loss": 1.0784, "step": 72110 }, { "epoch": 2.126345274952086, "grad_norm": 3.479964977895175, "learning_rate": 1.7763619927134063e-06, "loss": 1.0524, "step": 72115 }, { "epoch": 2.1264927023440956, "grad_norm": 3.6003644126037284, "learning_rate": 1.7758150454929798e-06, "loss": 1.0372, "step": 72120 }, { "epoch": 2.126640129736105, "grad_norm": 3.394523668715102, "learning_rate": 1.7752681563651968e-06, "loss": 1.0403, "step": 72125 }, { "epoch": 2.1267875571281145, "grad_norm": 3.310316134783248, "learning_rate": 1.7747213253461564e-06, "loss": 1.0663, "step": 72130 }, { "epoch": 2.126934984520124, "grad_norm": 3.4990758996529046, "learning_rate": 1.774174552451945e-06, "loss": 1.0636, "step": 72135 }, { "epoch": 2.1270824119121334, "grad_norm": 3.428984588792093, "learning_rate": 1.7736278376986534e-06, "loss": 1.06, "step": 72140 }, { "epoch": 2.127229839304143, "grad_norm": 3.1850506838625443, "learning_rate": 1.7730811811023697e-06, "loss": 1.0603, "step": 72145 }, { "epoch": 2.1273772666961523, "grad_norm": 3.325058941986282, "learning_rate": 1.7725345826791797e-06, "loss": 1.0697, "step": 72150 }, { "epoch": 2.1275246940881614, "grad_norm": 3.469671567925678, "learning_rate": 1.7719880424451672e-06, "loss": 1.0314, "step": 72155 }, { "epoch": 2.127672121480171, "grad_norm": 3.3836045938201305, "learning_rate": 1.7714415604164172e-06, "loss": 1.0084, "step": 72160 }, { "epoch": 2.1278195488721803, "grad_norm": 3.241213384997527, "learning_rate": 1.7708951366090063e-06, "loss": 1.0577, "step": 72165 }, { "epoch": 2.1279669762641897, "grad_norm": 3.4679612903600026, "learning_rate": 1.7703487710390167e-06, "loss": 1.0551, "step": 72170 }, { "epoch": 2.128114403656199, "grad_norm": 3.3889813892501537, "learning_rate": 1.7698024637225247e-06, "loss": 1.036, "step": 72175 }, { "epoch": 2.1282618310482087, "grad_norm": 3.4038520961369043, "learning_rate": 1.7692562146756063e-06, "loss": 1.0569, "step": 72180 }, { "epoch": 2.128409258440218, "grad_norm": 3.558072268033628, "learning_rate": 1.7687100239143358e-06, "loss": 1.0388, "step": 72185 }, { "epoch": 2.1285566858322276, "grad_norm": 3.268804543916182, "learning_rate": 1.7681638914547852e-06, "loss": 1.0268, "step": 72190 }, { "epoch": 2.128704113224237, "grad_norm": 3.347806087993777, "learning_rate": 1.7676178173130263e-06, "loss": 1.0832, "step": 72195 }, { "epoch": 2.1288515406162465, "grad_norm": 3.3156909797760417, "learning_rate": 1.767071801505124e-06, "loss": 1.0421, "step": 72200 }, { "epoch": 2.128998968008256, "grad_norm": 3.331694965104845, "learning_rate": 1.766525844047151e-06, "loss": 1.0555, "step": 72205 }, { "epoch": 2.1291463954002654, "grad_norm": 3.386458548734029, "learning_rate": 1.765979944955169e-06, "loss": 1.0435, "step": 72210 }, { "epoch": 2.129293822792275, "grad_norm": 3.401960103759647, "learning_rate": 1.7654341042452422e-06, "loss": 1.0736, "step": 72215 }, { "epoch": 2.1294412501842843, "grad_norm": 3.3314817125036216, "learning_rate": 1.7648883219334328e-06, "loss": 1.008, "step": 72220 }, { "epoch": 2.1295886775762938, "grad_norm": 3.3356672947926587, "learning_rate": 1.7643425980358017e-06, "loss": 1.0494, "step": 72225 }, { "epoch": 2.1297361049683032, "grad_norm": 3.5700444368526902, "learning_rate": 1.7637969325684085e-06, "loss": 1.0675, "step": 72230 }, { "epoch": 2.1298835323603127, "grad_norm": 3.352558800232466, "learning_rate": 1.7632513255473068e-06, "loss": 1.0017, "step": 72235 }, { "epoch": 2.130030959752322, "grad_norm": 3.440697303154643, "learning_rate": 1.7627057769885538e-06, "loss": 1.0512, "step": 72240 }, { "epoch": 2.1301783871443316, "grad_norm": 3.4149136363433445, "learning_rate": 1.762160286908202e-06, "loss": 1.0699, "step": 72245 }, { "epoch": 2.1303258145363406, "grad_norm": 3.5568647491525494, "learning_rate": 1.7616148553223043e-06, "loss": 1.0361, "step": 72250 }, { "epoch": 2.13047324192835, "grad_norm": 3.4820960254928073, "learning_rate": 1.7610694822469095e-06, "loss": 1.0725, "step": 72255 }, { "epoch": 2.1306206693203595, "grad_norm": 3.387353488289144, "learning_rate": 1.7605241676980668e-06, "loss": 1.0435, "step": 72260 }, { "epoch": 2.130768096712369, "grad_norm": 3.440710111422505, "learning_rate": 1.7599789116918217e-06, "loss": 1.0508, "step": 72265 }, { "epoch": 2.1309155241043785, "grad_norm": 3.514657982958109, "learning_rate": 1.759433714244221e-06, "loss": 1.0311, "step": 72270 }, { "epoch": 2.131062951496388, "grad_norm": 3.256024825101074, "learning_rate": 1.7588885753713036e-06, "loss": 1.0304, "step": 72275 }, { "epoch": 2.1312103788883974, "grad_norm": 3.3233123709041186, "learning_rate": 1.7583434950891166e-06, "loss": 1.0683, "step": 72280 }, { "epoch": 2.131357806280407, "grad_norm": 3.551258116992766, "learning_rate": 1.7577984734136947e-06, "loss": 1.0934, "step": 72285 }, { "epoch": 2.1315052336724163, "grad_norm": 3.4173073878450766, "learning_rate": 1.7572535103610772e-06, "loss": 1.0526, "step": 72290 }, { "epoch": 2.1316526610644257, "grad_norm": 3.2844055124488185, "learning_rate": 1.7567086059473026e-06, "loss": 1.0348, "step": 72295 }, { "epoch": 2.131800088456435, "grad_norm": 3.266114369803836, "learning_rate": 1.7561637601884003e-06, "loss": 1.0274, "step": 72300 }, { "epoch": 2.1319475158484447, "grad_norm": 3.48477521196433, "learning_rate": 1.7556189731004093e-06, "loss": 1.0051, "step": 72305 }, { "epoch": 2.132094943240454, "grad_norm": 3.3850507110533985, "learning_rate": 1.7550742446993543e-06, "loss": 1.0562, "step": 72310 }, { "epoch": 2.1322423706324636, "grad_norm": 3.489190899290323, "learning_rate": 1.75452957500127e-06, "loss": 1.0395, "step": 72315 }, { "epoch": 2.132389798024473, "grad_norm": 3.295380244057033, "learning_rate": 1.7539849640221806e-06, "loss": 1.0308, "step": 72320 }, { "epoch": 2.1325372254164825, "grad_norm": 3.459313985927705, "learning_rate": 1.7534404117781128e-06, "loss": 1.0491, "step": 72325 }, { "epoch": 2.132684652808492, "grad_norm": 3.49642458615653, "learning_rate": 1.752895918285091e-06, "loss": 1.0698, "step": 72330 }, { "epoch": 2.1328320802005014, "grad_norm": 3.424055223523662, "learning_rate": 1.7523514835591368e-06, "loss": 1.0803, "step": 72335 }, { "epoch": 2.132979507592511, "grad_norm": 3.3626122300724086, "learning_rate": 1.7518071076162713e-06, "loss": 1.011, "step": 72340 }, { "epoch": 2.1331269349845203, "grad_norm": 3.3185969213164097, "learning_rate": 1.7512627904725148e-06, "loss": 1.033, "step": 72345 }, { "epoch": 2.13327436237653, "grad_norm": 3.4255891572378454, "learning_rate": 1.7507185321438803e-06, "loss": 1.0504, "step": 72350 }, { "epoch": 2.133421789768539, "grad_norm": 3.365306322650441, "learning_rate": 1.7501743326463889e-06, "loss": 1.0772, "step": 72355 }, { "epoch": 2.1335692171605483, "grad_norm": 3.363492286662918, "learning_rate": 1.7496301919960494e-06, "loss": 1.0747, "step": 72360 }, { "epoch": 2.1337166445525577, "grad_norm": 3.4402171871584315, "learning_rate": 1.7490861102088761e-06, "loss": 1.0739, "step": 72365 }, { "epoch": 2.133864071944567, "grad_norm": 3.3298649972335204, "learning_rate": 1.74854208730088e-06, "loss": 1.0506, "step": 72370 }, { "epoch": 2.1340114993365766, "grad_norm": 3.417192492533349, "learning_rate": 1.747998123288066e-06, "loss": 1.0703, "step": 72375 }, { "epoch": 2.134158926728586, "grad_norm": 3.1435896185245817, "learning_rate": 1.7474542181864457e-06, "loss": 1.0257, "step": 72380 }, { "epoch": 2.1343063541205956, "grad_norm": 3.527416423888888, "learning_rate": 1.746910372012019e-06, "loss": 1.0691, "step": 72385 }, { "epoch": 2.134453781512605, "grad_norm": 3.643539485029619, "learning_rate": 1.7463665847807948e-06, "loss": 1.0482, "step": 72390 }, { "epoch": 2.1346012089046145, "grad_norm": 3.427988076762043, "learning_rate": 1.7458228565087703e-06, "loss": 1.0358, "step": 72395 }, { "epoch": 2.134748636296624, "grad_norm": 3.538681861205866, "learning_rate": 1.7452791872119467e-06, "loss": 1.086, "step": 72400 }, { "epoch": 2.1348960636886334, "grad_norm": 3.3143348793449463, "learning_rate": 1.7447355769063222e-06, "loss": 1.1094, "step": 72405 }, { "epoch": 2.135043491080643, "grad_norm": 3.3000685714849007, "learning_rate": 1.7441920256078928e-06, "loss": 1.0424, "step": 72410 }, { "epoch": 2.1351909184726523, "grad_norm": 3.6880084462465854, "learning_rate": 1.7436485333326539e-06, "loss": 1.0789, "step": 72415 }, { "epoch": 2.1353383458646618, "grad_norm": 3.431838012446926, "learning_rate": 1.743105100096599e-06, "loss": 1.028, "step": 72420 }, { "epoch": 2.135485773256671, "grad_norm": 3.4346717057573444, "learning_rate": 1.7425617259157172e-06, "loss": 1.0431, "step": 72425 }, { "epoch": 2.1356332006486807, "grad_norm": 3.411022950068547, "learning_rate": 1.742018410805998e-06, "loss": 1.0867, "step": 72430 }, { "epoch": 2.13578062804069, "grad_norm": 3.6243036738484657, "learning_rate": 1.741475154783431e-06, "loss": 1.1108, "step": 72435 }, { "epoch": 2.1359280554326996, "grad_norm": 3.3508464109290594, "learning_rate": 1.7409319578640006e-06, "loss": 1.0285, "step": 72440 }, { "epoch": 2.136075482824709, "grad_norm": 3.2956741791604256, "learning_rate": 1.7403888200636912e-06, "loss": 1.0626, "step": 72445 }, { "epoch": 2.136222910216718, "grad_norm": 3.5357025116535206, "learning_rate": 1.7398457413984859e-06, "loss": 1.113, "step": 72450 }, { "epoch": 2.1363703376087275, "grad_norm": 3.5073483590670143, "learning_rate": 1.7393027218843662e-06, "loss": 1.1013, "step": 72455 }, { "epoch": 2.136517765000737, "grad_norm": 3.461027289064993, "learning_rate": 1.7387597615373073e-06, "loss": 1.0795, "step": 72460 }, { "epoch": 2.1366651923927464, "grad_norm": 3.490503549456177, "learning_rate": 1.7382168603732916e-06, "loss": 1.0362, "step": 72465 }, { "epoch": 2.136812619784756, "grad_norm": 3.5400316467824005, "learning_rate": 1.7376740184082905e-06, "loss": 1.0601, "step": 72470 }, { "epoch": 2.1369600471767654, "grad_norm": 3.3860708545672322, "learning_rate": 1.7371312356582795e-06, "loss": 1.0341, "step": 72475 }, { "epoch": 2.137107474568775, "grad_norm": 3.6020913370334235, "learning_rate": 1.7365885121392305e-06, "loss": 1.0975, "step": 72480 }, { "epoch": 2.1372549019607843, "grad_norm": 3.261549453991922, "learning_rate": 1.736045847867113e-06, "loss": 1.068, "step": 72485 }, { "epoch": 2.1374023293527937, "grad_norm": 3.3720729092443245, "learning_rate": 1.7355032428578981e-06, "loss": 1.0487, "step": 72490 }, { "epoch": 2.137549756744803, "grad_norm": 3.3691857627478847, "learning_rate": 1.734960697127549e-06, "loss": 1.042, "step": 72495 }, { "epoch": 2.1376971841368126, "grad_norm": 3.4706990127229145, "learning_rate": 1.7344182106920321e-06, "loss": 1.043, "step": 72500 }, { "epoch": 2.1376971841368126, "eval_loss": 1.0864368677139282, "eval_runtime": 4.2235, "eval_samples_per_second": 93.761, "eval_steps_per_second": 3.078, "step": 72500 }, { "epoch": 2.137844611528822, "grad_norm": 3.448138644008751, "learning_rate": 1.7338757835673113e-06, "loss": 1.0475, "step": 72505 }, { "epoch": 2.1379920389208316, "grad_norm": 3.355639255788659, "learning_rate": 1.7333334157693475e-06, "loss": 0.9951, "step": 72510 }, { "epoch": 2.138139466312841, "grad_norm": 3.4000185791942683, "learning_rate": 1.7327911073141006e-06, "loss": 1.0494, "step": 72515 }, { "epoch": 2.1382868937048505, "grad_norm": 3.406566865524708, "learning_rate": 1.732248858217529e-06, "loss": 1.0617, "step": 72520 }, { "epoch": 2.13843432109686, "grad_norm": 3.364217017375238, "learning_rate": 1.7317066684955887e-06, "loss": 1.0241, "step": 72525 }, { "epoch": 2.1385817484888694, "grad_norm": 3.3472507773474898, "learning_rate": 1.7311645381642356e-06, "loss": 1.0162, "step": 72530 }, { "epoch": 2.138729175880879, "grad_norm": 3.2908189207921166, "learning_rate": 1.7306224672394188e-06, "loss": 1.0309, "step": 72535 }, { "epoch": 2.1388766032728883, "grad_norm": 3.875197378648103, "learning_rate": 1.7300804557370945e-06, "loss": 1.0532, "step": 72540 }, { "epoch": 2.1390240306648973, "grad_norm": 3.2158705634429072, "learning_rate": 1.7295385036732075e-06, "loss": 1.0555, "step": 72545 }, { "epoch": 2.139171458056907, "grad_norm": 3.4939086739531233, "learning_rate": 1.7289966110637074e-06, "loss": 1.075, "step": 72550 }, { "epoch": 2.1393188854489162, "grad_norm": 3.2667638588142456, "learning_rate": 1.7284547779245416e-06, "loss": 1.0173, "step": 72555 }, { "epoch": 2.1394663128409257, "grad_norm": 3.491885138913167, "learning_rate": 1.727913004271649e-06, "loss": 1.0331, "step": 72560 }, { "epoch": 2.139613740232935, "grad_norm": 3.4382514153392383, "learning_rate": 1.7273712901209781e-06, "loss": 1.0458, "step": 72565 }, { "epoch": 2.1397611676249446, "grad_norm": 3.196492953919639, "learning_rate": 1.7268296354884637e-06, "loss": 1.0329, "step": 72570 }, { "epoch": 2.139908595016954, "grad_norm": 3.5332335861894677, "learning_rate": 1.7262880403900503e-06, "loss": 1.0287, "step": 72575 }, { "epoch": 2.1400560224089635, "grad_norm": 3.449880506021079, "learning_rate": 1.7257465048416708e-06, "loss": 1.0281, "step": 72580 }, { "epoch": 2.140203449800973, "grad_norm": 3.360847388689474, "learning_rate": 1.7252050288592618e-06, "loss": 1.0547, "step": 72585 }, { "epoch": 2.1403508771929824, "grad_norm": 3.5965071092415717, "learning_rate": 1.7246636124587574e-06, "loss": 1.0283, "step": 72590 }, { "epoch": 2.140498304584992, "grad_norm": 3.507035584865466, "learning_rate": 1.7241222556560883e-06, "loss": 1.0793, "step": 72595 }, { "epoch": 2.1406457319770014, "grad_norm": 3.44104526966101, "learning_rate": 1.7235809584671855e-06, "loss": 1.0786, "step": 72600 }, { "epoch": 2.140793159369011, "grad_norm": 3.703207256239117, "learning_rate": 1.7230397209079784e-06, "loss": 1.0844, "step": 72605 }, { "epoch": 2.1409405867610203, "grad_norm": 3.435384209979449, "learning_rate": 1.7224985429943885e-06, "loss": 1.051, "step": 72610 }, { "epoch": 2.1410880141530297, "grad_norm": 3.493015230725772, "learning_rate": 1.7219574247423478e-06, "loss": 1.0752, "step": 72615 }, { "epoch": 2.141235441545039, "grad_norm": 3.2005641522530452, "learning_rate": 1.721416366167774e-06, "loss": 1.0493, "step": 72620 }, { "epoch": 2.1413828689370487, "grad_norm": 3.4132383983799723, "learning_rate": 1.7208753672865898e-06, "loss": 1.0518, "step": 72625 }, { "epoch": 2.141530296329058, "grad_norm": 3.444077826215413, "learning_rate": 1.7203344281147164e-06, "loss": 1.0833, "step": 72630 }, { "epoch": 2.1416777237210676, "grad_norm": 3.40023400768681, "learning_rate": 1.7197935486680675e-06, "loss": 1.0607, "step": 72635 }, { "epoch": 2.1418251511130766, "grad_norm": 3.4736546528490075, "learning_rate": 1.7192527289625645e-06, "loss": 1.0169, "step": 72640 }, { "epoch": 2.141972578505086, "grad_norm": 3.5823829301034102, "learning_rate": 1.7187119690141159e-06, "loss": 1.0476, "step": 72645 }, { "epoch": 2.1421200058970955, "grad_norm": 3.5509606038590067, "learning_rate": 1.7181712688386398e-06, "loss": 1.081, "step": 72650 }, { "epoch": 2.142267433289105, "grad_norm": 3.2857116606252053, "learning_rate": 1.7176306284520426e-06, "loss": 1.0317, "step": 72655 }, { "epoch": 2.1424148606811144, "grad_norm": 3.628128968709772, "learning_rate": 1.7170900478702353e-06, "loss": 1.1074, "step": 72660 }, { "epoch": 2.142562288073124, "grad_norm": 3.3029670025953504, "learning_rate": 1.7165495271091247e-06, "loss": 1.0794, "step": 72665 }, { "epoch": 2.1427097154651333, "grad_norm": 3.636800635626774, "learning_rate": 1.716009066184616e-06, "loss": 1.0957, "step": 72670 }, { "epoch": 2.142857142857143, "grad_norm": 3.499968691207262, "learning_rate": 1.7154686651126133e-06, "loss": 1.0743, "step": 72675 }, { "epoch": 2.1430045702491523, "grad_norm": 3.312337889840505, "learning_rate": 1.7149283239090195e-06, "loss": 1.0507, "step": 72680 }, { "epoch": 2.1431519976411617, "grad_norm": 3.4427574450945175, "learning_rate": 1.7143880425897322e-06, "loss": 1.0934, "step": 72685 }, { "epoch": 2.143299425033171, "grad_norm": 3.344989014279571, "learning_rate": 1.7138478211706515e-06, "loss": 1.0593, "step": 72690 }, { "epoch": 2.1434468524251806, "grad_norm": 3.41514826869435, "learning_rate": 1.7133076596676734e-06, "loss": 1.0696, "step": 72695 }, { "epoch": 2.14359427981719, "grad_norm": 3.464124042900349, "learning_rate": 1.7127675580966928e-06, "loss": 1.0933, "step": 72700 }, { "epoch": 2.1437417072091995, "grad_norm": 3.429492981247655, "learning_rate": 1.7122275164736036e-06, "loss": 1.0401, "step": 72705 }, { "epoch": 2.143889134601209, "grad_norm": 3.29895659700724, "learning_rate": 1.7116875348142966e-06, "loss": 1.029, "step": 72710 }, { "epoch": 2.1440365619932185, "grad_norm": 3.4502786301603443, "learning_rate": 1.7111476131346624e-06, "loss": 1.0559, "step": 72715 }, { "epoch": 2.144183989385228, "grad_norm": 3.3755165232851123, "learning_rate": 1.7106077514505851e-06, "loss": 1.0599, "step": 72720 }, { "epoch": 2.1443314167772374, "grad_norm": 3.320606544924789, "learning_rate": 1.7100679497779568e-06, "loss": 1.0503, "step": 72725 }, { "epoch": 2.144478844169247, "grad_norm": 3.440158374726044, "learning_rate": 1.7095282081326563e-06, "loss": 1.0581, "step": 72730 }, { "epoch": 2.144626271561256, "grad_norm": 3.4047426122499695, "learning_rate": 1.708988526530569e-06, "loss": 1.0712, "step": 72735 }, { "epoch": 2.1447736989532658, "grad_norm": 3.4453601758909507, "learning_rate": 1.7084489049875741e-06, "loss": 1.0623, "step": 72740 }, { "epoch": 2.1449211263452748, "grad_norm": 3.5327814022559285, "learning_rate": 1.7079093435195517e-06, "loss": 1.1008, "step": 72745 }, { "epoch": 2.1450685537372842, "grad_norm": 3.3929278668704597, "learning_rate": 1.7073698421423803e-06, "loss": 1.015, "step": 72750 }, { "epoch": 2.1452159811292937, "grad_norm": 3.365679588626825, "learning_rate": 1.7068304008719322e-06, "loss": 1.0562, "step": 72755 }, { "epoch": 2.145363408521303, "grad_norm": 3.394486291958509, "learning_rate": 1.7062910197240823e-06, "loss": 1.0224, "step": 72760 }, { "epoch": 2.1455108359133126, "grad_norm": 3.454990523004251, "learning_rate": 1.7057516987147035e-06, "loss": 1.085, "step": 72765 }, { "epoch": 2.145658263305322, "grad_norm": 3.382914054110382, "learning_rate": 1.7052124378596649e-06, "loss": 1.04, "step": 72770 }, { "epoch": 2.1458056906973315, "grad_norm": 3.3772098379267117, "learning_rate": 1.704673237174835e-06, "loss": 1.0322, "step": 72775 }, { "epoch": 2.145953118089341, "grad_norm": 3.464377521526982, "learning_rate": 1.7041340966760809e-06, "loss": 1.041, "step": 72780 }, { "epoch": 2.1461005454813504, "grad_norm": 3.3593135992805627, "learning_rate": 1.7035950163792668e-06, "loss": 1.0706, "step": 72785 }, { "epoch": 2.14624797287336, "grad_norm": 3.364159134547452, "learning_rate": 1.7030559963002578e-06, "loss": 1.0743, "step": 72790 }, { "epoch": 2.1463954002653693, "grad_norm": 3.310611711951804, "learning_rate": 1.7025170364549104e-06, "loss": 1.0486, "step": 72795 }, { "epoch": 2.146542827657379, "grad_norm": 3.3922708579433305, "learning_rate": 1.70197813685909e-06, "loss": 1.0363, "step": 72800 }, { "epoch": 2.1466902550493883, "grad_norm": 3.387876482940271, "learning_rate": 1.7014392975286505e-06, "loss": 1.0773, "step": 72805 }, { "epoch": 2.1468376824413977, "grad_norm": 3.4603002264178633, "learning_rate": 1.7009005184794487e-06, "loss": 1.022, "step": 72810 }, { "epoch": 2.146985109833407, "grad_norm": 3.446222040018902, "learning_rate": 1.7003617997273403e-06, "loss": 1.0568, "step": 72815 }, { "epoch": 2.1471325372254166, "grad_norm": 3.574152136800349, "learning_rate": 1.6998231412881737e-06, "loss": 1.0667, "step": 72820 }, { "epoch": 2.147279964617426, "grad_norm": 3.5367949712496136, "learning_rate": 1.699284543177805e-06, "loss": 1.0771, "step": 72825 }, { "epoch": 2.1474273920094356, "grad_norm": 3.301373848955984, "learning_rate": 1.6987460054120773e-06, "loss": 1.0433, "step": 72830 }, { "epoch": 2.147574819401445, "grad_norm": 3.530180797364892, "learning_rate": 1.6982075280068437e-06, "loss": 1.0464, "step": 72835 }, { "epoch": 2.147722246793454, "grad_norm": 3.3520598127817163, "learning_rate": 1.6976691109779449e-06, "loss": 1.0426, "step": 72840 }, { "epoch": 2.1478696741854635, "grad_norm": 3.3076202291178656, "learning_rate": 1.6971307543412262e-06, "loss": 1.038, "step": 72845 }, { "epoch": 2.148017101577473, "grad_norm": 3.3444269793337145, "learning_rate": 1.6965924581125291e-06, "loss": 1.0531, "step": 72850 }, { "epoch": 2.1481645289694824, "grad_norm": 3.1831798865385674, "learning_rate": 1.6960542223076935e-06, "loss": 0.9916, "step": 72855 }, { "epoch": 2.148311956361492, "grad_norm": 3.4896248521187494, "learning_rate": 1.6955160469425582e-06, "loss": 1.0799, "step": 72860 }, { "epoch": 2.1484593837535013, "grad_norm": 3.500860179436481, "learning_rate": 1.6949779320329602e-06, "loss": 1.0223, "step": 72865 }, { "epoch": 2.1486068111455108, "grad_norm": 3.5328049473661864, "learning_rate": 1.6944398775947306e-06, "loss": 1.0541, "step": 72870 }, { "epoch": 2.1487542385375202, "grad_norm": 3.4784482201831417, "learning_rate": 1.6939018836437075e-06, "loss": 1.0413, "step": 72875 }, { "epoch": 2.1489016659295297, "grad_norm": 3.4135116129366576, "learning_rate": 1.6933639501957176e-06, "loss": 1.0529, "step": 72880 }, { "epoch": 2.149049093321539, "grad_norm": 3.4758278340287414, "learning_rate": 1.6928260772665924e-06, "loss": 1.0693, "step": 72885 }, { "epoch": 2.1491965207135486, "grad_norm": 3.306132672041587, "learning_rate": 1.6922882648721586e-06, "loss": 1.0769, "step": 72890 }, { "epoch": 2.149343948105558, "grad_norm": 3.2962012905147198, "learning_rate": 1.6917505130282426e-06, "loss": 1.0549, "step": 72895 }, { "epoch": 2.1494913754975675, "grad_norm": 3.329167854341482, "learning_rate": 1.691212821750669e-06, "loss": 1.0124, "step": 72900 }, { "epoch": 2.149638802889577, "grad_norm": 3.2971199205860615, "learning_rate": 1.6906751910552565e-06, "loss": 1.0411, "step": 72905 }, { "epoch": 2.1497862302815864, "grad_norm": 3.2836085157919572, "learning_rate": 1.6901376209578306e-06, "loss": 1.0264, "step": 72910 }, { "epoch": 2.149933657673596, "grad_norm": 3.3773020387083803, "learning_rate": 1.689600111474206e-06, "loss": 1.0202, "step": 72915 }, { "epoch": 2.1500810850656054, "grad_norm": 3.5466756717456773, "learning_rate": 1.6890626626202008e-06, "loss": 1.1048, "step": 72920 }, { "epoch": 2.150228512457615, "grad_norm": 3.2341378542033046, "learning_rate": 1.6885252744116305e-06, "loss": 1.0275, "step": 72925 }, { "epoch": 2.1503759398496243, "grad_norm": 3.5270987143608767, "learning_rate": 1.6879879468643074e-06, "loss": 1.0399, "step": 72930 }, { "epoch": 2.1505233672416333, "grad_norm": 3.444981176729942, "learning_rate": 1.687450679994044e-06, "loss": 1.0234, "step": 72935 }, { "epoch": 2.1506707946336427, "grad_norm": 3.5258087882442046, "learning_rate": 1.6869134738166505e-06, "loss": 1.0631, "step": 72940 }, { "epoch": 2.150818222025652, "grad_norm": 3.397326228496714, "learning_rate": 1.6863763283479325e-06, "loss": 1.0889, "step": 72945 }, { "epoch": 2.1509656494176617, "grad_norm": 3.312580257617351, "learning_rate": 1.6858392436036975e-06, "loss": 1.0887, "step": 72950 }, { "epoch": 2.151113076809671, "grad_norm": 3.287222008394899, "learning_rate": 1.6853022195997498e-06, "loss": 1.0787, "step": 72955 }, { "epoch": 2.1512605042016806, "grad_norm": 3.279774326682244, "learning_rate": 1.684765256351892e-06, "loss": 1.036, "step": 72960 }, { "epoch": 2.15140793159369, "grad_norm": 3.3814175657378724, "learning_rate": 1.684228353875925e-06, "loss": 1.0302, "step": 72965 }, { "epoch": 2.1515553589856995, "grad_norm": 3.4810748294213005, "learning_rate": 1.6836915121876474e-06, "loss": 1.0143, "step": 72970 }, { "epoch": 2.151702786377709, "grad_norm": 3.44808073493458, "learning_rate": 1.6831547313028575e-06, "loss": 1.0154, "step": 72975 }, { "epoch": 2.1518502137697184, "grad_norm": 3.3162389324316797, "learning_rate": 1.6826180112373473e-06, "loss": 1.0317, "step": 72980 }, { "epoch": 2.151997641161728, "grad_norm": 3.435158632229481, "learning_rate": 1.6820813520069157e-06, "loss": 1.045, "step": 72985 }, { "epoch": 2.1521450685537373, "grad_norm": 3.53361906555659, "learning_rate": 1.68154475362735e-06, "loss": 1.0561, "step": 72990 }, { "epoch": 2.152292495945747, "grad_norm": 3.457804922866193, "learning_rate": 1.6810082161144418e-06, "loss": 1.0402, "step": 72995 }, { "epoch": 2.1524399233377562, "grad_norm": 3.438199144782118, "learning_rate": 1.68047173948398e-06, "loss": 1.0476, "step": 73000 }, { "epoch": 2.1524399233377562, "eval_loss": 1.0852781534194946, "eval_runtime": 4.1853, "eval_samples_per_second": 94.616, "eval_steps_per_second": 3.106, "step": 73000 }, { "epoch": 2.1525873507297657, "grad_norm": 3.4668878591959467, "learning_rate": 1.67993532375175e-06, "loss": 1.0053, "step": 73005 }, { "epoch": 2.152734778121775, "grad_norm": 3.201300379619684, "learning_rate": 1.679398968933538e-06, "loss": 1.038, "step": 73010 }, { "epoch": 2.1528822055137846, "grad_norm": 3.578527526708348, "learning_rate": 1.6788626750451246e-06, "loss": 1.0879, "step": 73015 }, { "epoch": 2.153029632905794, "grad_norm": 3.1779416297640934, "learning_rate": 1.6783264421022918e-06, "loss": 0.9947, "step": 73020 }, { "epoch": 2.1531770602978035, "grad_norm": 3.557217809602632, "learning_rate": 1.6777902701208194e-06, "loss": 1.0852, "step": 73025 }, { "epoch": 2.1533244876898125, "grad_norm": 3.3748892273051676, "learning_rate": 1.6772541591164842e-06, "loss": 1.0817, "step": 73030 }, { "epoch": 2.153471915081822, "grad_norm": 3.4297629803740497, "learning_rate": 1.676718109105062e-06, "loss": 1.0359, "step": 73035 }, { "epoch": 2.1536193424738315, "grad_norm": 3.59310013352363, "learning_rate": 1.6761821201023276e-06, "loss": 1.0481, "step": 73040 }, { "epoch": 2.153766769865841, "grad_norm": 3.4370670878104113, "learning_rate": 1.6756461921240517e-06, "loss": 1.0428, "step": 73045 }, { "epoch": 2.1539141972578504, "grad_norm": 3.576690842272703, "learning_rate": 1.6751103251860069e-06, "loss": 1.0666, "step": 73050 }, { "epoch": 2.15406162464986, "grad_norm": 3.32737942571265, "learning_rate": 1.6745745193039572e-06, "loss": 1.0418, "step": 73055 }, { "epoch": 2.1542090520418693, "grad_norm": 3.405185788185414, "learning_rate": 1.6740387744936752e-06, "loss": 1.0417, "step": 73060 }, { "epoch": 2.1543564794338788, "grad_norm": 3.5326129592499425, "learning_rate": 1.673503090770921e-06, "loss": 1.0379, "step": 73065 }, { "epoch": 2.154503906825888, "grad_norm": 3.498979181782471, "learning_rate": 1.6729674681514597e-06, "loss": 1.0752, "step": 73070 }, { "epoch": 2.1546513342178977, "grad_norm": 3.3330698918370336, "learning_rate": 1.6724319066510539e-06, "loss": 1.0257, "step": 73075 }, { "epoch": 2.154798761609907, "grad_norm": 3.3625367494350678, "learning_rate": 1.6718964062854583e-06, "loss": 1.0563, "step": 73080 }, { "epoch": 2.1549461890019166, "grad_norm": 3.5286393141186143, "learning_rate": 1.6713609670704371e-06, "loss": 1.0826, "step": 73085 }, { "epoch": 2.155093616393926, "grad_norm": 3.2762964161753465, "learning_rate": 1.6708255890217399e-06, "loss": 1.0642, "step": 73090 }, { "epoch": 2.1552410437859355, "grad_norm": 3.4270116858340818, "learning_rate": 1.670290272155127e-06, "loss": 1.0801, "step": 73095 }, { "epoch": 2.155388471177945, "grad_norm": 3.2496330818848667, "learning_rate": 1.6697550164863462e-06, "loss": 1.0388, "step": 73100 }, { "epoch": 2.1555358985699544, "grad_norm": 3.275153251095354, "learning_rate": 1.6692198220311494e-06, "loss": 1.0706, "step": 73105 }, { "epoch": 2.155683325961964, "grad_norm": 3.416860092015873, "learning_rate": 1.668684688805285e-06, "loss": 1.0218, "step": 73110 }, { "epoch": 2.1558307533539733, "grad_norm": 3.5048697033820697, "learning_rate": 1.6681496168245003e-06, "loss": 1.0359, "step": 73115 }, { "epoch": 2.155978180745983, "grad_norm": 3.338520138790268, "learning_rate": 1.6676146061045406e-06, "loss": 1.03, "step": 73120 }, { "epoch": 2.156125608137992, "grad_norm": 3.3628529988952525, "learning_rate": 1.6670796566611505e-06, "loss": 1.0378, "step": 73125 }, { "epoch": 2.1562730355300013, "grad_norm": 3.3103740422496015, "learning_rate": 1.6665447685100672e-06, "loss": 1.0434, "step": 73130 }, { "epoch": 2.1564204629220107, "grad_norm": 3.2974410240040024, "learning_rate": 1.6660099416670357e-06, "loss": 1.0366, "step": 73135 }, { "epoch": 2.15656789031402, "grad_norm": 3.5534382856782165, "learning_rate": 1.6654751761477898e-06, "loss": 1.077, "step": 73140 }, { "epoch": 2.1567153177060296, "grad_norm": 3.484825786926402, "learning_rate": 1.6649404719680673e-06, "loss": 1.0176, "step": 73145 }, { "epoch": 2.156862745098039, "grad_norm": 3.485002193099934, "learning_rate": 1.6644058291436021e-06, "loss": 1.0771, "step": 73150 }, { "epoch": 2.1570101724900486, "grad_norm": 3.4901882198059617, "learning_rate": 1.6638712476901271e-06, "loss": 1.0637, "step": 73155 }, { "epoch": 2.157157599882058, "grad_norm": 3.500288000899476, "learning_rate": 1.6633367276233743e-06, "loss": 1.0673, "step": 73160 }, { "epoch": 2.1573050272740675, "grad_norm": 3.6574170786578186, "learning_rate": 1.6628022689590678e-06, "loss": 1.062, "step": 73165 }, { "epoch": 2.157452454666077, "grad_norm": 3.421365858129776, "learning_rate": 1.662267871712941e-06, "loss": 1.063, "step": 73170 }, { "epoch": 2.1575998820580864, "grad_norm": 3.455995339019624, "learning_rate": 1.6617335359007142e-06, "loss": 1.0225, "step": 73175 }, { "epoch": 2.157747309450096, "grad_norm": 3.3606421247913785, "learning_rate": 1.6611992615381129e-06, "loss": 1.0573, "step": 73180 }, { "epoch": 2.1578947368421053, "grad_norm": 3.5161597575354513, "learning_rate": 1.6606650486408584e-06, "loss": 1.0045, "step": 73185 }, { "epoch": 2.1580421642341148, "grad_norm": 3.567996381455588, "learning_rate": 1.660130897224671e-06, "loss": 1.0718, "step": 73190 }, { "epoch": 2.1581895916261242, "grad_norm": 3.327014784718862, "learning_rate": 1.6595968073052678e-06, "loss": 1.0557, "step": 73195 }, { "epoch": 2.1583370190181337, "grad_norm": 3.3163029896013136, "learning_rate": 1.6590627788983674e-06, "loss": 0.9879, "step": 73200 }, { "epoch": 2.158484446410143, "grad_norm": 3.437837643591403, "learning_rate": 1.6585288120196806e-06, "loss": 1.042, "step": 73205 }, { "epoch": 2.1586318738021526, "grad_norm": 3.526424872390614, "learning_rate": 1.6579949066849223e-06, "loss": 1.0741, "step": 73210 }, { "epoch": 2.158779301194162, "grad_norm": 3.2462030957789967, "learning_rate": 1.657461062909802e-06, "loss": 1.0042, "step": 73215 }, { "epoch": 2.1589267285861715, "grad_norm": 3.2106319831740695, "learning_rate": 1.6569272807100294e-06, "loss": 1.0539, "step": 73220 }, { "epoch": 2.159074155978181, "grad_norm": 3.276087464530405, "learning_rate": 1.656393560101312e-06, "loss": 1.0404, "step": 73225 }, { "epoch": 2.15922158337019, "grad_norm": 3.3510892227916242, "learning_rate": 1.6558599010993544e-06, "loss": 0.9987, "step": 73230 }, { "epoch": 2.1593690107621994, "grad_norm": 3.351803422229395, "learning_rate": 1.6553263037198618e-06, "loss": 1.043, "step": 73235 }, { "epoch": 2.159516438154209, "grad_norm": 3.4580469934324274, "learning_rate": 1.6547927679785322e-06, "loss": 1.0719, "step": 73240 }, { "epoch": 2.1596638655462184, "grad_norm": 3.3338394020521753, "learning_rate": 1.6542592938910702e-06, "loss": 1.0376, "step": 73245 }, { "epoch": 2.159811292938228, "grad_norm": 3.455683978931001, "learning_rate": 1.6537258814731707e-06, "loss": 1.0983, "step": 73250 }, { "epoch": 2.1599587203302373, "grad_norm": 3.375900493164057, "learning_rate": 1.6531925307405307e-06, "loss": 1.0576, "step": 73255 }, { "epoch": 2.1601061477222467, "grad_norm": 3.3349098708158285, "learning_rate": 1.6526592417088445e-06, "loss": 1.0064, "step": 73260 }, { "epoch": 2.160253575114256, "grad_norm": 3.426140760306009, "learning_rate": 1.6521260143938053e-06, "loss": 1.0577, "step": 73265 }, { "epoch": 2.1604010025062657, "grad_norm": 3.3537649421835773, "learning_rate": 1.6515928488111047e-06, "loss": 1.0676, "step": 73270 }, { "epoch": 2.160548429898275, "grad_norm": 3.4422956052477516, "learning_rate": 1.6510597449764279e-06, "loss": 1.0157, "step": 73275 }, { "epoch": 2.1606958572902846, "grad_norm": 3.5592398125968376, "learning_rate": 1.6505267029054678e-06, "loss": 1.0868, "step": 73280 }, { "epoch": 2.160843284682294, "grad_norm": 3.3905245227715994, "learning_rate": 1.6499937226139055e-06, "loss": 1.0857, "step": 73285 }, { "epoch": 2.1609907120743035, "grad_norm": 3.544584539818672, "learning_rate": 1.6494608041174256e-06, "loss": 1.0552, "step": 73290 }, { "epoch": 2.161138139466313, "grad_norm": 3.5149352051188454, "learning_rate": 1.6489279474317107e-06, "loss": 1.068, "step": 73295 }, { "epoch": 2.1612855668583224, "grad_norm": 3.188730566279635, "learning_rate": 1.6483951525724396e-06, "loss": 1.0187, "step": 73300 }, { "epoch": 2.161432994250332, "grad_norm": 3.4942632619576144, "learning_rate": 1.6478624195552912e-06, "loss": 1.0384, "step": 73305 }, { "epoch": 2.1615804216423413, "grad_norm": 3.5224160776122395, "learning_rate": 1.6473297483959426e-06, "loss": 1.0394, "step": 73310 }, { "epoch": 2.161727849034351, "grad_norm": 3.1017037740530093, "learning_rate": 1.6467971391100643e-06, "loss": 1.0735, "step": 73315 }, { "epoch": 2.1618752764263602, "grad_norm": 3.420476799532813, "learning_rate": 1.646264591713335e-06, "loss": 1.0701, "step": 73320 }, { "epoch": 2.1620227038183693, "grad_norm": 3.5151820019001767, "learning_rate": 1.6457321062214205e-06, "loss": 1.0721, "step": 73325 }, { "epoch": 2.1621701312103787, "grad_norm": 3.3807045395849276, "learning_rate": 1.6451996826499917e-06, "loss": 1.0331, "step": 73330 }, { "epoch": 2.162317558602388, "grad_norm": 3.600981953590053, "learning_rate": 1.6446673210147172e-06, "loss": 1.0689, "step": 73335 }, { "epoch": 2.1624649859943976, "grad_norm": 3.3318584661268145, "learning_rate": 1.6441350213312578e-06, "loss": 1.0637, "step": 73340 }, { "epoch": 2.162612413386407, "grad_norm": 3.1770319010391397, "learning_rate": 1.6436027836152833e-06, "loss": 1.0294, "step": 73345 }, { "epoch": 2.1627598407784165, "grad_norm": 3.4901059411482964, "learning_rate": 1.6430706078824489e-06, "loss": 1.08, "step": 73350 }, { "epoch": 2.162907268170426, "grad_norm": 3.4609315556116247, "learning_rate": 1.6425384941484207e-06, "loss": 1.052, "step": 73355 }, { "epoch": 2.1630546955624355, "grad_norm": 3.310840758470047, "learning_rate": 1.6420064424288523e-06, "loss": 1.0582, "step": 73360 }, { "epoch": 2.163202122954445, "grad_norm": 3.321397251813479, "learning_rate": 1.6414744527394013e-06, "loss": 1.0099, "step": 73365 }, { "epoch": 2.1633495503464544, "grad_norm": 3.648187191168661, "learning_rate": 1.6409425250957224e-06, "loss": 1.035, "step": 73370 }, { "epoch": 2.163496977738464, "grad_norm": 3.509412017870256, "learning_rate": 1.640410659513468e-06, "loss": 1.0261, "step": 73375 }, { "epoch": 2.1636444051304733, "grad_norm": 3.3861137758083615, "learning_rate": 1.6398788560082886e-06, "loss": 1.0587, "step": 73380 }, { "epoch": 2.1637918325224827, "grad_norm": 3.3142562887884415, "learning_rate": 1.6393471145958353e-06, "loss": 1.0501, "step": 73385 }, { "epoch": 2.163939259914492, "grad_norm": 3.3108630741470573, "learning_rate": 1.63881543529175e-06, "loss": 1.0246, "step": 73390 }, { "epoch": 2.1640866873065017, "grad_norm": 3.8475265441808224, "learning_rate": 1.6382838181116838e-06, "loss": 1.0362, "step": 73395 }, { "epoch": 2.164234114698511, "grad_norm": 3.4324397490935157, "learning_rate": 1.6377522630712765e-06, "loss": 1.0784, "step": 73400 }, { "epoch": 2.1643815420905206, "grad_norm": 3.450673968301307, "learning_rate": 1.637220770186171e-06, "loss": 1.0621, "step": 73405 }, { "epoch": 2.16452896948253, "grad_norm": 3.3222672976595806, "learning_rate": 1.636689339472007e-06, "loss": 1.082, "step": 73410 }, { "epoch": 2.1646763968745395, "grad_norm": 3.5450592754722514, "learning_rate": 1.636157970944422e-06, "loss": 1.0529, "step": 73415 }, { "epoch": 2.1648238242665485, "grad_norm": 3.3499079418899678, "learning_rate": 1.6356266646190536e-06, "loss": 1.0219, "step": 73420 }, { "epoch": 2.164971251658558, "grad_norm": 3.2635672745312703, "learning_rate": 1.6350954205115324e-06, "loss": 1.0127, "step": 73425 }, { "epoch": 2.1651186790505674, "grad_norm": 3.3893166625147955, "learning_rate": 1.6345642386374963e-06, "loss": 1.0293, "step": 73430 }, { "epoch": 2.165266106442577, "grad_norm": 3.3872863711405334, "learning_rate": 1.634033119012572e-06, "loss": 1.0809, "step": 73435 }, { "epoch": 2.1654135338345863, "grad_norm": 3.3620993992260386, "learning_rate": 1.6335020616523889e-06, "loss": 0.9816, "step": 73440 }, { "epoch": 2.165560961226596, "grad_norm": 3.570500398878678, "learning_rate": 1.6329710665725745e-06, "loss": 1.0343, "step": 73445 }, { "epoch": 2.1657083886186053, "grad_norm": 3.4074536179571298, "learning_rate": 1.632440133788754e-06, "loss": 1.0704, "step": 73450 }, { "epoch": 2.1658558160106147, "grad_norm": 3.2962540712346335, "learning_rate": 1.63190926331655e-06, "loss": 1.0203, "step": 73455 }, { "epoch": 2.166003243402624, "grad_norm": 3.7833081523827135, "learning_rate": 1.6313784551715865e-06, "loss": 1.0574, "step": 73460 }, { "epoch": 2.1661506707946336, "grad_norm": 3.4973163631993116, "learning_rate": 1.6308477093694794e-06, "loss": 1.0612, "step": 73465 }, { "epoch": 2.166298098186643, "grad_norm": 3.303514819737439, "learning_rate": 1.6303170259258486e-06, "loss": 1.0345, "step": 73470 }, { "epoch": 2.1664455255786526, "grad_norm": 3.3454413767908155, "learning_rate": 1.6297864048563095e-06, "loss": 1.0449, "step": 73475 }, { "epoch": 2.166592952970662, "grad_norm": 3.4054244878848494, "learning_rate": 1.6292558461764767e-06, "loss": 1.0245, "step": 73480 }, { "epoch": 2.1667403803626715, "grad_norm": 3.389755452944298, "learning_rate": 1.6287253499019623e-06, "loss": 1.0639, "step": 73485 }, { "epoch": 2.166887807754681, "grad_norm": 3.299852247403067, "learning_rate": 1.6281949160483763e-06, "loss": 1.0853, "step": 73490 }, { "epoch": 2.1670352351466904, "grad_norm": 3.5471978085351945, "learning_rate": 1.6276645446313297e-06, "loss": 1.0269, "step": 73495 }, { "epoch": 2.1671826625387, "grad_norm": 3.280169644957723, "learning_rate": 1.627134235666424e-06, "loss": 1.0105, "step": 73500 }, { "epoch": 2.1671826625387, "eval_loss": 1.0860058069229126, "eval_runtime": 4.2707, "eval_samples_per_second": 92.724, "eval_steps_per_second": 3.044, "step": 73500 }, { "epoch": 2.1673300899307093, "grad_norm": 3.4924226823696065, "learning_rate": 1.6266039891692704e-06, "loss": 1.044, "step": 73505 }, { "epoch": 2.1674775173227188, "grad_norm": 3.292097720879424, "learning_rate": 1.626073805155468e-06, "loss": 1.0274, "step": 73510 }, { "epoch": 2.1676249447147278, "grad_norm": 3.3369995804952004, "learning_rate": 1.625543683640619e-06, "loss": 1.0399, "step": 73515 }, { "epoch": 2.1677723721067372, "grad_norm": 3.464124368972089, "learning_rate": 1.6250136246403232e-06, "loss": 1.0508, "step": 73520 }, { "epoch": 2.1679197994987467, "grad_norm": 3.424296515646067, "learning_rate": 1.6244836281701776e-06, "loss": 1.023, "step": 73525 }, { "epoch": 2.168067226890756, "grad_norm": 3.4746381519583114, "learning_rate": 1.6239536942457795e-06, "loss": 1.0399, "step": 73530 }, { "epoch": 2.1682146542827656, "grad_norm": 3.522267089262004, "learning_rate": 1.6234238228827195e-06, "loss": 1.0781, "step": 73535 }, { "epoch": 2.168362081674775, "grad_norm": 3.347166784026542, "learning_rate": 1.6228940140965942e-06, "loss": 1.0304, "step": 73540 }, { "epoch": 2.1685095090667845, "grad_norm": 3.401707672501803, "learning_rate": 1.62236426790299e-06, "loss": 1.058, "step": 73545 }, { "epoch": 2.168656936458794, "grad_norm": 3.438187329930598, "learning_rate": 1.621834584317497e-06, "loss": 1.0717, "step": 73550 }, { "epoch": 2.1688043638508034, "grad_norm": 3.3388253155310608, "learning_rate": 1.6213049633557015e-06, "loss": 1.0316, "step": 73555 }, { "epoch": 2.168951791242813, "grad_norm": 3.3344317746587357, "learning_rate": 1.6207754050331876e-06, "loss": 1.0489, "step": 73560 }, { "epoch": 2.1690992186348224, "grad_norm": 3.4034728836537154, "learning_rate": 1.6202459093655396e-06, "loss": 1.0609, "step": 73565 }, { "epoch": 2.169246646026832, "grad_norm": 3.3164492324771184, "learning_rate": 1.6197164763683384e-06, "loss": 1.0165, "step": 73570 }, { "epoch": 2.1693940734188413, "grad_norm": 3.483092077069318, "learning_rate": 1.6191871060571594e-06, "loss": 1.065, "step": 73575 }, { "epoch": 2.1695415008108507, "grad_norm": 3.4081893884629113, "learning_rate": 1.6186577984475863e-06, "loss": 1.085, "step": 73580 }, { "epoch": 2.16968892820286, "grad_norm": 3.3695664957756533, "learning_rate": 1.6181285535551898e-06, "loss": 1.0466, "step": 73585 }, { "epoch": 2.1698363555948696, "grad_norm": 3.3408455809287085, "learning_rate": 1.6175993713955444e-06, "loss": 1.0624, "step": 73590 }, { "epoch": 2.169983782986879, "grad_norm": 3.555166144949756, "learning_rate": 1.6170702519842243e-06, "loss": 1.0323, "step": 73595 }, { "epoch": 2.1701312103788886, "grad_norm": 3.3623748852169855, "learning_rate": 1.6165411953367948e-06, "loss": 1.0403, "step": 73600 }, { "epoch": 2.170278637770898, "grad_norm": 3.3742136680932067, "learning_rate": 1.6160122014688293e-06, "loss": 1.0577, "step": 73605 }, { "epoch": 2.170426065162907, "grad_norm": 3.5975567227622496, "learning_rate": 1.6154832703958891e-06, "loss": 1.0513, "step": 73610 }, { "epoch": 2.170573492554917, "grad_norm": 3.535640608715436, "learning_rate": 1.6149544021335436e-06, "loss": 1.0393, "step": 73615 }, { "epoch": 2.170720919946926, "grad_norm": 3.4818248441091497, "learning_rate": 1.6144255966973515e-06, "loss": 1.023, "step": 73620 }, { "epoch": 2.1708683473389354, "grad_norm": 3.40808191195052, "learning_rate": 1.6138968541028744e-06, "loss": 1.0758, "step": 73625 }, { "epoch": 2.171015774730945, "grad_norm": 3.5365125543825724, "learning_rate": 1.613368174365672e-06, "loss": 1.0303, "step": 73630 }, { "epoch": 2.1711632021229543, "grad_norm": 3.4589583402088406, "learning_rate": 1.6128395575013004e-06, "loss": 1.0818, "step": 73635 }, { "epoch": 2.171310629514964, "grad_norm": 3.4258242040316187, "learning_rate": 1.6123110035253156e-06, "loss": 1.0608, "step": 73640 }, { "epoch": 2.1714580569069732, "grad_norm": 3.4478938626888294, "learning_rate": 1.611782512453272e-06, "loss": 1.0421, "step": 73645 }, { "epoch": 2.1716054842989827, "grad_norm": 3.5491476629815537, "learning_rate": 1.6112540843007168e-06, "loss": 1.0556, "step": 73650 }, { "epoch": 2.171752911690992, "grad_norm": 3.5794099704984075, "learning_rate": 1.6107257190832046e-06, "loss": 1.0673, "step": 73655 }, { "epoch": 2.1719003390830016, "grad_norm": 3.305449680274449, "learning_rate": 1.6101974168162787e-06, "loss": 1.0488, "step": 73660 }, { "epoch": 2.172047766475011, "grad_norm": 3.549723108228629, "learning_rate": 1.6096691775154898e-06, "loss": 1.0456, "step": 73665 }, { "epoch": 2.1721951938670205, "grad_norm": 3.3021752061419813, "learning_rate": 1.6091410011963778e-06, "loss": 1.0341, "step": 73670 }, { "epoch": 2.17234262125903, "grad_norm": 3.378694883480005, "learning_rate": 1.6086128878744863e-06, "loss": 1.0592, "step": 73675 }, { "epoch": 2.1724900486510395, "grad_norm": 3.644775845948864, "learning_rate": 1.6080848375653573e-06, "loss": 1.0816, "step": 73680 }, { "epoch": 2.172637476043049, "grad_norm": 3.3920463829468184, "learning_rate": 1.6075568502845254e-06, "loss": 1.0477, "step": 73685 }, { "epoch": 2.1727849034350584, "grad_norm": 3.4646372430469365, "learning_rate": 1.6070289260475318e-06, "loss": 1.0495, "step": 73690 }, { "epoch": 2.172932330827068, "grad_norm": 3.3879060147067865, "learning_rate": 1.6065010648699082e-06, "loss": 1.0674, "step": 73695 }, { "epoch": 2.1730797582190773, "grad_norm": 3.2133840353651495, "learning_rate": 1.605973266767188e-06, "loss": 1.0141, "step": 73700 }, { "epoch": 2.1732271856110867, "grad_norm": 3.3683141424595417, "learning_rate": 1.605445531754903e-06, "loss": 1.0596, "step": 73705 }, { "epoch": 2.173374613003096, "grad_norm": 3.5733530214716422, "learning_rate": 1.6049178598485817e-06, "loss": 1.0596, "step": 73710 }, { "epoch": 2.173522040395105, "grad_norm": 3.417528881675033, "learning_rate": 1.6043902510637517e-06, "loss": 1.0663, "step": 73715 }, { "epoch": 2.1736694677871147, "grad_norm": 3.4584280653467774, "learning_rate": 1.6038627054159403e-06, "loss": 1.0741, "step": 73720 }, { "epoch": 2.173816895179124, "grad_norm": 3.39159686661433, "learning_rate": 1.6033352229206666e-06, "loss": 1.0733, "step": 73725 }, { "epoch": 2.1739643225711336, "grad_norm": 3.4043080692246193, "learning_rate": 1.6028078035934581e-06, "loss": 1.0291, "step": 73730 }, { "epoch": 2.174111749963143, "grad_norm": 3.5369602067367323, "learning_rate": 1.6022804474498302e-06, "loss": 1.0841, "step": 73735 }, { "epoch": 2.1742591773551525, "grad_norm": 3.486503666637904, "learning_rate": 1.6017531545053025e-06, "loss": 1.0864, "step": 73740 }, { "epoch": 2.174406604747162, "grad_norm": 3.4794888176297616, "learning_rate": 1.6012259247753918e-06, "loss": 1.0514, "step": 73745 }, { "epoch": 2.1745540321391714, "grad_norm": 3.3964157592205546, "learning_rate": 1.6006987582756113e-06, "loss": 1.0855, "step": 73750 }, { "epoch": 2.174701459531181, "grad_norm": 3.4143971386714926, "learning_rate": 1.6001716550214756e-06, "loss": 1.0425, "step": 73755 }, { "epoch": 2.1748488869231903, "grad_norm": 3.6531196360962475, "learning_rate": 1.599644615028491e-06, "loss": 1.0482, "step": 73760 }, { "epoch": 2.1749963143152, "grad_norm": 3.368926038196138, "learning_rate": 1.599117638312172e-06, "loss": 1.0321, "step": 73765 }, { "epoch": 2.1751437417072093, "grad_norm": 3.43578984497355, "learning_rate": 1.5985907248880211e-06, "loss": 1.0395, "step": 73770 }, { "epoch": 2.1752911690992187, "grad_norm": 3.6324401328113236, "learning_rate": 1.5980638747715448e-06, "loss": 1.053, "step": 73775 }, { "epoch": 2.175438596491228, "grad_norm": 3.5241222408095774, "learning_rate": 1.5975370879782463e-06, "loss": 1.037, "step": 73780 }, { "epoch": 2.1755860238832376, "grad_norm": 3.3112857372202806, "learning_rate": 1.5970103645236267e-06, "loss": 1.0767, "step": 73785 }, { "epoch": 2.175733451275247, "grad_norm": 3.5864511410399453, "learning_rate": 1.5964837044231858e-06, "loss": 1.0588, "step": 73790 }, { "epoch": 2.1758808786672565, "grad_norm": 3.566112980324764, "learning_rate": 1.5959571076924204e-06, "loss": 1.0447, "step": 73795 }, { "epoch": 2.176028306059266, "grad_norm": 3.294308651843343, "learning_rate": 1.5954305743468286e-06, "loss": 1.0314, "step": 73800 }, { "epoch": 2.1761757334512755, "grad_norm": 3.519487732159812, "learning_rate": 1.594904104401901e-06, "loss": 1.026, "step": 73805 }, { "epoch": 2.1763231608432845, "grad_norm": 3.1084254590906424, "learning_rate": 1.594377697873131e-06, "loss": 1.0084, "step": 73810 }, { "epoch": 2.176470588235294, "grad_norm": 3.2916945699799123, "learning_rate": 1.5938513547760084e-06, "loss": 1.0088, "step": 73815 }, { "epoch": 2.1766180156273034, "grad_norm": 3.4058352519688424, "learning_rate": 1.593325075126022e-06, "loss": 1.0593, "step": 73820 }, { "epoch": 2.176765443019313, "grad_norm": 3.333706597350659, "learning_rate": 1.592798858938658e-06, "loss": 1.0623, "step": 73825 }, { "epoch": 2.1769128704113223, "grad_norm": 3.5509276406215564, "learning_rate": 1.592272706229402e-06, "loss": 1.0823, "step": 73830 }, { "epoch": 2.1770602978033318, "grad_norm": 3.3999453625021414, "learning_rate": 1.5917466170137331e-06, "loss": 1.0683, "step": 73835 }, { "epoch": 2.1772077251953412, "grad_norm": 3.4301879100605803, "learning_rate": 1.591220591307137e-06, "loss": 1.0461, "step": 73840 }, { "epoch": 2.1773551525873507, "grad_norm": 3.3835677009917107, "learning_rate": 1.5906946291250887e-06, "loss": 1.0354, "step": 73845 }, { "epoch": 2.17750257997936, "grad_norm": 3.6086771366058006, "learning_rate": 1.5901687304830666e-06, "loss": 1.0808, "step": 73850 }, { "epoch": 2.1776500073713696, "grad_norm": 3.5155454638204415, "learning_rate": 1.5896428953965457e-06, "loss": 1.07, "step": 73855 }, { "epoch": 2.177797434763379, "grad_norm": 3.411929775286103, "learning_rate": 1.5891171238809995e-06, "loss": 1.0381, "step": 73860 }, { "epoch": 2.1779448621553885, "grad_norm": 3.4657549517400255, "learning_rate": 1.5885914159519011e-06, "loss": 1.0816, "step": 73865 }, { "epoch": 2.178092289547398, "grad_norm": 3.5906281315676463, "learning_rate": 1.5880657716247153e-06, "loss": 1.0876, "step": 73870 }, { "epoch": 2.1782397169394074, "grad_norm": 3.5403023658169457, "learning_rate": 1.5875401909149158e-06, "loss": 1.0789, "step": 73875 }, { "epoch": 2.178387144331417, "grad_norm": 3.386488097715029, "learning_rate": 1.587014673837964e-06, "loss": 1.0843, "step": 73880 }, { "epoch": 2.1785345717234263, "grad_norm": 3.5480588642064066, "learning_rate": 1.5864892204093251e-06, "loss": 1.0587, "step": 73885 }, { "epoch": 2.178681999115436, "grad_norm": 3.317920695663637, "learning_rate": 1.5859638306444616e-06, "loss": 1.0335, "step": 73890 }, { "epoch": 2.1788294265074453, "grad_norm": 3.4141844547064477, "learning_rate": 1.5854385045588334e-06, "loss": 1.021, "step": 73895 }, { "epoch": 2.1789768538994547, "grad_norm": 3.5942378608859857, "learning_rate": 1.584913242167899e-06, "loss": 1.0877, "step": 73900 }, { "epoch": 2.1791242812914637, "grad_norm": 3.278201353448765, "learning_rate": 1.584388043487116e-06, "loss": 1.0331, "step": 73905 }, { "epoch": 2.179271708683473, "grad_norm": 3.4526714855046223, "learning_rate": 1.583862908531935e-06, "loss": 1.0271, "step": 73910 }, { "epoch": 2.1794191360754827, "grad_norm": 3.474764697439676, "learning_rate": 1.583337837317814e-06, "loss": 1.0541, "step": 73915 }, { "epoch": 2.179566563467492, "grad_norm": 3.4060486336511286, "learning_rate": 1.5828128298601986e-06, "loss": 1.0438, "step": 73920 }, { "epoch": 2.1797139908595016, "grad_norm": 3.4376226738468816, "learning_rate": 1.5822878861745428e-06, "loss": 1.06, "step": 73925 }, { "epoch": 2.179861418251511, "grad_norm": 3.547260800688832, "learning_rate": 1.58176300627629e-06, "loss": 1.0315, "step": 73930 }, { "epoch": 2.1800088456435205, "grad_norm": 3.615857867108406, "learning_rate": 1.5812381901808865e-06, "loss": 1.0812, "step": 73935 }, { "epoch": 2.18015627303553, "grad_norm": 3.510647430821691, "learning_rate": 1.5807134379037773e-06, "loss": 1.079, "step": 73940 }, { "epoch": 2.1803037004275394, "grad_norm": 3.3332721164680006, "learning_rate": 1.5801887494603993e-06, "loss": 1.0575, "step": 73945 }, { "epoch": 2.180451127819549, "grad_norm": 3.4929523813040437, "learning_rate": 1.5796641248661977e-06, "loss": 1.0865, "step": 73950 }, { "epoch": 2.1805985552115583, "grad_norm": 3.394126779145168, "learning_rate": 1.5791395641366065e-06, "loss": 1.0236, "step": 73955 }, { "epoch": 2.180745982603568, "grad_norm": 3.452368629305877, "learning_rate": 1.5786150672870621e-06, "loss": 1.076, "step": 73960 }, { "epoch": 2.1808934099955772, "grad_norm": 3.4056996354310014, "learning_rate": 1.5780906343329995e-06, "loss": 1.0158, "step": 73965 }, { "epoch": 2.1810408373875867, "grad_norm": 3.4859208742521806, "learning_rate": 1.5775662652898494e-06, "loss": 1.063, "step": 73970 }, { "epoch": 2.181188264779596, "grad_norm": 3.564442379875138, "learning_rate": 1.5770419601730428e-06, "loss": 1.0378, "step": 73975 }, { "epoch": 2.1813356921716056, "grad_norm": 3.3465323384073904, "learning_rate": 1.576517718998009e-06, "loss": 1.0302, "step": 73980 }, { "epoch": 2.181483119563615, "grad_norm": 3.4880847628169787, "learning_rate": 1.5759935417801703e-06, "loss": 1.0445, "step": 73985 }, { "epoch": 2.1816305469556245, "grad_norm": 3.4068112567890267, "learning_rate": 1.5754694285349573e-06, "loss": 1.0859, "step": 73990 }, { "epoch": 2.181777974347634, "grad_norm": 3.245437411181978, "learning_rate": 1.574945379277788e-06, "loss": 1.0496, "step": 73995 }, { "epoch": 2.181925401739643, "grad_norm": 3.2916218580964194, "learning_rate": 1.574421394024084e-06, "loss": 1.0314, "step": 74000 }, { "epoch": 2.181925401739643, "eval_loss": 1.086010456085205, "eval_runtime": 4.1513, "eval_samples_per_second": 95.392, "eval_steps_per_second": 3.132, "step": 74000 }, { "epoch": 2.1820728291316525, "grad_norm": 3.434109690986636, "learning_rate": 1.5738974727892651e-06, "loss": 1.0543, "step": 74005 }, { "epoch": 2.182220256523662, "grad_norm": 3.543620597194923, "learning_rate": 1.573373615588748e-06, "loss": 1.079, "step": 74010 }, { "epoch": 2.1823676839156714, "grad_norm": 3.5512941392264317, "learning_rate": 1.572849822437949e-06, "loss": 1.0882, "step": 74015 }, { "epoch": 2.182515111307681, "grad_norm": 3.3931593128848236, "learning_rate": 1.5723260933522773e-06, "loss": 1.0509, "step": 74020 }, { "epoch": 2.1826625386996903, "grad_norm": 3.471115879731576, "learning_rate": 1.5718024283471497e-06, "loss": 1.0837, "step": 74025 }, { "epoch": 2.1828099660916997, "grad_norm": 3.4975652221395497, "learning_rate": 1.5712788274379716e-06, "loss": 1.0848, "step": 74030 }, { "epoch": 2.182957393483709, "grad_norm": 3.4804456464462246, "learning_rate": 1.5707552906401516e-06, "loss": 1.047, "step": 74035 }, { "epoch": 2.1831048208757187, "grad_norm": 3.445084768728081, "learning_rate": 1.5702318179690956e-06, "loss": 1.0494, "step": 74040 }, { "epoch": 2.183252248267728, "grad_norm": 3.501990922728885, "learning_rate": 1.5697084094402076e-06, "loss": 1.0778, "step": 74045 }, { "epoch": 2.1833996756597376, "grad_norm": 3.5447770665340688, "learning_rate": 1.569185065068889e-06, "loss": 1.0898, "step": 74050 }, { "epoch": 2.183547103051747, "grad_norm": 3.2427861324992837, "learning_rate": 1.5686617848705404e-06, "loss": 1.0443, "step": 74055 }, { "epoch": 2.1836945304437565, "grad_norm": 3.3912861692982723, "learning_rate": 1.5681385688605611e-06, "loss": 1.0346, "step": 74060 }, { "epoch": 2.183841957835766, "grad_norm": 3.311703511677776, "learning_rate": 1.567615417054344e-06, "loss": 1.0505, "step": 74065 }, { "epoch": 2.1839893852277754, "grad_norm": 3.431585746861845, "learning_rate": 1.5670923294672856e-06, "loss": 1.086, "step": 74070 }, { "epoch": 2.184136812619785, "grad_norm": 3.6376079320738595, "learning_rate": 1.5665693061147778e-06, "loss": 1.0335, "step": 74075 }, { "epoch": 2.1842842400117943, "grad_norm": 3.2214120129040587, "learning_rate": 1.5660463470122117e-06, "loss": 1.0063, "step": 74080 }, { "epoch": 2.184431667403804, "grad_norm": 3.4675619259418364, "learning_rate": 1.5655234521749752e-06, "loss": 1.0502, "step": 74085 }, { "epoch": 2.1845790947958132, "grad_norm": 3.476443232539473, "learning_rate": 1.5650006216184576e-06, "loss": 1.0452, "step": 74090 }, { "epoch": 2.1847265221878223, "grad_norm": 3.6043876476147374, "learning_rate": 1.5644778553580383e-06, "loss": 1.0773, "step": 74095 }, { "epoch": 2.184873949579832, "grad_norm": 3.4201889818585633, "learning_rate": 1.5639551534091068e-06, "loss": 1.0343, "step": 74100 }, { "epoch": 2.185021376971841, "grad_norm": 3.5010675654199845, "learning_rate": 1.5634325157870395e-06, "loss": 1.0881, "step": 74105 }, { "epoch": 2.1851688043638506, "grad_norm": 3.4389853175483283, "learning_rate": 1.5629099425072175e-06, "loss": 1.0526, "step": 74110 }, { "epoch": 2.18531623175586, "grad_norm": 3.4118788946226877, "learning_rate": 1.5623874335850178e-06, "loss": 1.0894, "step": 74115 }, { "epoch": 2.1854636591478696, "grad_norm": 3.2889086869857462, "learning_rate": 1.5618649890358157e-06, "loss": 1.0101, "step": 74120 }, { "epoch": 2.185611086539879, "grad_norm": 3.618712707730557, "learning_rate": 1.5613426088749863e-06, "loss": 1.0796, "step": 74125 }, { "epoch": 2.1857585139318885, "grad_norm": 3.29806568096386, "learning_rate": 1.560820293117897e-06, "loss": 1.0493, "step": 74130 }, { "epoch": 2.185905941323898, "grad_norm": 3.475326614874694, "learning_rate": 1.560298041779923e-06, "loss": 1.0488, "step": 74135 }, { "epoch": 2.1860533687159074, "grad_norm": 3.393423901492792, "learning_rate": 1.5597758548764283e-06, "loss": 1.0312, "step": 74140 }, { "epoch": 2.186200796107917, "grad_norm": 3.3730848280468226, "learning_rate": 1.5592537324227803e-06, "loss": 1.0278, "step": 74145 }, { "epoch": 2.1863482234999263, "grad_norm": 3.5362857884217958, "learning_rate": 1.5587316744343425e-06, "loss": 1.0788, "step": 74150 }, { "epoch": 2.1864956508919358, "grad_norm": 3.4971031736318356, "learning_rate": 1.5582096809264776e-06, "loss": 1.0628, "step": 74155 }, { "epoch": 2.186643078283945, "grad_norm": 3.6243721595855045, "learning_rate": 1.5576877519145451e-06, "loss": 1.0513, "step": 74160 }, { "epoch": 2.1867905056759547, "grad_norm": 3.5302833235406035, "learning_rate": 1.5571658874139055e-06, "loss": 1.0587, "step": 74165 }, { "epoch": 2.186937933067964, "grad_norm": 3.248715866822559, "learning_rate": 1.5566440874399108e-06, "loss": 1.0181, "step": 74170 }, { "epoch": 2.1870853604599736, "grad_norm": 3.416280408309049, "learning_rate": 1.5561223520079212e-06, "loss": 1.0661, "step": 74175 }, { "epoch": 2.187232787851983, "grad_norm": 3.459735476715877, "learning_rate": 1.555600681133284e-06, "loss": 1.0652, "step": 74180 }, { "epoch": 2.1873802152439925, "grad_norm": 3.5672071323100796, "learning_rate": 1.5550790748313553e-06, "loss": 1.0841, "step": 74185 }, { "epoch": 2.187527642636002, "grad_norm": 3.5177880241620487, "learning_rate": 1.5545575331174794e-06, "loss": 1.0471, "step": 74190 }, { "epoch": 2.1876750700280114, "grad_norm": 3.434695139088946, "learning_rate": 1.5540360560070052e-06, "loss": 1.0397, "step": 74195 }, { "epoch": 2.1878224974200204, "grad_norm": 3.3308591285148705, "learning_rate": 1.5535146435152788e-06, "loss": 1.0603, "step": 74200 }, { "epoch": 2.18796992481203, "grad_norm": 3.3746186206842563, "learning_rate": 1.5529932956576397e-06, "loss": 1.0394, "step": 74205 }, { "epoch": 2.1881173522040394, "grad_norm": 3.518083648667314, "learning_rate": 1.5524720124494339e-06, "loss": 1.0813, "step": 74210 }, { "epoch": 2.188264779596049, "grad_norm": 3.4800993109586074, "learning_rate": 1.5519507939059975e-06, "loss": 1.0643, "step": 74215 }, { "epoch": 2.1884122069880583, "grad_norm": 3.5006741298082567, "learning_rate": 1.5514296400426687e-06, "loss": 1.0486, "step": 74220 }, { "epoch": 2.1885596343800677, "grad_norm": 3.342494809302151, "learning_rate": 1.5509085508747834e-06, "loss": 1.0457, "step": 74225 }, { "epoch": 2.188707061772077, "grad_norm": 3.534201153747663, "learning_rate": 1.5503875264176748e-06, "loss": 1.0762, "step": 74230 }, { "epoch": 2.1888544891640866, "grad_norm": 3.650824452413423, "learning_rate": 1.5498665666866748e-06, "loss": 1.0483, "step": 74235 }, { "epoch": 2.189001916556096, "grad_norm": 3.552097898806751, "learning_rate": 1.549345671697115e-06, "loss": 1.0356, "step": 74240 }, { "epoch": 2.1891493439481056, "grad_norm": 3.44979673318015, "learning_rate": 1.5488248414643194e-06, "loss": 1.0362, "step": 74245 }, { "epoch": 2.189296771340115, "grad_norm": 3.4550745330932764, "learning_rate": 1.5483040760036186e-06, "loss": 1.0754, "step": 74250 }, { "epoch": 2.1894441987321245, "grad_norm": 3.511598775630829, "learning_rate": 1.547783375330333e-06, "loss": 1.0087, "step": 74255 }, { "epoch": 2.189591626124134, "grad_norm": 3.4744727840339973, "learning_rate": 1.5472627394597862e-06, "loss": 1.0548, "step": 74260 }, { "epoch": 2.1897390535161434, "grad_norm": 3.425803489699534, "learning_rate": 1.546742168407299e-06, "loss": 1.0309, "step": 74265 }, { "epoch": 2.189886480908153, "grad_norm": 3.4752608295711536, "learning_rate": 1.5462216621881888e-06, "loss": 1.0749, "step": 74270 }, { "epoch": 2.1900339083001623, "grad_norm": 3.3781378125029486, "learning_rate": 1.5457012208177746e-06, "loss": 1.0748, "step": 74275 }, { "epoch": 2.1901813356921718, "grad_norm": 3.464942773539332, "learning_rate": 1.5451808443113657e-06, "loss": 1.0661, "step": 74280 }, { "epoch": 2.1903287630841812, "grad_norm": 3.5013493335267247, "learning_rate": 1.5446605326842808e-06, "loss": 1.0334, "step": 74285 }, { "epoch": 2.1904761904761907, "grad_norm": 3.3464488372940817, "learning_rate": 1.544140285951827e-06, "loss": 1.022, "step": 74290 }, { "epoch": 2.1906236178681997, "grad_norm": 3.406016172865308, "learning_rate": 1.543620104129314e-06, "loss": 1.0284, "step": 74295 }, { "epoch": 2.190771045260209, "grad_norm": 3.5871375853279663, "learning_rate": 1.5430999872320489e-06, "loss": 1.0824, "step": 74300 }, { "epoch": 2.1909184726522186, "grad_norm": 3.4609646642808856, "learning_rate": 1.5425799352753368e-06, "loss": 1.0719, "step": 74305 }, { "epoch": 2.191065900044228, "grad_norm": 3.4479297376015774, "learning_rate": 1.5420599482744804e-06, "loss": 1.0875, "step": 74310 }, { "epoch": 2.1912133274362375, "grad_norm": 3.656447571089386, "learning_rate": 1.5415400262447812e-06, "loss": 1.0678, "step": 74315 }, { "epoch": 2.191360754828247, "grad_norm": 3.3750117995188886, "learning_rate": 1.5410201692015404e-06, "loss": 1.0314, "step": 74320 }, { "epoch": 2.1915081822202565, "grad_norm": 3.2948192530752918, "learning_rate": 1.5405003771600519e-06, "loss": 1.0625, "step": 74325 }, { "epoch": 2.191655609612266, "grad_norm": 3.678384867664705, "learning_rate": 1.5399806501356125e-06, "loss": 1.0776, "step": 74330 }, { "epoch": 2.1918030370042754, "grad_norm": 3.4688587244321214, "learning_rate": 1.5394609881435164e-06, "loss": 1.0329, "step": 74335 }, { "epoch": 2.191950464396285, "grad_norm": 3.3950977674088514, "learning_rate": 1.5389413911990545e-06, "loss": 1.0503, "step": 74340 }, { "epoch": 2.1920978917882943, "grad_norm": 3.443956854868992, "learning_rate": 1.5384218593175172e-06, "loss": 1.0187, "step": 74345 }, { "epoch": 2.1922453191803037, "grad_norm": 3.3355590815088862, "learning_rate": 1.5379023925141934e-06, "loss": 1.0275, "step": 74350 }, { "epoch": 2.192392746572313, "grad_norm": 3.4252622882492902, "learning_rate": 1.537382990804365e-06, "loss": 1.0616, "step": 74355 }, { "epoch": 2.1925401739643227, "grad_norm": 3.454949614593447, "learning_rate": 1.536863654203321e-06, "loss": 1.0617, "step": 74360 }, { "epoch": 2.192687601356332, "grad_norm": 3.534965452374377, "learning_rate": 1.5363443827263401e-06, "loss": 1.1085, "step": 74365 }, { "epoch": 2.1928350287483416, "grad_norm": 3.449788592542737, "learning_rate": 1.5358251763887037e-06, "loss": 1.0623, "step": 74370 }, { "epoch": 2.192982456140351, "grad_norm": 3.4311758339932403, "learning_rate": 1.5353060352056894e-06, "loss": 1.0746, "step": 74375 }, { "epoch": 2.1931298835323605, "grad_norm": 3.540967293543604, "learning_rate": 1.534786959192574e-06, "loss": 1.111, "step": 74380 }, { "epoch": 2.19327731092437, "grad_norm": 3.0568436494888176, "learning_rate": 1.534267948364633e-06, "loss": 1.0189, "step": 74385 }, { "epoch": 2.193424738316379, "grad_norm": 3.58315032490996, "learning_rate": 1.5337490027371354e-06, "loss": 1.0885, "step": 74390 }, { "epoch": 2.1935721657083884, "grad_norm": 3.371048530987937, "learning_rate": 1.5332301223253564e-06, "loss": 1.0346, "step": 74395 }, { "epoch": 2.193719593100398, "grad_norm": 3.4553787859141405, "learning_rate": 1.5327113071445614e-06, "loss": 1.0099, "step": 74400 }, { "epoch": 2.1938670204924073, "grad_norm": 3.688915210055654, "learning_rate": 1.5321925572100177e-06, "loss": 1.0161, "step": 74405 }, { "epoch": 2.194014447884417, "grad_norm": 3.4580817330339464, "learning_rate": 1.5316738725369907e-06, "loss": 1.0357, "step": 74410 }, { "epoch": 2.1941618752764263, "grad_norm": 3.47699934881131, "learning_rate": 1.531155253140743e-06, "loss": 1.0425, "step": 74415 }, { "epoch": 2.1943093026684357, "grad_norm": 3.353583822346166, "learning_rate": 1.530636699036536e-06, "loss": 1.0415, "step": 74420 }, { "epoch": 2.194456730060445, "grad_norm": 3.3487253892824738, "learning_rate": 1.5301182102396292e-06, "loss": 1.0498, "step": 74425 }, { "epoch": 2.1946041574524546, "grad_norm": 3.3284474056762, "learning_rate": 1.529599786765276e-06, "loss": 1.0304, "step": 74430 }, { "epoch": 2.194751584844464, "grad_norm": 3.3987219788962926, "learning_rate": 1.5290814286287376e-06, "loss": 0.997, "step": 74435 }, { "epoch": 2.1948990122364735, "grad_norm": 3.525802616435116, "learning_rate": 1.5285631358452617e-06, "loss": 1.0783, "step": 74440 }, { "epoch": 2.195046439628483, "grad_norm": 3.542890502841766, "learning_rate": 1.5280449084301046e-06, "loss": 1.062, "step": 74445 }, { "epoch": 2.1951938670204925, "grad_norm": 3.477777786643828, "learning_rate": 1.5275267463985119e-06, "loss": 1.0299, "step": 74450 }, { "epoch": 2.195341294412502, "grad_norm": 3.6329735000261985, "learning_rate": 1.5270086497657325e-06, "loss": 1.051, "step": 74455 }, { "epoch": 2.1954887218045114, "grad_norm": 3.370718434517237, "learning_rate": 1.5264906185470132e-06, "loss": 1.0615, "step": 74460 }, { "epoch": 2.195636149196521, "grad_norm": 3.720322402839096, "learning_rate": 1.5259726527575937e-06, "loss": 1.0932, "step": 74465 }, { "epoch": 2.1957835765885303, "grad_norm": 3.4349582885900105, "learning_rate": 1.5254547524127215e-06, "loss": 1.0332, "step": 74470 }, { "epoch": 2.1959310039805398, "grad_norm": 3.3450536660264816, "learning_rate": 1.524936917527632e-06, "loss": 1.0402, "step": 74475 }, { "epoch": 2.196078431372549, "grad_norm": 3.4737063595716773, "learning_rate": 1.5244191481175641e-06, "loss": 1.1162, "step": 74480 }, { "epoch": 2.1962258587645582, "grad_norm": 3.3599439749750295, "learning_rate": 1.5239014441977545e-06, "loss": 1.0363, "step": 74485 }, { "epoch": 2.1963732861565677, "grad_norm": 3.474521443004461, "learning_rate": 1.5233838057834366e-06, "loss": 1.0572, "step": 74490 }, { "epoch": 2.196520713548577, "grad_norm": 3.404228652516122, "learning_rate": 1.5228662328898426e-06, "loss": 1.0481, "step": 74495 }, { "epoch": 2.1966681409405866, "grad_norm": 3.50655184519618, "learning_rate": 1.5223487255322046e-06, "loss": 1.0527, "step": 74500 }, { "epoch": 2.1966681409405866, "eval_loss": 1.0856034755706787, "eval_runtime": 4.2686, "eval_samples_per_second": 92.771, "eval_steps_per_second": 3.046, "step": 74500 }, { "epoch": 2.196815568332596, "grad_norm": 3.433093109050627, "learning_rate": 1.5218312837257464e-06, "loss": 1.0076, "step": 74505 }, { "epoch": 2.1969629957246055, "grad_norm": 3.458464597832167, "learning_rate": 1.5213139074856992e-06, "loss": 1.0774, "step": 74510 }, { "epoch": 2.197110423116615, "grad_norm": 3.592926737308429, "learning_rate": 1.5207965968272842e-06, "loss": 1.0563, "step": 74515 }, { "epoch": 2.1972578505086244, "grad_norm": 3.2238355952744238, "learning_rate": 1.5202793517657246e-06, "loss": 1.0368, "step": 74520 }, { "epoch": 2.197405277900634, "grad_norm": 3.4889367433545795, "learning_rate": 1.5197621723162409e-06, "loss": 1.0237, "step": 74525 }, { "epoch": 2.1975527052926433, "grad_norm": 3.67939087390687, "learning_rate": 1.5192450584940523e-06, "loss": 1.0719, "step": 74530 }, { "epoch": 2.197700132684653, "grad_norm": 3.4501558498706015, "learning_rate": 1.5187280103143764e-06, "loss": 1.0344, "step": 74535 }, { "epoch": 2.1978475600766623, "grad_norm": 3.5786770162526156, "learning_rate": 1.518211027792424e-06, "loss": 1.0728, "step": 74540 }, { "epoch": 2.1979949874686717, "grad_norm": 3.592421549237896, "learning_rate": 1.517694110943413e-06, "loss": 1.0497, "step": 74545 }, { "epoch": 2.198142414860681, "grad_norm": 3.370741950712221, "learning_rate": 1.517177259782551e-06, "loss": 1.0071, "step": 74550 }, { "epoch": 2.1982898422526906, "grad_norm": 3.5509114645321715, "learning_rate": 1.516660474325047e-06, "loss": 1.0584, "step": 74555 }, { "epoch": 2.1984372696447, "grad_norm": 3.5071848712109954, "learning_rate": 1.5161437545861092e-06, "loss": 1.057, "step": 74560 }, { "epoch": 2.1985846970367096, "grad_norm": 3.2376537110346737, "learning_rate": 1.5156271005809422e-06, "loss": 1.0263, "step": 74565 }, { "epoch": 2.198732124428719, "grad_norm": 3.422443100633531, "learning_rate": 1.5151105123247494e-06, "loss": 1.0468, "step": 74570 }, { "epoch": 2.1988795518207285, "grad_norm": 3.5160438420432674, "learning_rate": 1.5145939898327312e-06, "loss": 1.0441, "step": 74575 }, { "epoch": 2.199026979212738, "grad_norm": 3.382847846747808, "learning_rate": 1.514077533120089e-06, "loss": 1.0618, "step": 74580 }, { "epoch": 2.1991744066047474, "grad_norm": 3.406131447711992, "learning_rate": 1.5135611422020174e-06, "loss": 1.0389, "step": 74585 }, { "epoch": 2.1993218339967564, "grad_norm": 3.378015659787985, "learning_rate": 1.5130448170937128e-06, "loss": 1.0087, "step": 74590 }, { "epoch": 2.199469261388766, "grad_norm": 3.5111424970535525, "learning_rate": 1.5125285578103686e-06, "loss": 1.0283, "step": 74595 }, { "epoch": 2.1996166887807753, "grad_norm": 3.5716042144985902, "learning_rate": 1.5120123643671768e-06, "loss": 1.0452, "step": 74600 }, { "epoch": 2.199764116172785, "grad_norm": 3.5118931393876593, "learning_rate": 1.5114962367793262e-06, "loss": 1.0618, "step": 74605 }, { "epoch": 2.1999115435647942, "grad_norm": 3.4171470806377506, "learning_rate": 1.5109801750620068e-06, "loss": 1.0318, "step": 74610 }, { "epoch": 2.2000589709568037, "grad_norm": 3.500368809445194, "learning_rate": 1.5104641792303997e-06, "loss": 1.0486, "step": 74615 }, { "epoch": 2.200206398348813, "grad_norm": 3.49291543067936, "learning_rate": 1.509948249299694e-06, "loss": 1.057, "step": 74620 }, { "epoch": 2.2003538257408226, "grad_norm": 3.631695256821907, "learning_rate": 1.5094323852850674e-06, "loss": 1.0499, "step": 74625 }, { "epoch": 2.200501253132832, "grad_norm": 3.423648655172395, "learning_rate": 1.5089165872017014e-06, "loss": 1.0719, "step": 74630 }, { "epoch": 2.2006486805248415, "grad_norm": 3.5288657058751007, "learning_rate": 1.5084008550647742e-06, "loss": 1.0752, "step": 74635 }, { "epoch": 2.200796107916851, "grad_norm": 3.2830370469564083, "learning_rate": 1.5078851888894613e-06, "loss": 1.0393, "step": 74640 }, { "epoch": 2.2009435353088604, "grad_norm": 3.5009189012951967, "learning_rate": 1.507369588690938e-06, "loss": 1.0508, "step": 74645 }, { "epoch": 2.20109096270087, "grad_norm": 3.4747244094603897, "learning_rate": 1.506854054484373e-06, "loss": 1.0587, "step": 74650 }, { "epoch": 2.2012383900928794, "grad_norm": 3.439719451258478, "learning_rate": 1.5063385862849415e-06, "loss": 1.0628, "step": 74655 }, { "epoch": 2.201385817484889, "grad_norm": 3.5009813133479506, "learning_rate": 1.5058231841078075e-06, "loss": 1.0313, "step": 74660 }, { "epoch": 2.2015332448768983, "grad_norm": 3.6954210343658396, "learning_rate": 1.5053078479681394e-06, "loss": 1.0783, "step": 74665 }, { "epoch": 2.2016806722689077, "grad_norm": 3.3088765817952885, "learning_rate": 1.5047925778811004e-06, "loss": 1.0278, "step": 74670 }, { "epoch": 2.201828099660917, "grad_norm": 3.4108707407745613, "learning_rate": 1.504277373861854e-06, "loss": 1.0454, "step": 74675 }, { "epoch": 2.2019755270529267, "grad_norm": 3.569564858185984, "learning_rate": 1.50376223592556e-06, "loss": 1.0725, "step": 74680 }, { "epoch": 2.2021229544449357, "grad_norm": 3.534053149862072, "learning_rate": 1.5032471640873787e-06, "loss": 1.0342, "step": 74685 }, { "epoch": 2.202270381836945, "grad_norm": 3.801403783385362, "learning_rate": 1.5027321583624623e-06, "loss": 1.0593, "step": 74690 }, { "epoch": 2.2024178092289546, "grad_norm": 3.5291557952598294, "learning_rate": 1.502217218765971e-06, "loss": 1.0578, "step": 74695 }, { "epoch": 2.202565236620964, "grad_norm": 3.3508638561513036, "learning_rate": 1.501702345313052e-06, "loss": 1.0242, "step": 74700 }, { "epoch": 2.2027126640129735, "grad_norm": 3.3874862525531317, "learning_rate": 1.5011875380188613e-06, "loss": 1.0669, "step": 74705 }, { "epoch": 2.202860091404983, "grad_norm": 3.301349296456852, "learning_rate": 1.500672796898544e-06, "loss": 1.0185, "step": 74710 }, { "epoch": 2.2030075187969924, "grad_norm": 3.2502138287966735, "learning_rate": 1.5001581219672482e-06, "loss": 1.0044, "step": 74715 }, { "epoch": 2.203154946189002, "grad_norm": 3.443126737177326, "learning_rate": 1.4996435132401203e-06, "loss": 1.0244, "step": 74720 }, { "epoch": 2.2033023735810113, "grad_norm": 3.397242574987019, "learning_rate": 1.4991289707322985e-06, "loss": 1.0863, "step": 74725 }, { "epoch": 2.203449800973021, "grad_norm": 3.5806680320294104, "learning_rate": 1.4986144944589299e-06, "loss": 1.0713, "step": 74730 }, { "epoch": 2.2035972283650302, "grad_norm": 3.3055100302411797, "learning_rate": 1.498100084435149e-06, "loss": 1.0318, "step": 74735 }, { "epoch": 2.2037446557570397, "grad_norm": 3.2787159648447166, "learning_rate": 1.497585740676095e-06, "loss": 0.992, "step": 74740 }, { "epoch": 2.203892083149049, "grad_norm": 3.396811279866984, "learning_rate": 1.4970714631969019e-06, "loss": 1.046, "step": 74745 }, { "epoch": 2.2040395105410586, "grad_norm": 3.482844151471688, "learning_rate": 1.4965572520127037e-06, "loss": 1.0704, "step": 74750 }, { "epoch": 2.204186937933068, "grad_norm": 3.4083420668179665, "learning_rate": 1.4960431071386321e-06, "loss": 0.9928, "step": 74755 }, { "epoch": 2.2043343653250775, "grad_norm": 3.4590997167371467, "learning_rate": 1.4955290285898167e-06, "loss": 1.0655, "step": 74760 }, { "epoch": 2.204481792717087, "grad_norm": 3.5654968876078414, "learning_rate": 1.4950150163813812e-06, "loss": 1.0959, "step": 74765 }, { "epoch": 2.2046292201090965, "grad_norm": 3.4655283134631816, "learning_rate": 1.4945010705284566e-06, "loss": 1.0771, "step": 74770 }, { "epoch": 2.204776647501106, "grad_norm": 3.3976055238832488, "learning_rate": 1.4939871910461623e-06, "loss": 1.0299, "step": 74775 }, { "epoch": 2.204924074893115, "grad_norm": 3.462368108970694, "learning_rate": 1.4934733779496208e-06, "loss": 1.0411, "step": 74780 }, { "epoch": 2.2050715022851244, "grad_norm": 3.329618206592433, "learning_rate": 1.492959631253952e-06, "loss": 1.0705, "step": 74785 }, { "epoch": 2.205218929677134, "grad_norm": 3.5223123791037545, "learning_rate": 1.4924459509742733e-06, "loss": 1.0851, "step": 74790 }, { "epoch": 2.2053663570691433, "grad_norm": 3.4266307160724563, "learning_rate": 1.4919323371257018e-06, "loss": 1.0572, "step": 74795 }, { "epoch": 2.2055137844611528, "grad_norm": 3.45198501170005, "learning_rate": 1.4914187897233468e-06, "loss": 1.0497, "step": 74800 }, { "epoch": 2.205661211853162, "grad_norm": 3.4306987422643505, "learning_rate": 1.4909053087823255e-06, "loss": 1.0329, "step": 74805 }, { "epoch": 2.2058086392451717, "grad_norm": 3.393070709270681, "learning_rate": 1.4903918943177443e-06, "loss": 1.0759, "step": 74810 }, { "epoch": 2.205956066637181, "grad_norm": 3.4375026444432897, "learning_rate": 1.4898785463447113e-06, "loss": 1.0336, "step": 74815 }, { "epoch": 2.2061034940291906, "grad_norm": 3.4295792210764455, "learning_rate": 1.489365264878333e-06, "loss": 1.0635, "step": 74820 }, { "epoch": 2.2062509214212, "grad_norm": 3.513625479699057, "learning_rate": 1.488852049933713e-06, "loss": 1.0566, "step": 74825 }, { "epoch": 2.2063983488132095, "grad_norm": 3.6147731111018437, "learning_rate": 1.4883389015259539e-06, "loss": 1.0576, "step": 74830 }, { "epoch": 2.206545776205219, "grad_norm": 3.2945119674683605, "learning_rate": 1.487825819670155e-06, "loss": 1.0597, "step": 74835 }, { "epoch": 2.2066932035972284, "grad_norm": 3.2883432166976077, "learning_rate": 1.4873128043814153e-06, "loss": 1.0351, "step": 74840 }, { "epoch": 2.206840630989238, "grad_norm": 3.6290412358883417, "learning_rate": 1.4867998556748294e-06, "loss": 1.0938, "step": 74845 }, { "epoch": 2.2069880583812473, "grad_norm": 3.4080171168247553, "learning_rate": 1.4862869735654918e-06, "loss": 1.0352, "step": 74850 }, { "epoch": 2.207135485773257, "grad_norm": 3.5522139212721213, "learning_rate": 1.4857741580684948e-06, "loss": 1.089, "step": 74855 }, { "epoch": 2.2072829131652663, "grad_norm": 3.274247685294313, "learning_rate": 1.4852614091989285e-06, "loss": 1.0728, "step": 74860 }, { "epoch": 2.2074303405572757, "grad_norm": 3.482400039388615, "learning_rate": 1.4847487269718815e-06, "loss": 1.0524, "step": 74865 }, { "epoch": 2.207577767949285, "grad_norm": 3.5138045625863863, "learning_rate": 1.4842361114024412e-06, "loss": 1.0841, "step": 74870 }, { "epoch": 2.207725195341294, "grad_norm": 3.4526018931674964, "learning_rate": 1.4837235625056878e-06, "loss": 1.0506, "step": 74875 }, { "epoch": 2.2078726227333036, "grad_norm": 3.401668449212604, "learning_rate": 1.4832110802967093e-06, "loss": 1.047, "step": 74880 }, { "epoch": 2.208020050125313, "grad_norm": 3.453400192425917, "learning_rate": 1.4826986647905816e-06, "loss": 1.0378, "step": 74885 }, { "epoch": 2.2081674775173226, "grad_norm": 3.4795412146619564, "learning_rate": 1.4821863160023847e-06, "loss": 1.0656, "step": 74890 }, { "epoch": 2.208314904909332, "grad_norm": 3.455338624985903, "learning_rate": 1.481674033947195e-06, "loss": 1.0474, "step": 74895 }, { "epoch": 2.2084623323013415, "grad_norm": 3.4910651166803714, "learning_rate": 1.4811618186400872e-06, "loss": 1.0465, "step": 74900 }, { "epoch": 2.208609759693351, "grad_norm": 3.530689418157138, "learning_rate": 1.4806496700961349e-06, "loss": 1.019, "step": 74905 }, { "epoch": 2.2087571870853604, "grad_norm": 3.448351093109292, "learning_rate": 1.4801375883304045e-06, "loss": 1.0663, "step": 74910 }, { "epoch": 2.20890461447737, "grad_norm": 3.5201606350558254, "learning_rate": 1.4796255733579706e-06, "loss": 1.0553, "step": 74915 }, { "epoch": 2.2090520418693793, "grad_norm": 3.4786471839562476, "learning_rate": 1.4791136251938949e-06, "loss": 1.0532, "step": 74920 }, { "epoch": 2.2091994692613888, "grad_norm": 3.4685781228344723, "learning_rate": 1.478601743853244e-06, "loss": 1.0132, "step": 74925 }, { "epoch": 2.2093468966533982, "grad_norm": 3.574056847115807, "learning_rate": 1.4780899293510804e-06, "loss": 1.018, "step": 74930 }, { "epoch": 2.2094943240454077, "grad_norm": 3.4669197610873796, "learning_rate": 1.477578181702465e-06, "loss": 1.0888, "step": 74935 }, { "epoch": 2.209641751437417, "grad_norm": 3.2876851417142317, "learning_rate": 1.4770665009224564e-06, "loss": 1.0303, "step": 74940 }, { "epoch": 2.2097891788294266, "grad_norm": 3.382341463994306, "learning_rate": 1.4765548870261127e-06, "loss": 1.0333, "step": 74945 }, { "epoch": 2.209936606221436, "grad_norm": 3.5162749343352555, "learning_rate": 1.4760433400284847e-06, "loss": 1.0415, "step": 74950 }, { "epoch": 2.2100840336134455, "grad_norm": 3.6814999797972066, "learning_rate": 1.4755318599446308e-06, "loss": 1.081, "step": 74955 }, { "epoch": 2.210231461005455, "grad_norm": 3.441002552991524, "learning_rate": 1.4750204467895964e-06, "loss": 1.0208, "step": 74960 }, { "epoch": 2.2103788883974644, "grad_norm": 3.52622949912913, "learning_rate": 1.4745091005784358e-06, "loss": 1.0655, "step": 74965 }, { "epoch": 2.2105263157894735, "grad_norm": 3.425870131302059, "learning_rate": 1.473997821326192e-06, "loss": 1.0001, "step": 74970 }, { "epoch": 2.2106737431814834, "grad_norm": 3.3612359591646475, "learning_rate": 1.4734866090479113e-06, "loss": 1.0432, "step": 74975 }, { "epoch": 2.2108211705734924, "grad_norm": 3.5773090250319224, "learning_rate": 1.4729754637586378e-06, "loss": 1.0606, "step": 74980 }, { "epoch": 2.210968597965502, "grad_norm": 3.3971825896541588, "learning_rate": 1.4724643854734086e-06, "loss": 1.0465, "step": 74985 }, { "epoch": 2.2111160253575113, "grad_norm": 3.5427757076839015, "learning_rate": 1.4719533742072686e-06, "loss": 1.077, "step": 74990 }, { "epoch": 2.2112634527495207, "grad_norm": 3.5192864996840725, "learning_rate": 1.47144242997525e-06, "loss": 1.0305, "step": 74995 }, { "epoch": 2.21141088014153, "grad_norm": 3.385324873145096, "learning_rate": 1.4709315527923899e-06, "loss": 1.0589, "step": 75000 }, { "epoch": 2.21141088014153, "eval_loss": 1.0860910415649414, "eval_runtime": 4.1805, "eval_samples_per_second": 94.727, "eval_steps_per_second": 3.11, "step": 75000 }, { "epoch": 2.2115583075335397, "grad_norm": 3.473135429410897, "learning_rate": 1.4704207426737213e-06, "loss": 1.0612, "step": 75005 }, { "epoch": 2.211705734925549, "grad_norm": 3.555472918536221, "learning_rate": 1.4699099996342755e-06, "loss": 1.0299, "step": 75010 }, { "epoch": 2.2118531623175586, "grad_norm": 3.522836927931772, "learning_rate": 1.4693993236890815e-06, "loss": 1.0597, "step": 75015 }, { "epoch": 2.212000589709568, "grad_norm": 3.5077839196910845, "learning_rate": 1.4688887148531675e-06, "loss": 1.0636, "step": 75020 }, { "epoch": 2.2121480171015775, "grad_norm": 3.477520552847218, "learning_rate": 1.4683781731415556e-06, "loss": 1.0589, "step": 75025 }, { "epoch": 2.212295444493587, "grad_norm": 3.4179190950995846, "learning_rate": 1.467867698569274e-06, "loss": 1.0138, "step": 75030 }, { "epoch": 2.2124428718855964, "grad_norm": 3.579850693286197, "learning_rate": 1.4673572911513395e-06, "loss": 1.0818, "step": 75035 }, { "epoch": 2.212590299277606, "grad_norm": 3.2253613510333796, "learning_rate": 1.466846950902774e-06, "loss": 1.0093, "step": 75040 }, { "epoch": 2.2127377266696153, "grad_norm": 3.5154292351234506, "learning_rate": 1.4663366778385933e-06, "loss": 1.0371, "step": 75045 }, { "epoch": 2.212885154061625, "grad_norm": 3.4713204801977104, "learning_rate": 1.4658264719738139e-06, "loss": 1.0463, "step": 75050 }, { "epoch": 2.2130325814536342, "grad_norm": 3.5058661910433626, "learning_rate": 1.4653163333234502e-06, "loss": 1.0481, "step": 75055 }, { "epoch": 2.2131800088456437, "grad_norm": 3.6614974493065744, "learning_rate": 1.4648062619025093e-06, "loss": 1.0239, "step": 75060 }, { "epoch": 2.213327436237653, "grad_norm": 3.3510174313034646, "learning_rate": 1.464296257726006e-06, "loss": 1.0534, "step": 75065 }, { "epoch": 2.2134748636296626, "grad_norm": 3.466663691573307, "learning_rate": 1.4637863208089446e-06, "loss": 1.0556, "step": 75070 }, { "epoch": 2.2136222910216716, "grad_norm": 3.331626001447071, "learning_rate": 1.463276451166331e-06, "loss": 1.0573, "step": 75075 }, { "epoch": 2.213769718413681, "grad_norm": 3.342716759901896, "learning_rate": 1.462766648813169e-06, "loss": 1.0501, "step": 75080 }, { "epoch": 2.2139171458056905, "grad_norm": 3.4730641171353294, "learning_rate": 1.4622569137644602e-06, "loss": 1.0463, "step": 75085 }, { "epoch": 2.2140645731977, "grad_norm": 3.408018424362602, "learning_rate": 1.461747246035204e-06, "loss": 1.0623, "step": 75090 }, { "epoch": 2.2142120005897095, "grad_norm": 3.4438198380612515, "learning_rate": 1.4612376456403977e-06, "loss": 1.0704, "step": 75095 }, { "epoch": 2.214359427981719, "grad_norm": 3.329047313905757, "learning_rate": 1.460728112595039e-06, "loss": 1.0273, "step": 75100 }, { "epoch": 2.2145068553737284, "grad_norm": 3.435096998364093, "learning_rate": 1.4602186469141178e-06, "loss": 1.0025, "step": 75105 }, { "epoch": 2.214654282765738, "grad_norm": 3.371633655141356, "learning_rate": 1.4597092486126279e-06, "loss": 1.049, "step": 75110 }, { "epoch": 2.2148017101577473, "grad_norm": 3.476638378083162, "learning_rate": 1.4591999177055585e-06, "loss": 1.062, "step": 75115 }, { "epoch": 2.2149491375497568, "grad_norm": 3.383444797234358, "learning_rate": 1.4586906542078976e-06, "loss": 1.0295, "step": 75120 }, { "epoch": 2.215096564941766, "grad_norm": 3.577129753101009, "learning_rate": 1.4581814581346301e-06, "loss": 1.1022, "step": 75125 }, { "epoch": 2.2152439923337757, "grad_norm": 3.601976124686376, "learning_rate": 1.457672329500742e-06, "loss": 1.0986, "step": 75130 }, { "epoch": 2.215391419725785, "grad_norm": 3.4254137506552906, "learning_rate": 1.45716326832121e-06, "loss": 1.0009, "step": 75135 }, { "epoch": 2.2155388471177946, "grad_norm": 3.287851062770489, "learning_rate": 1.4566542746110202e-06, "loss": 1.019, "step": 75140 }, { "epoch": 2.215686274509804, "grad_norm": 3.71616924433255, "learning_rate": 1.4561453483851455e-06, "loss": 1.0404, "step": 75145 }, { "epoch": 2.2158337019018135, "grad_norm": 3.3079035989564214, "learning_rate": 1.4556364896585632e-06, "loss": 1.0278, "step": 75150 }, { "epoch": 2.215981129293823, "grad_norm": 3.4947035435732747, "learning_rate": 1.455127698446247e-06, "loss": 1.0551, "step": 75155 }, { "epoch": 2.2161285566858324, "grad_norm": 3.351314643627622, "learning_rate": 1.454618974763169e-06, "loss": 1.0441, "step": 75160 }, { "epoch": 2.216275984077842, "grad_norm": 3.4071155907437536, "learning_rate": 1.4541103186243e-06, "loss": 1.0395, "step": 75165 }, { "epoch": 2.216423411469851, "grad_norm": 3.4695158417572953, "learning_rate": 1.4536017300446036e-06, "loss": 1.0482, "step": 75170 }, { "epoch": 2.2165708388618603, "grad_norm": 3.749120599254675, "learning_rate": 1.4530932090390511e-06, "loss": 1.0712, "step": 75175 }, { "epoch": 2.21671826625387, "grad_norm": 3.514694726468014, "learning_rate": 1.4525847556226026e-06, "loss": 1.021, "step": 75180 }, { "epoch": 2.2168656936458793, "grad_norm": 3.377629132392113, "learning_rate": 1.452076369810221e-06, "loss": 1.0004, "step": 75185 }, { "epoch": 2.2170131210378887, "grad_norm": 3.534642176584445, "learning_rate": 1.4515680516168662e-06, "loss": 1.0722, "step": 75190 }, { "epoch": 2.217160548429898, "grad_norm": 3.4453554545368195, "learning_rate": 1.4510598010574965e-06, "loss": 1.0675, "step": 75195 }, { "epoch": 2.2173079758219076, "grad_norm": 3.51371544990235, "learning_rate": 1.4505516181470665e-06, "loss": 1.0516, "step": 75200 }, { "epoch": 2.217455403213917, "grad_norm": 3.5498267999231854, "learning_rate": 1.4500435029005325e-06, "loss": 1.0415, "step": 75205 }, { "epoch": 2.2176028306059266, "grad_norm": 3.308235280911267, "learning_rate": 1.449535455332842e-06, "loss": 1.0627, "step": 75210 }, { "epoch": 2.217750257997936, "grad_norm": 3.309050602975139, "learning_rate": 1.4490274754589505e-06, "loss": 1.0321, "step": 75215 }, { "epoch": 2.2178976853899455, "grad_norm": 3.4896829044043853, "learning_rate": 1.4485195632937998e-06, "loss": 1.0028, "step": 75220 }, { "epoch": 2.218045112781955, "grad_norm": 3.2766462742046323, "learning_rate": 1.4480117188523418e-06, "loss": 1.0223, "step": 75225 }, { "epoch": 2.2181925401739644, "grad_norm": 3.9744504119929287, "learning_rate": 1.4475039421495166e-06, "loss": 1.0681, "step": 75230 }, { "epoch": 2.218339967565974, "grad_norm": 3.5721957184783566, "learning_rate": 1.4469962332002668e-06, "loss": 1.0401, "step": 75235 }, { "epoch": 2.2184873949579833, "grad_norm": 3.4917955259381794, "learning_rate": 1.4464885920195336e-06, "loss": 1.0705, "step": 75240 }, { "epoch": 2.2186348223499928, "grad_norm": 3.4531492519620004, "learning_rate": 1.4459810186222515e-06, "loss": 1.0617, "step": 75245 }, { "epoch": 2.218782249742002, "grad_norm": 3.452614709135261, "learning_rate": 1.4454735130233617e-06, "loss": 1.0516, "step": 75250 }, { "epoch": 2.2189296771340117, "grad_norm": 3.395333612217428, "learning_rate": 1.444966075237794e-06, "loss": 1.0356, "step": 75255 }, { "epoch": 2.219077104526021, "grad_norm": 3.4064946669048797, "learning_rate": 1.4444587052804811e-06, "loss": 1.0224, "step": 75260 }, { "epoch": 2.21922453191803, "grad_norm": 3.442978971595683, "learning_rate": 1.4439514031663542e-06, "loss": 1.0283, "step": 75265 }, { "epoch": 2.2193719593100396, "grad_norm": 3.3789374116270166, "learning_rate": 1.44344416891034e-06, "loss": 1.009, "step": 75270 }, { "epoch": 2.219519386702049, "grad_norm": 3.4188959756712305, "learning_rate": 1.4429370025273646e-06, "loss": 1.0778, "step": 75275 }, { "epoch": 2.2196668140940585, "grad_norm": 3.326046396083424, "learning_rate": 1.4424299040323541e-06, "loss": 1.0149, "step": 75280 }, { "epoch": 2.219814241486068, "grad_norm": 3.39121395286649, "learning_rate": 1.441922873440226e-06, "loss": 1.0442, "step": 75285 }, { "epoch": 2.2199616688780774, "grad_norm": 3.640413495582035, "learning_rate": 1.4414159107659054e-06, "loss": 1.0531, "step": 75290 }, { "epoch": 2.220109096270087, "grad_norm": 3.3793329975153514, "learning_rate": 1.4409090160243064e-06, "loss": 1.012, "step": 75295 }, { "epoch": 2.2202565236620964, "grad_norm": 3.5759915808103795, "learning_rate": 1.4404021892303464e-06, "loss": 1.0902, "step": 75300 }, { "epoch": 2.220403951054106, "grad_norm": 3.357152103509289, "learning_rate": 1.4398954303989393e-06, "loss": 1.0339, "step": 75305 }, { "epoch": 2.2205513784461153, "grad_norm": 3.3825433752326495, "learning_rate": 1.439388739544997e-06, "loss": 1.0919, "step": 75310 }, { "epoch": 2.2206988058381247, "grad_norm": 3.487529873453031, "learning_rate": 1.4388821166834307e-06, "loss": 1.0923, "step": 75315 }, { "epoch": 2.220846233230134, "grad_norm": 3.38684395040885, "learning_rate": 1.4383755618291447e-06, "loss": 1.0518, "step": 75320 }, { "epoch": 2.2209936606221437, "grad_norm": 3.5955275135160316, "learning_rate": 1.43786907499705e-06, "loss": 1.0798, "step": 75325 }, { "epoch": 2.221141088014153, "grad_norm": 3.348094186431945, "learning_rate": 1.4373626562020466e-06, "loss": 1.0614, "step": 75330 }, { "epoch": 2.2212885154061626, "grad_norm": 3.447290105065808, "learning_rate": 1.4368563054590376e-06, "loss": 1.0738, "step": 75335 }, { "epoch": 2.221435942798172, "grad_norm": 3.502275576715258, "learning_rate": 1.4363500227829235e-06, "loss": 1.0999, "step": 75340 }, { "epoch": 2.2215833701901815, "grad_norm": 3.4376175230265074, "learning_rate": 1.4358438081886014e-06, "loss": 1.0662, "step": 75345 }, { "epoch": 2.221730797582191, "grad_norm": 3.4854157015794778, "learning_rate": 1.435337661690968e-06, "loss": 1.0797, "step": 75350 }, { "epoch": 2.2218782249742004, "grad_norm": 3.7478710343957444, "learning_rate": 1.434831583304917e-06, "loss": 1.0744, "step": 75355 }, { "epoch": 2.2220256523662094, "grad_norm": 3.3688641510546615, "learning_rate": 1.4343255730453416e-06, "loss": 1.0686, "step": 75360 }, { "epoch": 2.222173079758219, "grad_norm": 3.4194399448544335, "learning_rate": 1.4338196309271292e-06, "loss": 1.0583, "step": 75365 }, { "epoch": 2.2223205071502283, "grad_norm": 3.3335022186072107, "learning_rate": 1.4333137569651687e-06, "loss": 1.0443, "step": 75370 }, { "epoch": 2.222467934542238, "grad_norm": 3.40666421609147, "learning_rate": 1.4328079511743463e-06, "loss": 1.0047, "step": 75375 }, { "epoch": 2.2226153619342472, "grad_norm": 3.3299718756403993, "learning_rate": 1.432302213569546e-06, "loss": 1.0559, "step": 75380 }, { "epoch": 2.2227627893262567, "grad_norm": 3.5482076139752645, "learning_rate": 1.4317965441656497e-06, "loss": 1.0634, "step": 75385 }, { "epoch": 2.222910216718266, "grad_norm": 3.5786009460458956, "learning_rate": 1.4312909429775384e-06, "loss": 1.0443, "step": 75390 }, { "epoch": 2.2230576441102756, "grad_norm": 3.4558731882523803, "learning_rate": 1.4307854100200863e-06, "loss": 1.0378, "step": 75395 }, { "epoch": 2.223205071502285, "grad_norm": 3.7358649381591746, "learning_rate": 1.4302799453081742e-06, "loss": 1.0837, "step": 75400 }, { "epoch": 2.2233524988942945, "grad_norm": 3.4147498689684874, "learning_rate": 1.4297745488566725e-06, "loss": 1.0347, "step": 75405 }, { "epoch": 2.223499926286304, "grad_norm": 3.3709448562690882, "learning_rate": 1.429269220680454e-06, "loss": 1.0297, "step": 75410 }, { "epoch": 2.2236473536783135, "grad_norm": 3.48102605711509, "learning_rate": 1.4287639607943885e-06, "loss": 1.0314, "step": 75415 }, { "epoch": 2.223794781070323, "grad_norm": 3.4581479281267744, "learning_rate": 1.4282587692133443e-06, "loss": 1.0589, "step": 75420 }, { "epoch": 2.2239422084623324, "grad_norm": 3.5037364254100125, "learning_rate": 1.4277536459521883e-06, "loss": 1.0408, "step": 75425 }, { "epoch": 2.224089635854342, "grad_norm": 3.486490011792888, "learning_rate": 1.42724859102578e-06, "loss": 0.9968, "step": 75430 }, { "epoch": 2.2242370632463513, "grad_norm": 3.5379766107894772, "learning_rate": 1.4267436044489874e-06, "loss": 1.0628, "step": 75435 }, { "epoch": 2.2243844906383607, "grad_norm": 3.5749406190543187, "learning_rate": 1.4262386862366654e-06, "loss": 1.032, "step": 75440 }, { "epoch": 2.22453191803037, "grad_norm": 3.365423295052137, "learning_rate": 1.4257338364036735e-06, "loss": 1.0563, "step": 75445 }, { "epoch": 2.2246793454223797, "grad_norm": 3.5252149068351195, "learning_rate": 1.4252290549648676e-06, "loss": 1.0614, "step": 75450 }, { "epoch": 2.224826772814389, "grad_norm": 3.191708905286535, "learning_rate": 1.4247243419351015e-06, "loss": 1.0014, "step": 75455 }, { "epoch": 2.2249742002063986, "grad_norm": 3.3534707046283603, "learning_rate": 1.4242196973292267e-06, "loss": 0.9994, "step": 75460 }, { "epoch": 2.2251216275984076, "grad_norm": 3.3732253712636644, "learning_rate": 1.423715121162094e-06, "loss": 1.0127, "step": 75465 }, { "epoch": 2.225269054990417, "grad_norm": 3.4342557967461684, "learning_rate": 1.4232106134485483e-06, "loss": 1.0549, "step": 75470 }, { "epoch": 2.2254164823824265, "grad_norm": 3.4363939392339016, "learning_rate": 1.4227061742034394e-06, "loss": 0.9638, "step": 75475 }, { "epoch": 2.225563909774436, "grad_norm": 3.5662381378710757, "learning_rate": 1.4222018034416061e-06, "loss": 1.0347, "step": 75480 }, { "epoch": 2.2257113371664454, "grad_norm": 3.4413504766585015, "learning_rate": 1.4216975011778954e-06, "loss": 1.0163, "step": 75485 }, { "epoch": 2.225858764558455, "grad_norm": 3.61513370282056, "learning_rate": 1.4211932674271433e-06, "loss": 1.016, "step": 75490 }, { "epoch": 2.2260061919504643, "grad_norm": 3.6007756667648594, "learning_rate": 1.4206891022041886e-06, "loss": 1.0807, "step": 75495 }, { "epoch": 2.226153619342474, "grad_norm": 3.3751603629945386, "learning_rate": 1.4201850055238682e-06, "loss": 1.1094, "step": 75500 }, { "epoch": 2.226153619342474, "eval_loss": 1.0855828523635864, "eval_runtime": 4.2736, "eval_samples_per_second": 92.662, "eval_steps_per_second": 3.042, "step": 75500 }, { "epoch": 2.2263010467344833, "grad_norm": 3.378970721943972, "learning_rate": 1.4196809774010113e-06, "loss": 1.0423, "step": 75505 }, { "epoch": 2.2264484741264927, "grad_norm": 3.3863821260003557, "learning_rate": 1.4191770178504557e-06, "loss": 1.0361, "step": 75510 }, { "epoch": 2.226595901518502, "grad_norm": 3.43760517601061, "learning_rate": 1.4186731268870264e-06, "loss": 1.0679, "step": 75515 }, { "epoch": 2.2267433289105116, "grad_norm": 3.294843274928293, "learning_rate": 1.4181693045255525e-06, "loss": 1.0178, "step": 75520 }, { "epoch": 2.226890756302521, "grad_norm": 3.4668856434954876, "learning_rate": 1.4176655507808595e-06, "loss": 1.0546, "step": 75525 }, { "epoch": 2.2270381836945305, "grad_norm": 3.4756267175941122, "learning_rate": 1.4171618656677707e-06, "loss": 1.0892, "step": 75530 }, { "epoch": 2.22718561108654, "grad_norm": 3.5079290539885837, "learning_rate": 1.4166582492011078e-06, "loss": 1.0552, "step": 75535 }, { "epoch": 2.2273330384785495, "grad_norm": 3.511610937880123, "learning_rate": 1.4161547013956919e-06, "loss": 1.1047, "step": 75540 }, { "epoch": 2.227480465870559, "grad_norm": 3.5790597243051545, "learning_rate": 1.415651222266336e-06, "loss": 1.0496, "step": 75545 }, { "epoch": 2.2276278932625684, "grad_norm": 3.5563035054034375, "learning_rate": 1.4151478118278611e-06, "loss": 1.0592, "step": 75550 }, { "epoch": 2.227775320654578, "grad_norm": 3.415793738474987, "learning_rate": 1.414644470095077e-06, "loss": 1.1026, "step": 75555 }, { "epoch": 2.227922748046587, "grad_norm": 3.4631201384820014, "learning_rate": 1.4141411970827953e-06, "loss": 1.0651, "step": 75560 }, { "epoch": 2.2280701754385963, "grad_norm": 3.48942724762782, "learning_rate": 1.4136379928058268e-06, "loss": 1.0686, "step": 75565 }, { "epoch": 2.2282176028306058, "grad_norm": 3.49843281581546, "learning_rate": 1.4131348572789781e-06, "loss": 1.0695, "step": 75570 }, { "epoch": 2.2283650302226152, "grad_norm": 3.404579423370355, "learning_rate": 1.412631790517056e-06, "loss": 1.0455, "step": 75575 }, { "epoch": 2.2285124576146247, "grad_norm": 3.3241084855712053, "learning_rate": 1.4121287925348596e-06, "loss": 1.0659, "step": 75580 }, { "epoch": 2.228659885006634, "grad_norm": 3.5197439211625055, "learning_rate": 1.4116258633471957e-06, "loss": 1.0191, "step": 75585 }, { "epoch": 2.2288073123986436, "grad_norm": 3.505580873563024, "learning_rate": 1.41112300296886e-06, "loss": 1.0392, "step": 75590 }, { "epoch": 2.228954739790653, "grad_norm": 3.4238479151781194, "learning_rate": 1.41062021141465e-06, "loss": 1.0391, "step": 75595 }, { "epoch": 2.2291021671826625, "grad_norm": 3.5062801201561555, "learning_rate": 1.4101174886993623e-06, "loss": 1.0717, "step": 75600 }, { "epoch": 2.229249594574672, "grad_norm": 3.554296548182553, "learning_rate": 1.409614834837789e-06, "loss": 1.0675, "step": 75605 }, { "epoch": 2.2293970219666814, "grad_norm": 3.4075366750990836, "learning_rate": 1.4091122498447214e-06, "loss": 1.0472, "step": 75610 }, { "epoch": 2.229544449358691, "grad_norm": 3.529155748080037, "learning_rate": 1.4086097337349493e-06, "loss": 1.0542, "step": 75615 }, { "epoch": 2.2296918767507004, "grad_norm": 3.442371579697193, "learning_rate": 1.4081072865232602e-06, "loss": 1.0548, "step": 75620 }, { "epoch": 2.22983930414271, "grad_norm": 3.3214925480720687, "learning_rate": 1.4076049082244374e-06, "loss": 1.0328, "step": 75625 }, { "epoch": 2.2299867315347193, "grad_norm": 3.380975663846386, "learning_rate": 1.407102598853265e-06, "loss": 1.0833, "step": 75630 }, { "epoch": 2.2301341589267287, "grad_norm": 3.532313385860364, "learning_rate": 1.4066003584245236e-06, "loss": 1.046, "step": 75635 }, { "epoch": 2.230281586318738, "grad_norm": 3.5208031836694453, "learning_rate": 1.4060981869529927e-06, "loss": 1.0453, "step": 75640 }, { "epoch": 2.2304290137107476, "grad_norm": 3.3759817717834295, "learning_rate": 1.4055960844534493e-06, "loss": 1.0442, "step": 75645 }, { "epoch": 2.230576441102757, "grad_norm": 3.391567397227665, "learning_rate": 1.405094050940669e-06, "loss": 1.0204, "step": 75650 }, { "epoch": 2.230723868494766, "grad_norm": 3.4320488437974626, "learning_rate": 1.4045920864294217e-06, "loss": 1.0761, "step": 75655 }, { "epoch": 2.2308712958867756, "grad_norm": 3.4079676318842815, "learning_rate": 1.4040901909344828e-06, "loss": 1.0312, "step": 75660 }, { "epoch": 2.231018723278785, "grad_norm": 3.497664034284727, "learning_rate": 1.4035883644706171e-06, "loss": 1.035, "step": 75665 }, { "epoch": 2.2311661506707945, "grad_norm": 3.307413250865341, "learning_rate": 1.4030866070525937e-06, "loss": 1.0095, "step": 75670 }, { "epoch": 2.231313578062804, "grad_norm": 3.423981511428132, "learning_rate": 1.4025849186951765e-06, "loss": 1.0908, "step": 75675 }, { "epoch": 2.2314610054548134, "grad_norm": 3.47792552165342, "learning_rate": 1.4020832994131286e-06, "loss": 1.041, "step": 75680 }, { "epoch": 2.231608432846823, "grad_norm": 3.469665737265475, "learning_rate": 1.4015817492212122e-06, "loss": 1.0307, "step": 75685 }, { "epoch": 2.2317558602388323, "grad_norm": 3.4824315898595226, "learning_rate": 1.4010802681341813e-06, "loss": 1.0385, "step": 75690 }, { "epoch": 2.231903287630842, "grad_norm": 3.4645283415718757, "learning_rate": 1.4005788561667987e-06, "loss": 1.0503, "step": 75695 }, { "epoch": 2.2320507150228512, "grad_norm": 3.5031499503002355, "learning_rate": 1.4000775133338148e-06, "loss": 1.0606, "step": 75700 }, { "epoch": 2.2321981424148607, "grad_norm": 3.3005048952750675, "learning_rate": 1.399576239649983e-06, "loss": 1.0331, "step": 75705 }, { "epoch": 2.23234556980687, "grad_norm": 3.4734913525465845, "learning_rate": 1.3990750351300543e-06, "loss": 1.0707, "step": 75710 }, { "epoch": 2.2324929971988796, "grad_norm": 3.6105282993018766, "learning_rate": 1.3985738997887775e-06, "loss": 1.0697, "step": 75715 }, { "epoch": 2.232640424590889, "grad_norm": 3.5846731815609747, "learning_rate": 1.3980728336408984e-06, "loss": 1.0277, "step": 75720 }, { "epoch": 2.2327878519828985, "grad_norm": 3.4898704743154405, "learning_rate": 1.3975718367011636e-06, "loss": 1.0077, "step": 75725 }, { "epoch": 2.232935279374908, "grad_norm": 3.554394849987094, "learning_rate": 1.3970709089843108e-06, "loss": 1.0504, "step": 75730 }, { "epoch": 2.2330827067669174, "grad_norm": 3.5680240619273396, "learning_rate": 1.3965700505050856e-06, "loss": 1.0208, "step": 75735 }, { "epoch": 2.233230134158927, "grad_norm": 3.3396114942313284, "learning_rate": 1.3960692612782218e-06, "loss": 1.0592, "step": 75740 }, { "epoch": 2.2333775615509364, "grad_norm": 3.3371579458477303, "learning_rate": 1.3955685413184604e-06, "loss": 1.066, "step": 75745 }, { "epoch": 2.2335249889429454, "grad_norm": 3.454559028829267, "learning_rate": 1.3950678906405314e-06, "loss": 1.0787, "step": 75750 }, { "epoch": 2.233672416334955, "grad_norm": 3.516733000707504, "learning_rate": 1.3945673092591693e-06, "loss": 1.0557, "step": 75755 }, { "epoch": 2.2338198437269643, "grad_norm": 3.392784727842088, "learning_rate": 1.394066797189105e-06, "loss": 1.0378, "step": 75760 }, { "epoch": 2.2339672711189738, "grad_norm": 3.31896900812938, "learning_rate": 1.3935663544450625e-06, "loss": 1.0862, "step": 75765 }, { "epoch": 2.234114698510983, "grad_norm": 3.4563663459061003, "learning_rate": 1.3930659810417738e-06, "loss": 1.0049, "step": 75770 }, { "epoch": 2.2342621259029927, "grad_norm": 3.50348359366027, "learning_rate": 1.3925656769939582e-06, "loss": 1.0178, "step": 75775 }, { "epoch": 2.234409553295002, "grad_norm": 3.7391164298136914, "learning_rate": 1.3920654423163397e-06, "loss": 1.0656, "step": 75780 }, { "epoch": 2.2345569806870116, "grad_norm": 3.3170349354905517, "learning_rate": 1.391565277023638e-06, "loss": 0.9911, "step": 75785 }, { "epoch": 2.234704408079021, "grad_norm": 3.6902436888881263, "learning_rate": 1.3910651811305713e-06, "loss": 1.0627, "step": 75790 }, { "epoch": 2.2348518354710305, "grad_norm": 3.45640726548047, "learning_rate": 1.390565154651855e-06, "loss": 1.0352, "step": 75795 }, { "epoch": 2.23499926286304, "grad_norm": 3.5471866372615484, "learning_rate": 1.3900651976022043e-06, "loss": 1.089, "step": 75800 }, { "epoch": 2.2351466902550494, "grad_norm": 3.4631450875191176, "learning_rate": 1.3895653099963278e-06, "loss": 1.0942, "step": 75805 }, { "epoch": 2.235294117647059, "grad_norm": 3.438801692768797, "learning_rate": 1.3890654918489397e-06, "loss": 1.0647, "step": 75810 }, { "epoch": 2.2354415450390683, "grad_norm": 3.4537442611334814, "learning_rate": 1.3885657431747428e-06, "loss": 1.0821, "step": 75815 }, { "epoch": 2.235588972431078, "grad_norm": 3.5237029667105015, "learning_rate": 1.388066063988448e-06, "loss": 1.0433, "step": 75820 }, { "epoch": 2.2357363998230873, "grad_norm": 3.4382973244435906, "learning_rate": 1.3875664543047554e-06, "loss": 1.0466, "step": 75825 }, { "epoch": 2.2358838272150967, "grad_norm": 3.3968213980178104, "learning_rate": 1.387066914138367e-06, "loss": 1.101, "step": 75830 }, { "epoch": 2.236031254607106, "grad_norm": 3.5781715821459823, "learning_rate": 1.3865674435039841e-06, "loss": 1.0718, "step": 75835 }, { "epoch": 2.2361786819991156, "grad_norm": 3.352193199521072, "learning_rate": 1.3860680424163007e-06, "loss": 1.0606, "step": 75840 }, { "epoch": 2.2363261093911246, "grad_norm": 3.594340842788122, "learning_rate": 1.385568710890017e-06, "loss": 1.0369, "step": 75845 }, { "epoch": 2.2364735367831345, "grad_norm": 3.422846167834208, "learning_rate": 1.3850694489398227e-06, "loss": 1.0061, "step": 75850 }, { "epoch": 2.2366209641751436, "grad_norm": 3.3746496006002404, "learning_rate": 1.38457025658041e-06, "loss": 1.0207, "step": 75855 }, { "epoch": 2.236768391567153, "grad_norm": 3.416441406379464, "learning_rate": 1.3840711338264687e-06, "loss": 1.054, "step": 75860 }, { "epoch": 2.2369158189591625, "grad_norm": 3.4197246182729697, "learning_rate": 1.3835720806926859e-06, "loss": 1.0366, "step": 75865 }, { "epoch": 2.237063246351172, "grad_norm": 3.4282428529489737, "learning_rate": 1.3830730971937468e-06, "loss": 1.0646, "step": 75870 }, { "epoch": 2.2372106737431814, "grad_norm": 3.443338303185216, "learning_rate": 1.3825741833443343e-06, "loss": 1.0208, "step": 75875 }, { "epoch": 2.237358101135191, "grad_norm": 3.5294518237537074, "learning_rate": 1.3820753391591303e-06, "loss": 1.0886, "step": 75880 }, { "epoch": 2.2375055285272003, "grad_norm": 3.4266339302732574, "learning_rate": 1.381576564652814e-06, "loss": 1.0374, "step": 75885 }, { "epoch": 2.2376529559192098, "grad_norm": 3.3513655722822535, "learning_rate": 1.381077859840061e-06, "loss": 1.0265, "step": 75890 }, { "epoch": 2.237800383311219, "grad_norm": 3.474952596645523, "learning_rate": 1.3805792247355465e-06, "loss": 1.0623, "step": 75895 }, { "epoch": 2.2379478107032287, "grad_norm": 3.491681501315442, "learning_rate": 1.3800806593539443e-06, "loss": 1.0514, "step": 75900 }, { "epoch": 2.238095238095238, "grad_norm": 3.455681496090967, "learning_rate": 1.3795821637099247e-06, "loss": 1.0349, "step": 75905 }, { "epoch": 2.2382426654872476, "grad_norm": 3.5692886982827794, "learning_rate": 1.379083737818158e-06, "loss": 1.0837, "step": 75910 }, { "epoch": 2.238390092879257, "grad_norm": 3.5562062908025895, "learning_rate": 1.3785853816933069e-06, "loss": 1.0753, "step": 75915 }, { "epoch": 2.2385375202712665, "grad_norm": 3.398876874197432, "learning_rate": 1.378087095350042e-06, "loss": 1.0661, "step": 75920 }, { "epoch": 2.238684947663276, "grad_norm": 3.6150123468978013, "learning_rate": 1.3775888788030205e-06, "loss": 1.092, "step": 75925 }, { "epoch": 2.2388323750552854, "grad_norm": 3.6140067612658955, "learning_rate": 1.3770907320669058e-06, "loss": 1.0255, "step": 75930 }, { "epoch": 2.238979802447295, "grad_norm": 3.522068251342522, "learning_rate": 1.3765926551563562e-06, "loss": 1.0283, "step": 75935 }, { "epoch": 2.2391272298393043, "grad_norm": 3.378813048273321, "learning_rate": 1.3760946480860275e-06, "loss": 1.0572, "step": 75940 }, { "epoch": 2.239274657231314, "grad_norm": 3.4762585319925026, "learning_rate": 1.3755967108705745e-06, "loss": 1.0476, "step": 75945 }, { "epoch": 2.239422084623323, "grad_norm": 3.4669524260408084, "learning_rate": 1.3750988435246496e-06, "loss": 1.0554, "step": 75950 }, { "epoch": 2.2395695120153323, "grad_norm": 3.4885124307505553, "learning_rate": 1.3746010460629042e-06, "loss": 1.0812, "step": 75955 }, { "epoch": 2.2397169394073417, "grad_norm": 3.5617798039493764, "learning_rate": 1.3741033184999841e-06, "loss": 1.0818, "step": 75960 }, { "epoch": 2.239864366799351, "grad_norm": 3.27260373105261, "learning_rate": 1.3736056608505372e-06, "loss": 0.9907, "step": 75965 }, { "epoch": 2.2400117941913607, "grad_norm": 3.4702580894808452, "learning_rate": 1.373108073129207e-06, "loss": 1.0498, "step": 75970 }, { "epoch": 2.24015922158337, "grad_norm": 3.3589896642464305, "learning_rate": 1.372610555350636e-06, "loss": 1.0416, "step": 75975 }, { "epoch": 2.2403066489753796, "grad_norm": 3.594914409479353, "learning_rate": 1.3721131075294642e-06, "loss": 1.0506, "step": 75980 }, { "epoch": 2.240454076367389, "grad_norm": 3.475347534033606, "learning_rate": 1.3716157296803306e-06, "loss": 1.0931, "step": 75985 }, { "epoch": 2.2406015037593985, "grad_norm": 3.4491179411580677, "learning_rate": 1.3711184218178674e-06, "loss": 1.0527, "step": 75990 }, { "epoch": 2.240748931151408, "grad_norm": 3.5641252183351573, "learning_rate": 1.370621183956714e-06, "loss": 1.1134, "step": 75995 }, { "epoch": 2.2408963585434174, "grad_norm": 3.5632044432779426, "learning_rate": 1.3701240161114963e-06, "loss": 1.0562, "step": 76000 }, { "epoch": 2.2408963585434174, "eval_loss": 1.0845855474472046, "eval_runtime": 4.1818, "eval_samples_per_second": 94.697, "eval_steps_per_second": 3.109, "step": 76000 }, { "epoch": 2.241043785935427, "grad_norm": 3.516935584069554, "learning_rate": 1.3696269182968493e-06, "loss": 1.0793, "step": 76005 }, { "epoch": 2.2411912133274363, "grad_norm": 3.345732645923962, "learning_rate": 1.3691298905273976e-06, "loss": 1.0178, "step": 76010 }, { "epoch": 2.2413386407194458, "grad_norm": 3.607537651557809, "learning_rate": 1.3686329328177668e-06, "loss": 1.0988, "step": 76015 }, { "epoch": 2.2414860681114552, "grad_norm": 3.5681796295612793, "learning_rate": 1.3681360451825831e-06, "loss": 1.0694, "step": 76020 }, { "epoch": 2.2416334955034647, "grad_norm": 3.3389790984173917, "learning_rate": 1.3676392276364632e-06, "loss": 1.042, "step": 76025 }, { "epoch": 2.241780922895474, "grad_norm": 3.5014105682028234, "learning_rate": 1.3671424801940325e-06, "loss": 1.0641, "step": 76030 }, { "epoch": 2.2419283502874836, "grad_norm": 3.479832663751967, "learning_rate": 1.3666458028699031e-06, "loss": 1.0741, "step": 76035 }, { "epoch": 2.242075777679493, "grad_norm": 3.438667350801984, "learning_rate": 1.366149195678693e-06, "loss": 1.0595, "step": 76040 }, { "epoch": 2.242223205071502, "grad_norm": 3.5149203659511166, "learning_rate": 1.3656526586350152e-06, "loss": 1.0576, "step": 76045 }, { "epoch": 2.2423706324635115, "grad_norm": 3.4267787314372153, "learning_rate": 1.3651561917534801e-06, "loss": 1.0324, "step": 76050 }, { "epoch": 2.242518059855521, "grad_norm": 3.578316342924708, "learning_rate": 1.3646597950486974e-06, "loss": 1.0449, "step": 76055 }, { "epoch": 2.2426654872475305, "grad_norm": 3.274034011630656, "learning_rate": 1.3641634685352749e-06, "loss": 1.0369, "step": 76060 }, { "epoch": 2.24281291463954, "grad_norm": 3.5700286225889926, "learning_rate": 1.3636672122278146e-06, "loss": 1.0706, "step": 76065 }, { "epoch": 2.2429603420315494, "grad_norm": 3.3506310519456837, "learning_rate": 1.3631710261409237e-06, "loss": 1.0841, "step": 76070 }, { "epoch": 2.243107769423559, "grad_norm": 3.3838121619936197, "learning_rate": 1.3626749102891984e-06, "loss": 1.0749, "step": 76075 }, { "epoch": 2.2432551968155683, "grad_norm": 3.532617322298192, "learning_rate": 1.3621788646872426e-06, "loss": 1.0715, "step": 76080 }, { "epoch": 2.2434026242075777, "grad_norm": 3.547995647124745, "learning_rate": 1.361682889349649e-06, "loss": 1.0619, "step": 76085 }, { "epoch": 2.243550051599587, "grad_norm": 3.4060219280525783, "learning_rate": 1.3611869842910136e-06, "loss": 1.0668, "step": 76090 }, { "epoch": 2.2436974789915967, "grad_norm": 3.5302597161061495, "learning_rate": 1.3606911495259299e-06, "loss": 1.0606, "step": 76095 }, { "epoch": 2.243844906383606, "grad_norm": 3.4129815760996585, "learning_rate": 1.3601953850689853e-06, "loss": 1.043, "step": 76100 }, { "epoch": 2.2439923337756156, "grad_norm": 3.439630685868949, "learning_rate": 1.3596996909347727e-06, "loss": 1.0543, "step": 76105 }, { "epoch": 2.244139761167625, "grad_norm": 3.549461063287248, "learning_rate": 1.3592040671378746e-06, "loss": 1.0537, "step": 76110 }, { "epoch": 2.2442871885596345, "grad_norm": 3.396736842020612, "learning_rate": 1.3587085136928773e-06, "loss": 1.0206, "step": 76115 }, { "epoch": 2.244434615951644, "grad_norm": 3.5594482441408024, "learning_rate": 1.3582130306143621e-06, "loss": 1.0518, "step": 76120 }, { "epoch": 2.2445820433436534, "grad_norm": 3.726764339746374, "learning_rate": 1.35771761791691e-06, "loss": 1.0498, "step": 76125 }, { "epoch": 2.244729470735663, "grad_norm": 3.417144813645715, "learning_rate": 1.3572222756150985e-06, "loss": 1.0641, "step": 76130 }, { "epoch": 2.2448768981276723, "grad_norm": 3.5352638933040472, "learning_rate": 1.3567270037235037e-06, "loss": 1.0387, "step": 76135 }, { "epoch": 2.2450243255196813, "grad_norm": 3.454830786086036, "learning_rate": 1.3562318022566999e-06, "loss": 1.027, "step": 76140 }, { "epoch": 2.245171752911691, "grad_norm": 3.602610885674931, "learning_rate": 1.3557366712292602e-06, "loss": 1.0643, "step": 76145 }, { "epoch": 2.2453191803037003, "grad_norm": 3.6009094934114603, "learning_rate": 1.355241610655751e-06, "loss": 1.0379, "step": 76150 }, { "epoch": 2.2454666076957097, "grad_norm": 3.50485274426301, "learning_rate": 1.3547466205507424e-06, "loss": 1.0389, "step": 76155 }, { "epoch": 2.245614035087719, "grad_norm": 3.4747621917917857, "learning_rate": 1.3542517009287994e-06, "loss": 1.0326, "step": 76160 }, { "epoch": 2.2457614624797286, "grad_norm": 3.2775673213586543, "learning_rate": 1.3537568518044858e-06, "loss": 0.9988, "step": 76165 }, { "epoch": 2.245908889871738, "grad_norm": 3.58649571622359, "learning_rate": 1.3532620731923641e-06, "loss": 1.1, "step": 76170 }, { "epoch": 2.2460563172637475, "grad_norm": 3.552701914552105, "learning_rate": 1.3527673651069903e-06, "loss": 1.0368, "step": 76175 }, { "epoch": 2.246203744655757, "grad_norm": 3.4399222347827916, "learning_rate": 1.3522727275629269e-06, "loss": 1.0446, "step": 76180 }, { "epoch": 2.2463511720477665, "grad_norm": 3.5663949322210873, "learning_rate": 1.3517781605747249e-06, "loss": 1.0463, "step": 76185 }, { "epoch": 2.246498599439776, "grad_norm": 3.376653268027207, "learning_rate": 1.351283664156939e-06, "loss": 1.0473, "step": 76190 }, { "epoch": 2.2466460268317854, "grad_norm": 3.4691205539688266, "learning_rate": 1.3507892383241207e-06, "loss": 1.0362, "step": 76195 }, { "epoch": 2.246793454223795, "grad_norm": 3.313986079017093, "learning_rate": 1.350294883090818e-06, "loss": 1.0013, "step": 76200 }, { "epoch": 2.2469408816158043, "grad_norm": 3.521974449529058, "learning_rate": 1.3498005984715793e-06, "loss": 1.0617, "step": 76205 }, { "epoch": 2.2470883090078138, "grad_norm": 3.3397339986235233, "learning_rate": 1.349306384480948e-06, "loss": 1.044, "step": 76210 }, { "epoch": 2.247235736399823, "grad_norm": 3.4898343440095556, "learning_rate": 1.3488122411334695e-06, "loss": 1.0618, "step": 76215 }, { "epoch": 2.2473831637918327, "grad_norm": 3.4889601449363736, "learning_rate": 1.3483181684436813e-06, "loss": 1.0471, "step": 76220 }, { "epoch": 2.247530591183842, "grad_norm": 3.4446632136792785, "learning_rate": 1.3478241664261232e-06, "loss": 1.0684, "step": 76225 }, { "epoch": 2.2476780185758516, "grad_norm": 3.520818347707987, "learning_rate": 1.3473302350953322e-06, "loss": 1.0215, "step": 76230 }, { "epoch": 2.2478254459678606, "grad_norm": 3.5442918926741163, "learning_rate": 1.3468363744658425e-06, "loss": 1.0626, "step": 76235 }, { "epoch": 2.24797287335987, "grad_norm": 3.471985457665603, "learning_rate": 1.3463425845521865e-06, "loss": 1.0221, "step": 76240 }, { "epoch": 2.2481203007518795, "grad_norm": 3.3370560415624286, "learning_rate": 1.3458488653688958e-06, "loss": 0.9842, "step": 76245 }, { "epoch": 2.248267728143889, "grad_norm": 3.4064542578273524, "learning_rate": 1.3453552169304954e-06, "loss": 1.1154, "step": 76250 }, { "epoch": 2.2484151555358984, "grad_norm": 3.6244642418401956, "learning_rate": 1.344861639251516e-06, "loss": 1.0616, "step": 76255 }, { "epoch": 2.248562582927908, "grad_norm": 3.419529612299329, "learning_rate": 1.3443681323464763e-06, "loss": 1.0629, "step": 76260 }, { "epoch": 2.2487100103199174, "grad_norm": 3.383604719637984, "learning_rate": 1.343874696229904e-06, "loss": 1.0649, "step": 76265 }, { "epoch": 2.248857437711927, "grad_norm": 3.6079930113101617, "learning_rate": 1.3433813309163153e-06, "loss": 1.0618, "step": 76270 }, { "epoch": 2.2490048651039363, "grad_norm": 3.615834267518189, "learning_rate": 1.3428880364202286e-06, "loss": 1.0442, "step": 76275 }, { "epoch": 2.2491522924959457, "grad_norm": 3.4125875905286054, "learning_rate": 1.3423948127561614e-06, "loss": 1.0314, "step": 76280 }, { "epoch": 2.249299719887955, "grad_norm": 3.4439413247292054, "learning_rate": 1.3419016599386235e-06, "loss": 1.0512, "step": 76285 }, { "epoch": 2.2494471472799646, "grad_norm": 3.5790993153103488, "learning_rate": 1.3414085779821316e-06, "loss": 1.0619, "step": 76290 }, { "epoch": 2.249594574671974, "grad_norm": 3.476651804886421, "learning_rate": 1.3409155669011913e-06, "loss": 1.0526, "step": 76295 }, { "epoch": 2.2497420020639836, "grad_norm": 3.459780933504324, "learning_rate": 1.3404226267103117e-06, "loss": 1.0497, "step": 76300 }, { "epoch": 2.249889429455993, "grad_norm": 3.4293687882766783, "learning_rate": 1.3399297574239975e-06, "loss": 1.0276, "step": 76305 }, { "epoch": 2.2500368568480025, "grad_norm": 3.435723210540303, "learning_rate": 1.3394369590567522e-06, "loss": 1.0361, "step": 76310 }, { "epoch": 2.250184284240012, "grad_norm": 3.4579145716994035, "learning_rate": 1.3389442316230768e-06, "loss": 1.0693, "step": 76315 }, { "epoch": 2.2503317116320214, "grad_norm": 3.4707750976250558, "learning_rate": 1.3384515751374717e-06, "loss": 1.051, "step": 76320 }, { "epoch": 2.250479139024031, "grad_norm": 3.5345913615712603, "learning_rate": 1.3379589896144304e-06, "loss": 1.0436, "step": 76325 }, { "epoch": 2.25062656641604, "grad_norm": 3.4515531223713087, "learning_rate": 1.3374664750684527e-06, "loss": 1.057, "step": 76330 }, { "epoch": 2.2507739938080498, "grad_norm": 3.474789661611902, "learning_rate": 1.3369740315140262e-06, "loss": 1.0265, "step": 76335 }, { "epoch": 2.250921421200059, "grad_norm": 3.440648730643209, "learning_rate": 1.3364816589656468e-06, "loss": 1.0261, "step": 76340 }, { "epoch": 2.2510688485920682, "grad_norm": 3.397239312328464, "learning_rate": 1.3359893574377992e-06, "loss": 1.0437, "step": 76345 }, { "epoch": 2.2512162759840777, "grad_norm": 3.4768183473983516, "learning_rate": 1.335497126944971e-06, "loss": 1.0165, "step": 76350 }, { "epoch": 2.251363703376087, "grad_norm": 3.6189473098754292, "learning_rate": 1.3350049675016483e-06, "loss": 1.0623, "step": 76355 }, { "epoch": 2.2515111307680966, "grad_norm": 3.514307355126303, "learning_rate": 1.33451287912231e-06, "loss": 1.1069, "step": 76360 }, { "epoch": 2.251658558160106, "grad_norm": 3.479919422824774, "learning_rate": 1.3340208618214405e-06, "loss": 1.0801, "step": 76365 }, { "epoch": 2.2518059855521155, "grad_norm": 3.428926594835649, "learning_rate": 1.3335289156135146e-06, "loss": 1.0162, "step": 76370 }, { "epoch": 2.251953412944125, "grad_norm": 3.4745260318069775, "learning_rate": 1.3330370405130098e-06, "loss": 0.9963, "step": 76375 }, { "epoch": 2.2521008403361344, "grad_norm": 3.56333266392433, "learning_rate": 1.3325452365344002e-06, "loss": 1.0434, "step": 76380 }, { "epoch": 2.252248267728144, "grad_norm": 3.4618609592985847, "learning_rate": 1.3320535036921571e-06, "loss": 1.0145, "step": 76385 }, { "epoch": 2.2523956951201534, "grad_norm": 3.597933942027098, "learning_rate": 1.3315618420007506e-06, "loss": 1.0309, "step": 76390 }, { "epoch": 2.252543122512163, "grad_norm": 3.6007625609941245, "learning_rate": 1.3310702514746487e-06, "loss": 1.0444, "step": 76395 }, { "epoch": 2.2526905499041723, "grad_norm": 3.47332719974959, "learning_rate": 1.330578732128317e-06, "loss": 1.0855, "step": 76400 }, { "epoch": 2.2528379772961817, "grad_norm": 3.451063457406205, "learning_rate": 1.3300872839762198e-06, "loss": 1.0303, "step": 76405 }, { "epoch": 2.252985404688191, "grad_norm": 3.6808380731355963, "learning_rate": 1.3295959070328164e-06, "loss": 1.0886, "step": 76410 }, { "epoch": 2.2531328320802007, "grad_norm": 3.3392319858305894, "learning_rate": 1.3291046013125668e-06, "loss": 1.0362, "step": 76415 }, { "epoch": 2.25328025947221, "grad_norm": 3.5102796602366895, "learning_rate": 1.3286133668299293e-06, "loss": 1.0643, "step": 76420 }, { "epoch": 2.253427686864219, "grad_norm": 3.2734994619529427, "learning_rate": 1.328122203599358e-06, "loss": 1.0275, "step": 76425 }, { "epoch": 2.253575114256229, "grad_norm": 3.432159343694224, "learning_rate": 1.3276311116353078e-06, "loss": 1.0971, "step": 76430 }, { "epoch": 2.253722541648238, "grad_norm": 3.5306917315010313, "learning_rate": 1.3271400909522255e-06, "loss": 1.0741, "step": 76435 }, { "epoch": 2.2538699690402475, "grad_norm": 3.6396448094560325, "learning_rate": 1.3266491415645655e-06, "loss": 1.0595, "step": 76440 }, { "epoch": 2.254017396432257, "grad_norm": 3.327549896846317, "learning_rate": 1.3261582634867702e-06, "loss": 1.0374, "step": 76445 }, { "epoch": 2.2541648238242664, "grad_norm": 3.379413216760661, "learning_rate": 1.3256674567332855e-06, "loss": 1.094, "step": 76450 }, { "epoch": 2.254312251216276, "grad_norm": 3.4932873741462656, "learning_rate": 1.3251767213185545e-06, "loss": 1.0806, "step": 76455 }, { "epoch": 2.2544596786082853, "grad_norm": 3.3725204555579866, "learning_rate": 1.3246860572570172e-06, "loss": 1.026, "step": 76460 }, { "epoch": 2.254607106000295, "grad_norm": 3.530597540996748, "learning_rate": 1.3241954645631118e-06, "loss": 1.0169, "step": 76465 }, { "epoch": 2.2547545333923042, "grad_norm": 3.4345672475683013, "learning_rate": 1.3237049432512755e-06, "loss": 1.0118, "step": 76470 }, { "epoch": 2.2549019607843137, "grad_norm": 3.396879657848212, "learning_rate": 1.3232144933359428e-06, "loss": 1.0585, "step": 76475 }, { "epoch": 2.255049388176323, "grad_norm": 3.401065054646069, "learning_rate": 1.3227241148315431e-06, "loss": 1.0435, "step": 76480 }, { "epoch": 2.2551968155683326, "grad_norm": 3.348855341042417, "learning_rate": 1.322233807752508e-06, "loss": 1.0288, "step": 76485 }, { "epoch": 2.255344242960342, "grad_norm": 3.3751280972390103, "learning_rate": 1.321743572113266e-06, "loss": 1.0551, "step": 76490 }, { "epoch": 2.2554916703523515, "grad_norm": 3.483355153067551, "learning_rate": 1.3212534079282414e-06, "loss": 1.0154, "step": 76495 }, { "epoch": 2.255639097744361, "grad_norm": 3.584386819962276, "learning_rate": 1.3207633152118592e-06, "loss": 1.0623, "step": 76500 }, { "epoch": 2.255639097744361, "eval_loss": 1.0846178531646729, "eval_runtime": 4.303, "eval_samples_per_second": 92.029, "eval_steps_per_second": 3.021, "step": 76500 }, { "epoch": 2.2557865251363705, "grad_norm": 3.4440127512660608, "learning_rate": 1.3202732939785415e-06, "loss": 1.0362, "step": 76505 }, { "epoch": 2.25593395252838, "grad_norm": 3.5402163264319038, "learning_rate": 1.3197833442427035e-06, "loss": 1.0463, "step": 76510 }, { "epoch": 2.2560813799203894, "grad_norm": 3.4312134826126406, "learning_rate": 1.3192934660187688e-06, "loss": 1.0218, "step": 76515 }, { "epoch": 2.256228807312399, "grad_norm": 3.5031119813865117, "learning_rate": 1.3188036593211464e-06, "loss": 1.0199, "step": 76520 }, { "epoch": 2.2563762347044083, "grad_norm": 3.358455189841514, "learning_rate": 1.3183139241642554e-06, "loss": 1.0405, "step": 76525 }, { "epoch": 2.2565236620964173, "grad_norm": 3.474733635616611, "learning_rate": 1.3178242605625023e-06, "loss": 1.0514, "step": 76530 }, { "epoch": 2.2566710894884268, "grad_norm": 3.4298007949546125, "learning_rate": 1.317334668530298e-06, "loss": 1.0412, "step": 76535 }, { "epoch": 2.256818516880436, "grad_norm": 3.25107377599585, "learning_rate": 1.3168451480820499e-06, "loss": 1.0155, "step": 76540 }, { "epoch": 2.2569659442724457, "grad_norm": 3.4644516004620733, "learning_rate": 1.3163556992321594e-06, "loss": 1.0777, "step": 76545 }, { "epoch": 2.257113371664455, "grad_norm": 3.434740319804333, "learning_rate": 1.3158663219950332e-06, "loss": 1.0351, "step": 76550 }, { "epoch": 2.2572607990564646, "grad_norm": 3.4165921279117404, "learning_rate": 1.3153770163850694e-06, "loss": 1.0055, "step": 76555 }, { "epoch": 2.257408226448474, "grad_norm": 3.354194294111375, "learning_rate": 1.314887782416667e-06, "loss": 1.0213, "step": 76560 }, { "epoch": 2.2575556538404835, "grad_norm": 3.5966003127246804, "learning_rate": 1.3143986201042214e-06, "loss": 1.045, "step": 76565 }, { "epoch": 2.257703081232493, "grad_norm": 3.4561560003229115, "learning_rate": 1.313909529462128e-06, "loss": 1.0468, "step": 76570 }, { "epoch": 2.2578505086245024, "grad_norm": 3.585737646833814, "learning_rate": 1.3134205105047788e-06, "loss": 1.0822, "step": 76575 }, { "epoch": 2.257997936016512, "grad_norm": 3.3887139451465167, "learning_rate": 1.3129315632465643e-06, "loss": 1.0416, "step": 76580 }, { "epoch": 2.2581453634085213, "grad_norm": 3.519565279382542, "learning_rate": 1.312442687701869e-06, "loss": 1.0504, "step": 76585 }, { "epoch": 2.258292790800531, "grad_norm": 3.6307909329483596, "learning_rate": 1.3119538838850831e-06, "loss": 1.0759, "step": 76590 }, { "epoch": 2.2584402181925403, "grad_norm": 3.253433376159925, "learning_rate": 1.3114651518105864e-06, "loss": 1.0499, "step": 76595 }, { "epoch": 2.2585876455845497, "grad_norm": 3.4120260769223685, "learning_rate": 1.3109764914927637e-06, "loss": 1.0436, "step": 76600 }, { "epoch": 2.258735072976559, "grad_norm": 3.5408309570922447, "learning_rate": 1.3104879029459926e-06, "loss": 1.0548, "step": 76605 }, { "epoch": 2.2588825003685686, "grad_norm": 3.554390017678614, "learning_rate": 1.3099993861846496e-06, "loss": 1.0441, "step": 76610 }, { "epoch": 2.259029927760578, "grad_norm": 3.407200286255679, "learning_rate": 1.3095109412231128e-06, "loss": 1.0232, "step": 76615 }, { "epoch": 2.2591773551525876, "grad_norm": 3.385397253224983, "learning_rate": 1.3090225680757502e-06, "loss": 1.0059, "step": 76620 }, { "epoch": 2.2593247825445966, "grad_norm": 3.4876027845137125, "learning_rate": 1.308534266756939e-06, "loss": 1.0613, "step": 76625 }, { "epoch": 2.2594722099366065, "grad_norm": 3.629695881403147, "learning_rate": 1.3080460372810432e-06, "loss": 1.0728, "step": 76630 }, { "epoch": 2.2596196373286155, "grad_norm": 3.4907395227227687, "learning_rate": 1.3075578796624312e-06, "loss": 1.027, "step": 76635 }, { "epoch": 2.259767064720625, "grad_norm": 3.5889769421432662, "learning_rate": 1.3070697939154677e-06, "loss": 1.0966, "step": 76640 }, { "epoch": 2.2599144921126344, "grad_norm": 3.36777096517209, "learning_rate": 1.3065817800545147e-06, "loss": 1.0188, "step": 76645 }, { "epoch": 2.260061919504644, "grad_norm": 3.5289740365632984, "learning_rate": 1.3060938380939334e-06, "loss": 1.0176, "step": 76650 }, { "epoch": 2.2602093468966533, "grad_norm": 3.4142249769719752, "learning_rate": 1.3056059680480813e-06, "loss": 1.0497, "step": 76655 }, { "epoch": 2.2603567742886628, "grad_norm": 3.4459476695035063, "learning_rate": 1.3051181699313148e-06, "loss": 1.0324, "step": 76660 }, { "epoch": 2.2605042016806722, "grad_norm": 3.4629255281230136, "learning_rate": 1.304630443757989e-06, "loss": 1.0579, "step": 76665 }, { "epoch": 2.2606516290726817, "grad_norm": 3.443413810461063, "learning_rate": 1.3041427895424537e-06, "loss": 1.0452, "step": 76670 }, { "epoch": 2.260799056464691, "grad_norm": 3.6810971747855845, "learning_rate": 1.3036552072990597e-06, "loss": 1.0637, "step": 76675 }, { "epoch": 2.2609464838567006, "grad_norm": 3.4475334823730184, "learning_rate": 1.3031676970421546e-06, "loss": 1.0503, "step": 76680 }, { "epoch": 2.26109391124871, "grad_norm": 3.34039590047905, "learning_rate": 1.3026802587860843e-06, "loss": 1.0431, "step": 76685 }, { "epoch": 2.2612413386407195, "grad_norm": 3.3678876514140397, "learning_rate": 1.3021928925451928e-06, "loss": 1.0555, "step": 76690 }, { "epoch": 2.261388766032729, "grad_norm": 3.6869677589693803, "learning_rate": 1.3017055983338177e-06, "loss": 1.0779, "step": 76695 }, { "epoch": 2.2615361934247384, "grad_norm": 3.43928983842678, "learning_rate": 1.301218376166304e-06, "loss": 1.053, "step": 76700 }, { "epoch": 2.261683620816748, "grad_norm": 3.475772088223395, "learning_rate": 1.3007312260569849e-06, "loss": 1.0385, "step": 76705 }, { "epoch": 2.2618310482087574, "grad_norm": 3.537218290764718, "learning_rate": 1.3002441480201955e-06, "loss": 1.09, "step": 76710 }, { "epoch": 2.261978475600767, "grad_norm": 3.372551695372136, "learning_rate": 1.2997571420702698e-06, "loss": 1.0642, "step": 76715 }, { "epoch": 2.262125902992776, "grad_norm": 3.4454754898471207, "learning_rate": 1.299270208221538e-06, "loss": 1.0351, "step": 76720 }, { "epoch": 2.2622733303847857, "grad_norm": 3.4826333264121736, "learning_rate": 1.2987833464883288e-06, "loss": 1.0051, "step": 76725 }, { "epoch": 2.2624207577767947, "grad_norm": 3.309922549414482, "learning_rate": 1.2982965568849687e-06, "loss": 1.0707, "step": 76730 }, { "epoch": 2.262568185168804, "grad_norm": 3.5404620269887097, "learning_rate": 1.297809839425783e-06, "loss": 1.0982, "step": 76735 }, { "epoch": 2.2627156125608137, "grad_norm": 3.483296616182402, "learning_rate": 1.2973231941250916e-06, "loss": 0.9905, "step": 76740 }, { "epoch": 2.262863039952823, "grad_norm": 3.337935956630164, "learning_rate": 1.2968366209972162e-06, "loss": 1.0371, "step": 76745 }, { "epoch": 2.2630104673448326, "grad_norm": 3.5951024536549903, "learning_rate": 1.2963501200564743e-06, "loss": 1.0435, "step": 76750 }, { "epoch": 2.263157894736842, "grad_norm": 3.36163042047922, "learning_rate": 1.2958636913171815e-06, "loss": 1.0367, "step": 76755 }, { "epoch": 2.2633053221288515, "grad_norm": 3.4276774369835206, "learning_rate": 1.2953773347936524e-06, "loss": 1.0603, "step": 76760 }, { "epoch": 2.263452749520861, "grad_norm": 3.488815854266385, "learning_rate": 1.2948910505001992e-06, "loss": 1.0688, "step": 76765 }, { "epoch": 2.2636001769128704, "grad_norm": 3.5702480384029305, "learning_rate": 1.2944048384511277e-06, "loss": 1.0437, "step": 76770 }, { "epoch": 2.26374760430488, "grad_norm": 3.345741688251344, "learning_rate": 1.2939186986607508e-06, "loss": 1.0664, "step": 76775 }, { "epoch": 2.2638950316968893, "grad_norm": 3.451096873887924, "learning_rate": 1.2934326311433676e-06, "loss": 1.0586, "step": 76780 }, { "epoch": 2.264042459088899, "grad_norm": 3.2892288261137566, "learning_rate": 1.2929466359132872e-06, "loss": 1.0277, "step": 76785 }, { "epoch": 2.2641898864809082, "grad_norm": 3.4683290086486522, "learning_rate": 1.2924607129848066e-06, "loss": 1.0735, "step": 76790 }, { "epoch": 2.2643373138729177, "grad_norm": 3.446604579428812, "learning_rate": 1.2919748623722254e-06, "loss": 1.0361, "step": 76795 }, { "epoch": 2.264484741264927, "grad_norm": 3.6146989102596665, "learning_rate": 1.2914890840898419e-06, "loss": 1.0674, "step": 76800 }, { "epoch": 2.2646321686569366, "grad_norm": 3.3684777524741807, "learning_rate": 1.2910033781519467e-06, "loss": 1.0574, "step": 76805 }, { "epoch": 2.264779596048946, "grad_norm": 3.4446695864261194, "learning_rate": 1.290517744572838e-06, "loss": 0.9928, "step": 76810 }, { "epoch": 2.264927023440955, "grad_norm": 3.458619901849788, "learning_rate": 1.2900321833668016e-06, "loss": 1.0796, "step": 76815 }, { "epoch": 2.265074450832965, "grad_norm": 3.5233767859850813, "learning_rate": 1.2895466945481272e-06, "loss": 1.0077, "step": 76820 }, { "epoch": 2.265221878224974, "grad_norm": 3.3033537385384544, "learning_rate": 1.289061278131101e-06, "loss": 1.0163, "step": 76825 }, { "epoch": 2.2653693056169835, "grad_norm": 3.5272599829565086, "learning_rate": 1.2885759341300064e-06, "loss": 1.0644, "step": 76830 }, { "epoch": 2.265516733008993, "grad_norm": 3.368617452417958, "learning_rate": 1.288090662559126e-06, "loss": 1.034, "step": 76835 }, { "epoch": 2.2656641604010024, "grad_norm": 3.5785213235696602, "learning_rate": 1.28760546343274e-06, "loss": 1.0457, "step": 76840 }, { "epoch": 2.265811587793012, "grad_norm": 3.608197926823729, "learning_rate": 1.2871203367651227e-06, "loss": 1.0447, "step": 76845 }, { "epoch": 2.2659590151850213, "grad_norm": 3.566914955349423, "learning_rate": 1.2866352825705546e-06, "loss": 1.0592, "step": 76850 }, { "epoch": 2.2661064425770308, "grad_norm": 3.514628961661207, "learning_rate": 1.2861503008633032e-06, "loss": 1.0443, "step": 76855 }, { "epoch": 2.26625386996904, "grad_norm": 3.4369930871692653, "learning_rate": 1.2856653916576452e-06, "loss": 1.034, "step": 76860 }, { "epoch": 2.2664012973610497, "grad_norm": 3.5119231885760365, "learning_rate": 1.2851805549678456e-06, "loss": 1.0646, "step": 76865 }, { "epoch": 2.266548724753059, "grad_norm": 3.398240215716808, "learning_rate": 1.284695790808173e-06, "loss": 1.0463, "step": 76870 }, { "epoch": 2.2666961521450686, "grad_norm": 3.483745508167764, "learning_rate": 1.2842110991928934e-06, "loss": 0.9848, "step": 76875 }, { "epoch": 2.266843579537078, "grad_norm": 3.3367632336050703, "learning_rate": 1.2837264801362647e-06, "loss": 1.0384, "step": 76880 }, { "epoch": 2.2669910069290875, "grad_norm": 3.3900422219617656, "learning_rate": 1.2832419336525531e-06, "loss": 1.0621, "step": 76885 }, { "epoch": 2.267138434321097, "grad_norm": 3.4598866498600644, "learning_rate": 1.2827574597560137e-06, "loss": 1.0523, "step": 76890 }, { "epoch": 2.2672858617131064, "grad_norm": 3.5327817987842955, "learning_rate": 1.2822730584609027e-06, "loss": 1.0903, "step": 76895 }, { "epoch": 2.267433289105116, "grad_norm": 3.406184517903259, "learning_rate": 1.2817887297814752e-06, "loss": 1.0359, "step": 76900 }, { "epoch": 2.2675807164971253, "grad_norm": 3.3997675135199485, "learning_rate": 1.2813044737319823e-06, "loss": 1.062, "step": 76905 }, { "epoch": 2.267728143889135, "grad_norm": 3.3888028673555404, "learning_rate": 1.2808202903266748e-06, "loss": 1.0957, "step": 76910 }, { "epoch": 2.2678755712811443, "grad_norm": 3.4028955739770694, "learning_rate": 1.2803361795797992e-06, "loss": 1.0626, "step": 76915 }, { "epoch": 2.2680229986731533, "grad_norm": 3.484660096627414, "learning_rate": 1.2798521415056017e-06, "loss": 1.0913, "step": 76920 }, { "epoch": 2.2681704260651627, "grad_norm": 3.3371767444577127, "learning_rate": 1.2793681761183273e-06, "loss": 1.0405, "step": 76925 }, { "epoch": 2.268317853457172, "grad_norm": 3.379110018772837, "learning_rate": 1.2788842834322136e-06, "loss": 1.0563, "step": 76930 }, { "epoch": 2.2684652808491816, "grad_norm": 3.5565697353314474, "learning_rate": 1.2784004634615019e-06, "loss": 1.0774, "step": 76935 }, { "epoch": 2.268612708241191, "grad_norm": 3.5105540395378325, "learning_rate": 1.2779167162204284e-06, "loss": 1.0094, "step": 76940 }, { "epoch": 2.2687601356332006, "grad_norm": 3.4398316162249887, "learning_rate": 1.277433041723229e-06, "loss": 1.0585, "step": 76945 }, { "epoch": 2.26890756302521, "grad_norm": 3.3267145761039707, "learning_rate": 1.2769494399841365e-06, "loss": 1.0103, "step": 76950 }, { "epoch": 2.2690549904172195, "grad_norm": 3.619948896740252, "learning_rate": 1.2764659110173785e-06, "loss": 1.0479, "step": 76955 }, { "epoch": 2.269202417809229, "grad_norm": 3.582916735343601, "learning_rate": 1.2759824548371877e-06, "loss": 1.0531, "step": 76960 }, { "epoch": 2.2693498452012384, "grad_norm": 3.6350563695347136, "learning_rate": 1.2754990714577872e-06, "loss": 1.0596, "step": 76965 }, { "epoch": 2.269497272593248, "grad_norm": 3.368147138778019, "learning_rate": 1.275015760893402e-06, "loss": 1.0252, "step": 76970 }, { "epoch": 2.2696446999852573, "grad_norm": 3.4449406939253318, "learning_rate": 1.2745325231582545e-06, "loss": 1.0478, "step": 76975 }, { "epoch": 2.2697921273772668, "grad_norm": 3.435292010951394, "learning_rate": 1.2740493582665635e-06, "loss": 1.0588, "step": 76980 }, { "epoch": 2.2699395547692762, "grad_norm": 3.5309874682238345, "learning_rate": 1.273566266232548e-06, "loss": 1.1088, "step": 76985 }, { "epoch": 2.2700869821612857, "grad_norm": 3.541945453003839, "learning_rate": 1.2730832470704226e-06, "loss": 1.0195, "step": 76990 }, { "epoch": 2.270234409553295, "grad_norm": 3.362559640071491, "learning_rate": 1.2726003007944023e-06, "loss": 0.9976, "step": 76995 }, { "epoch": 2.2703818369453046, "grad_norm": 3.450653212682694, "learning_rate": 1.2721174274186957e-06, "loss": 1.0518, "step": 77000 }, { "epoch": 2.2703818369453046, "eval_loss": 1.0846519470214844, "eval_runtime": 4.1388, "eval_samples_per_second": 95.679, "eval_steps_per_second": 3.141, "step": 77000 }, { "epoch": 2.270529264337314, "grad_norm": 3.618165440082663, "learning_rate": 1.2716346269575136e-06, "loss": 1.0622, "step": 77005 }, { "epoch": 2.2706766917293235, "grad_norm": 3.4730638288001905, "learning_rate": 1.2711518994250624e-06, "loss": 1.0426, "step": 77010 }, { "epoch": 2.2708241191213325, "grad_norm": 3.525202204511176, "learning_rate": 1.270669244835547e-06, "loss": 1.0658, "step": 77015 }, { "epoch": 2.270971546513342, "grad_norm": 3.4704944866982794, "learning_rate": 1.2701866632031699e-06, "loss": 1.0152, "step": 77020 }, { "epoch": 2.2711189739053514, "grad_norm": 3.459604492315983, "learning_rate": 1.2697041545421338e-06, "loss": 1.0556, "step": 77025 }, { "epoch": 2.271266401297361, "grad_norm": 3.409280042470529, "learning_rate": 1.269221718866632e-06, "loss": 1.0318, "step": 77030 }, { "epoch": 2.2714138286893704, "grad_norm": 3.5700703970058347, "learning_rate": 1.2687393561908669e-06, "loss": 1.0557, "step": 77035 }, { "epoch": 2.27156125608138, "grad_norm": 3.431724716358774, "learning_rate": 1.2682570665290268e-06, "loss": 1.0249, "step": 77040 }, { "epoch": 2.2717086834733893, "grad_norm": 3.5312214171572447, "learning_rate": 1.2677748498953088e-06, "loss": 1.0709, "step": 77045 }, { "epoch": 2.2718561108653987, "grad_norm": 3.4964138250510626, "learning_rate": 1.267292706303899e-06, "loss": 1.0327, "step": 77050 }, { "epoch": 2.272003538257408, "grad_norm": 3.5952859683176555, "learning_rate": 1.2668106357689867e-06, "loss": 1.0463, "step": 77055 }, { "epoch": 2.2721509656494177, "grad_norm": 3.5226328999014607, "learning_rate": 1.2663286383047576e-06, "loss": 1.0228, "step": 77060 }, { "epoch": 2.272298393041427, "grad_norm": 3.310356461526326, "learning_rate": 1.2658467139253921e-06, "loss": 1.003, "step": 77065 }, { "epoch": 2.2724458204334366, "grad_norm": 3.410806482660995, "learning_rate": 1.2653648626450764e-06, "loss": 1.0484, "step": 77070 }, { "epoch": 2.272593247825446, "grad_norm": 3.6002899268143556, "learning_rate": 1.2648830844779851e-06, "loss": 1.0682, "step": 77075 }, { "epoch": 2.2727406752174555, "grad_norm": 3.4300337399608143, "learning_rate": 1.264401379438297e-06, "loss": 1.0207, "step": 77080 }, { "epoch": 2.272888102609465, "grad_norm": 3.573088213287972, "learning_rate": 1.2639197475401864e-06, "loss": 1.0394, "step": 77085 }, { "epoch": 2.2730355300014744, "grad_norm": 3.34548839368033, "learning_rate": 1.2634381887978261e-06, "loss": 1.0348, "step": 77090 }, { "epoch": 2.273182957393484, "grad_norm": 3.6631073551712983, "learning_rate": 1.262956703225387e-06, "loss": 1.0409, "step": 77095 }, { "epoch": 2.2733303847854933, "grad_norm": 3.4290320805325694, "learning_rate": 1.2624752908370372e-06, "loss": 1.0429, "step": 77100 }, { "epoch": 2.2734778121775028, "grad_norm": 3.4758701286031313, "learning_rate": 1.2619939516469404e-06, "loss": 1.0428, "step": 77105 }, { "epoch": 2.273625239569512, "grad_norm": 3.1467896581964414, "learning_rate": 1.2615126856692652e-06, "loss": 0.9969, "step": 77110 }, { "epoch": 2.2737726669615217, "grad_norm": 3.5404363723906696, "learning_rate": 1.261031492918168e-06, "loss": 1.0169, "step": 77115 }, { "epoch": 2.2739200943535307, "grad_norm": 3.5221431347419836, "learning_rate": 1.2605503734078137e-06, "loss": 1.0564, "step": 77120 }, { "epoch": 2.27406752174554, "grad_norm": 3.4956617876836784, "learning_rate": 1.2600693271523563e-06, "loss": 1.0534, "step": 77125 }, { "epoch": 2.2742149491375496, "grad_norm": 3.482638007868387, "learning_rate": 1.2595883541659518e-06, "loss": 1.0728, "step": 77130 }, { "epoch": 2.274362376529559, "grad_norm": 3.561071513706765, "learning_rate": 1.2591074544627551e-06, "loss": 1.0629, "step": 77135 }, { "epoch": 2.2745098039215685, "grad_norm": 3.406713234214338, "learning_rate": 1.2586266280569136e-06, "loss": 1.043, "step": 77140 }, { "epoch": 2.274657231313578, "grad_norm": 3.4216037315757877, "learning_rate": 1.2581458749625808e-06, "loss": 1.0411, "step": 77145 }, { "epoch": 2.2748046587055875, "grad_norm": 3.621357651851221, "learning_rate": 1.2576651951938997e-06, "loss": 1.0675, "step": 77150 }, { "epoch": 2.274952086097597, "grad_norm": 3.453629666041703, "learning_rate": 1.2571845887650158e-06, "loss": 1.0479, "step": 77155 }, { "epoch": 2.2750995134896064, "grad_norm": 3.464662346853899, "learning_rate": 1.2567040556900724e-06, "loss": 1.0559, "step": 77160 }, { "epoch": 2.275246940881616, "grad_norm": 3.450876978052927, "learning_rate": 1.2562235959832095e-06, "loss": 1.041, "step": 77165 }, { "epoch": 2.2753943682736253, "grad_norm": 3.369656445611853, "learning_rate": 1.2557432096585641e-06, "loss": 0.9959, "step": 77170 }, { "epoch": 2.2755417956656347, "grad_norm": 3.4688517294699728, "learning_rate": 1.2552628967302732e-06, "loss": 1.0631, "step": 77175 }, { "epoch": 2.275689223057644, "grad_norm": 3.469290120053048, "learning_rate": 1.2547826572124705e-06, "loss": 1.0412, "step": 77180 }, { "epoch": 2.2758366504496537, "grad_norm": 3.439481689319106, "learning_rate": 1.2543024911192883e-06, "loss": 1.0439, "step": 77185 }, { "epoch": 2.275984077841663, "grad_norm": 3.5553355669686497, "learning_rate": 1.2538223984648541e-06, "loss": 1.0679, "step": 77190 }, { "epoch": 2.2761315052336726, "grad_norm": 3.474322510398696, "learning_rate": 1.2533423792632958e-06, "loss": 1.0475, "step": 77195 }, { "epoch": 2.276278932625682, "grad_norm": 3.3905916877758204, "learning_rate": 1.2528624335287392e-06, "loss": 1.0847, "step": 77200 }, { "epoch": 2.276426360017691, "grad_norm": 3.419432105364414, "learning_rate": 1.2523825612753071e-06, "loss": 1.0804, "step": 77205 }, { "epoch": 2.276573787409701, "grad_norm": 3.398524912943206, "learning_rate": 1.251902762517121e-06, "loss": 1.0193, "step": 77210 }, { "epoch": 2.27672121480171, "grad_norm": 3.312681639115634, "learning_rate": 1.251423037268296e-06, "loss": 1.023, "step": 77215 }, { "epoch": 2.2768686421937194, "grad_norm": 3.6328202589573184, "learning_rate": 1.250943385542954e-06, "loss": 1.077, "step": 77220 }, { "epoch": 2.277016069585729, "grad_norm": 3.528912075107536, "learning_rate": 1.2504638073552052e-06, "loss": 1.045, "step": 77225 }, { "epoch": 2.2771634969777383, "grad_norm": 3.401324559401551, "learning_rate": 1.2499843027191629e-06, "loss": 1.0544, "step": 77230 }, { "epoch": 2.277310924369748, "grad_norm": 3.576140625147377, "learning_rate": 1.2495048716489374e-06, "loss": 1.0494, "step": 77235 }, { "epoch": 2.2774583517617573, "grad_norm": 3.3113473980655934, "learning_rate": 1.2490255141586362e-06, "loss": 1.0519, "step": 77240 }, { "epoch": 2.2776057791537667, "grad_norm": 3.412664074568432, "learning_rate": 1.2485462302623653e-06, "loss": 1.0725, "step": 77245 }, { "epoch": 2.277753206545776, "grad_norm": 3.5432233390578465, "learning_rate": 1.2480670199742277e-06, "loss": 1.0635, "step": 77250 }, { "epoch": 2.2779006339377856, "grad_norm": 3.3241732999625855, "learning_rate": 1.247587883308326e-06, "loss": 0.9964, "step": 77255 }, { "epoch": 2.278048061329795, "grad_norm": 3.2477316080273413, "learning_rate": 1.2471088202787572e-06, "loss": 1.0459, "step": 77260 }, { "epoch": 2.2781954887218046, "grad_norm": 3.42172944241066, "learning_rate": 1.2466298308996196e-06, "loss": 1.039, "step": 77265 }, { "epoch": 2.278342916113814, "grad_norm": 3.3989767655825918, "learning_rate": 1.2461509151850076e-06, "loss": 1.0814, "step": 77270 }, { "epoch": 2.2784903435058235, "grad_norm": 3.420817645585324, "learning_rate": 1.245672073149014e-06, "loss": 1.0253, "step": 77275 }, { "epoch": 2.278637770897833, "grad_norm": 3.46363833016575, "learning_rate": 1.2451933048057293e-06, "loss": 1.0577, "step": 77280 }, { "epoch": 2.2787851982898424, "grad_norm": 3.5228023575085565, "learning_rate": 1.244714610169243e-06, "loss": 1.0771, "step": 77285 }, { "epoch": 2.278932625681852, "grad_norm": 3.5653723230646883, "learning_rate": 1.2442359892536378e-06, "loss": 1.0584, "step": 77290 }, { "epoch": 2.2790800530738613, "grad_norm": 3.3713405537628023, "learning_rate": 1.243757442073002e-06, "loss": 1.0003, "step": 77295 }, { "epoch": 2.2792274804658703, "grad_norm": 3.370971511938083, "learning_rate": 1.2432789686414133e-06, "loss": 1.0457, "step": 77300 }, { "epoch": 2.27937490785788, "grad_norm": 3.5734214579148422, "learning_rate": 1.2428005689729555e-06, "loss": 1.0608, "step": 77305 }, { "epoch": 2.2795223352498892, "grad_norm": 3.44118972501599, "learning_rate": 1.2423222430817025e-06, "loss": 1.079, "step": 77310 }, { "epoch": 2.2796697626418987, "grad_norm": 3.3567524935326385, "learning_rate": 1.2418439909817313e-06, "loss": 1.0527, "step": 77315 }, { "epoch": 2.279817190033908, "grad_norm": 3.4548288501674467, "learning_rate": 1.2413658126871156e-06, "loss": 1.0508, "step": 77320 }, { "epoch": 2.2799646174259176, "grad_norm": 3.317139849024563, "learning_rate": 1.2408877082119231e-06, "loss": 1.0399, "step": 77325 }, { "epoch": 2.280112044817927, "grad_norm": 3.457570463471214, "learning_rate": 1.2404096775702273e-06, "loss": 1.0735, "step": 77330 }, { "epoch": 2.2802594722099365, "grad_norm": 3.4837111193920705, "learning_rate": 1.239931720776091e-06, "loss": 1.0868, "step": 77335 }, { "epoch": 2.280406899601946, "grad_norm": 3.4264202120503273, "learning_rate": 1.2394538378435801e-06, "loss": 1.0753, "step": 77340 }, { "epoch": 2.2805543269939554, "grad_norm": 3.397310664373133, "learning_rate": 1.238976028786757e-06, "loss": 1.0453, "step": 77345 }, { "epoch": 2.280701754385965, "grad_norm": 3.631749380150686, "learning_rate": 1.2384982936196814e-06, "loss": 1.0514, "step": 77350 }, { "epoch": 2.2808491817779744, "grad_norm": 3.521844697281418, "learning_rate": 1.2380206323564115e-06, "loss": 1.0688, "step": 77355 }, { "epoch": 2.280996609169984, "grad_norm": 3.5962322253241497, "learning_rate": 1.2375430450110038e-06, "loss": 1.0628, "step": 77360 }, { "epoch": 2.2811440365619933, "grad_norm": 3.4648125878511067, "learning_rate": 1.2370655315975089e-06, "loss": 1.024, "step": 77365 }, { "epoch": 2.2812914639540027, "grad_norm": 3.5327655131275715, "learning_rate": 1.2365880921299822e-06, "loss": 1.0428, "step": 77370 }, { "epoch": 2.281438891346012, "grad_norm": 3.56444708575668, "learning_rate": 1.236110726622469e-06, "loss": 1.067, "step": 77375 }, { "epoch": 2.2815863187380216, "grad_norm": 3.483523072215537, "learning_rate": 1.23563343508902e-06, "loss": 1.0941, "step": 77380 }, { "epoch": 2.281733746130031, "grad_norm": 3.5441380139840435, "learning_rate": 1.2351562175436774e-06, "loss": 1.0652, "step": 77385 }, { "epoch": 2.2818811735220406, "grad_norm": 3.366007985035125, "learning_rate": 1.2346790740004846e-06, "loss": 1.0637, "step": 77390 }, { "epoch": 2.28202860091405, "grad_norm": 3.505060250132341, "learning_rate": 1.2342020044734838e-06, "loss": 1.0351, "step": 77395 }, { "epoch": 2.2821760283060595, "grad_norm": 3.449789735854027, "learning_rate": 1.233725008976709e-06, "loss": 1.069, "step": 77400 }, { "epoch": 2.2823234556980685, "grad_norm": 3.5317212289533937, "learning_rate": 1.2332480875242017e-06, "loss": 1.0421, "step": 77405 }, { "epoch": 2.282470883090078, "grad_norm": 3.597066298016008, "learning_rate": 1.232771240129992e-06, "loss": 1.0646, "step": 77410 }, { "epoch": 2.2826183104820874, "grad_norm": 3.511620474573686, "learning_rate": 1.2322944668081127e-06, "loss": 1.0643, "step": 77415 }, { "epoch": 2.282765737874097, "grad_norm": 3.4198790260070875, "learning_rate": 1.231817767572594e-06, "loss": 1.0299, "step": 77420 }, { "epoch": 2.2829131652661063, "grad_norm": 3.424264246441437, "learning_rate": 1.2313411424374628e-06, "loss": 1.0516, "step": 77425 }, { "epoch": 2.283060592658116, "grad_norm": 3.533713574584476, "learning_rate": 1.2308645914167446e-06, "loss": 1.0912, "step": 77430 }, { "epoch": 2.2832080200501252, "grad_norm": 3.4885621688607826, "learning_rate": 1.230388114524462e-06, "loss": 1.0936, "step": 77435 }, { "epoch": 2.2833554474421347, "grad_norm": 3.542690580800081, "learning_rate": 1.2299117117746365e-06, "loss": 1.032, "step": 77440 }, { "epoch": 2.283502874834144, "grad_norm": 3.53435590589159, "learning_rate": 1.2294353831812875e-06, "loss": 1.0707, "step": 77445 }, { "epoch": 2.2836503022261536, "grad_norm": 3.368143022172836, "learning_rate": 1.2289591287584297e-06, "loss": 1.0519, "step": 77450 }, { "epoch": 2.283797729618163, "grad_norm": 3.532036650485331, "learning_rate": 1.2284829485200778e-06, "loss": 1.0636, "step": 77455 }, { "epoch": 2.2839451570101725, "grad_norm": 3.4874839633045784, "learning_rate": 1.228006842480245e-06, "loss": 1.045, "step": 77460 }, { "epoch": 2.284092584402182, "grad_norm": 3.4492038571021495, "learning_rate": 1.2275308106529403e-06, "loss": 1.0624, "step": 77465 }, { "epoch": 2.2842400117941914, "grad_norm": 3.318353680685378, "learning_rate": 1.227054853052173e-06, "loss": 1.0326, "step": 77470 }, { "epoch": 2.284387439186201, "grad_norm": 3.569216178930244, "learning_rate": 1.2265789696919455e-06, "loss": 1.0937, "step": 77475 }, { "epoch": 2.2845348665782104, "grad_norm": 3.371727598887326, "learning_rate": 1.2261031605862654e-06, "loss": 1.0325, "step": 77480 }, { "epoch": 2.28468229397022, "grad_norm": 3.634937741939421, "learning_rate": 1.2256274257491302e-06, "loss": 1.0514, "step": 77485 }, { "epoch": 2.2848297213622293, "grad_norm": 3.3906608619791876, "learning_rate": 1.2251517651945412e-06, "loss": 1.0137, "step": 77490 }, { "epoch": 2.2849771487542387, "grad_norm": 3.3709074630058136, "learning_rate": 1.2246761789364943e-06, "loss": 1.0593, "step": 77495 }, { "epoch": 2.2851245761462478, "grad_norm": 3.4829368431686056, "learning_rate": 1.2242006669889846e-06, "loss": 1.0461, "step": 77500 }, { "epoch": 2.2851245761462478, "eval_loss": 1.0842194557189941, "eval_runtime": 4.3129, "eval_samples_per_second": 91.818, "eval_steps_per_second": 3.014, "step": 77500 }, { "epoch": 2.2852720035382577, "grad_norm": 3.7639442267679617, "learning_rate": 1.2237252293660042e-06, "loss": 1.0817, "step": 77505 }, { "epoch": 2.2854194309302667, "grad_norm": 3.4502051707268175, "learning_rate": 1.223249866081544e-06, "loss": 0.9922, "step": 77510 }, { "epoch": 2.285566858322276, "grad_norm": 3.7197743352589656, "learning_rate": 1.2227745771495923e-06, "loss": 1.0678, "step": 77515 }, { "epoch": 2.2857142857142856, "grad_norm": 3.4232572320330545, "learning_rate": 1.222299362584134e-06, "loss": 1.0987, "step": 77520 }, { "epoch": 2.285861713106295, "grad_norm": 3.443368355897209, "learning_rate": 1.2218242223991525e-06, "loss": 1.0502, "step": 77525 }, { "epoch": 2.2860091404983045, "grad_norm": 3.4991305586331496, "learning_rate": 1.2213491566086305e-06, "loss": 1.0386, "step": 77530 }, { "epoch": 2.286156567890314, "grad_norm": 3.444759500599603, "learning_rate": 1.2208741652265472e-06, "loss": 1.0297, "step": 77535 }, { "epoch": 2.2863039952823234, "grad_norm": 3.3885969081077896, "learning_rate": 1.2203992482668796e-06, "loss": 1.0251, "step": 77540 }, { "epoch": 2.286451422674333, "grad_norm": 3.5348520206617557, "learning_rate": 1.2199244057436035e-06, "loss": 1.0737, "step": 77545 }, { "epoch": 2.2865988500663423, "grad_norm": 3.673084753739975, "learning_rate": 1.2194496376706884e-06, "loss": 1.096, "step": 77550 }, { "epoch": 2.286746277458352, "grad_norm": 3.5208330045374363, "learning_rate": 1.21897494406211e-06, "loss": 1.0669, "step": 77555 }, { "epoch": 2.2868937048503613, "grad_norm": 3.4539012763324326, "learning_rate": 1.2185003249318309e-06, "loss": 1.0202, "step": 77560 }, { "epoch": 2.2870411322423707, "grad_norm": 3.4972216858558673, "learning_rate": 1.2180257802938234e-06, "loss": 1.0435, "step": 77565 }, { "epoch": 2.28718855963438, "grad_norm": 3.463323140277082, "learning_rate": 1.2175513101620475e-06, "loss": 1.0184, "step": 77570 }, { "epoch": 2.2873359870263896, "grad_norm": 3.2087545892804616, "learning_rate": 1.2170769145504658e-06, "loss": 1.0321, "step": 77575 }, { "epoch": 2.287483414418399, "grad_norm": 3.286902368870109, "learning_rate": 1.2166025934730392e-06, "loss": 1.0327, "step": 77580 }, { "epoch": 2.2876308418104085, "grad_norm": 3.4822002979810414, "learning_rate": 1.216128346943722e-06, "loss": 1.0164, "step": 77585 }, { "epoch": 2.287778269202418, "grad_norm": 3.4648181244343648, "learning_rate": 1.2156541749764736e-06, "loss": 1.023, "step": 77590 }, { "epoch": 2.287925696594427, "grad_norm": 3.452444683698894, "learning_rate": 1.2151800775852441e-06, "loss": 1.0697, "step": 77595 }, { "epoch": 2.288073123986437, "grad_norm": 3.4796113205760917, "learning_rate": 1.2147060547839855e-06, "loss": 1.0641, "step": 77600 }, { "epoch": 2.288220551378446, "grad_norm": 3.4316407349963596, "learning_rate": 1.214232106586646e-06, "loss": 1.0443, "step": 77605 }, { "epoch": 2.2883679787704554, "grad_norm": 3.5368703526936285, "learning_rate": 1.2137582330071722e-06, "loss": 1.0676, "step": 77610 }, { "epoch": 2.288515406162465, "grad_norm": 3.3452840960653583, "learning_rate": 1.2132844340595085e-06, "loss": 1.0345, "step": 77615 }, { "epoch": 2.2886628335544743, "grad_norm": 3.3946230558345296, "learning_rate": 1.2128107097575982e-06, "loss": 1.0458, "step": 77620 }, { "epoch": 2.2888102609464838, "grad_norm": 3.519077226998789, "learning_rate": 1.212337060115378e-06, "loss": 1.0776, "step": 77625 }, { "epoch": 2.2889576883384932, "grad_norm": 3.2396180570464863, "learning_rate": 1.2118634851467893e-06, "loss": 0.9941, "step": 77630 }, { "epoch": 2.2891051157305027, "grad_norm": 3.3577701342945194, "learning_rate": 1.2113899848657634e-06, "loss": 1.0741, "step": 77635 }, { "epoch": 2.289252543122512, "grad_norm": 3.70657320601139, "learning_rate": 1.2109165592862386e-06, "loss": 1.0954, "step": 77640 }, { "epoch": 2.2893999705145216, "grad_norm": 3.5411817362296096, "learning_rate": 1.210443208422142e-06, "loss": 1.0664, "step": 77645 }, { "epoch": 2.289547397906531, "grad_norm": 3.4388970454006027, "learning_rate": 1.209969932287404e-06, "loss": 0.9944, "step": 77650 }, { "epoch": 2.2896948252985405, "grad_norm": 3.5360133816376043, "learning_rate": 1.2094967308959525e-06, "loss": 1.0297, "step": 77655 }, { "epoch": 2.28984225269055, "grad_norm": 3.518064609672314, "learning_rate": 1.2090236042617078e-06, "loss": 1.0859, "step": 77660 }, { "epoch": 2.2899896800825594, "grad_norm": 3.5555659230912293, "learning_rate": 1.2085505523985978e-06, "loss": 1.0425, "step": 77665 }, { "epoch": 2.290137107474569, "grad_norm": 3.4055511355166317, "learning_rate": 1.2080775753205389e-06, "loss": 1.0376, "step": 77670 }, { "epoch": 2.2902845348665783, "grad_norm": 3.473136447050902, "learning_rate": 1.2076046730414493e-06, "loss": 1.0685, "step": 77675 }, { "epoch": 2.290431962258588, "grad_norm": 3.450518302982846, "learning_rate": 1.2071318455752458e-06, "loss": 1.0585, "step": 77680 }, { "epoch": 2.2905793896505973, "grad_norm": 3.5882452657239536, "learning_rate": 1.206659092935841e-06, "loss": 1.0577, "step": 77685 }, { "epoch": 2.2907268170426063, "grad_norm": 3.4614873225256213, "learning_rate": 1.2061864151371466e-06, "loss": 1.0334, "step": 77690 }, { "epoch": 2.290874244434616, "grad_norm": 3.372336927015124, "learning_rate": 1.205713812193072e-06, "loss": 1.0671, "step": 77695 }, { "epoch": 2.291021671826625, "grad_norm": 3.4639808098089424, "learning_rate": 1.2052412841175234e-06, "loss": 1.058, "step": 77700 }, { "epoch": 2.2911690992186347, "grad_norm": 3.3786025518013356, "learning_rate": 1.2047688309244074e-06, "loss": 1.0288, "step": 77705 }, { "epoch": 2.291316526610644, "grad_norm": 3.5292664865451076, "learning_rate": 1.2042964526276234e-06, "loss": 1.0724, "step": 77710 }, { "epoch": 2.2914639540026536, "grad_norm": 3.2642531072332637, "learning_rate": 1.2038241492410733e-06, "loss": 1.0665, "step": 77715 }, { "epoch": 2.291611381394663, "grad_norm": 3.5141137243456555, "learning_rate": 1.203351920778655e-06, "loss": 1.0248, "step": 77720 }, { "epoch": 2.2917588087866725, "grad_norm": 3.7049324739394427, "learning_rate": 1.2028797672542642e-06, "loss": 1.0632, "step": 77725 }, { "epoch": 2.291906236178682, "grad_norm": 3.4245701877390875, "learning_rate": 1.2024076886817964e-06, "loss": 1.0202, "step": 77730 }, { "epoch": 2.2920536635706914, "grad_norm": 3.4220704484341455, "learning_rate": 1.2019356850751389e-06, "loss": 1.0723, "step": 77735 }, { "epoch": 2.292201090962701, "grad_norm": 3.503697472338726, "learning_rate": 1.2014637564481853e-06, "loss": 1.034, "step": 77740 }, { "epoch": 2.2923485183547103, "grad_norm": 3.5884071983730887, "learning_rate": 1.2009919028148204e-06, "loss": 1.0657, "step": 77745 }, { "epoch": 2.2924959457467198, "grad_norm": 3.4344190636313496, "learning_rate": 1.2005201241889287e-06, "loss": 1.0301, "step": 77750 }, { "epoch": 2.2926433731387292, "grad_norm": 3.5549965360551514, "learning_rate": 1.200048420584394e-06, "loss": 1.0442, "step": 77755 }, { "epoch": 2.2927908005307387, "grad_norm": 3.56922482161351, "learning_rate": 1.1995767920150958e-06, "loss": 1.0647, "step": 77760 }, { "epoch": 2.292938227922748, "grad_norm": 3.775255330622815, "learning_rate": 1.1991052384949135e-06, "loss": 1.0896, "step": 77765 }, { "epoch": 2.2930856553147576, "grad_norm": 3.4381146844493986, "learning_rate": 1.1986337600377219e-06, "loss": 1.0437, "step": 77770 }, { "epoch": 2.293233082706767, "grad_norm": 3.522444982364009, "learning_rate": 1.1981623566573963e-06, "loss": 1.0549, "step": 77775 }, { "epoch": 2.2933805100987765, "grad_norm": 3.488760553760466, "learning_rate": 1.1976910283678063e-06, "loss": 1.0294, "step": 77780 }, { "epoch": 2.2935279374907855, "grad_norm": 3.416785491411154, "learning_rate": 1.1972197751828223e-06, "loss": 1.0103, "step": 77785 }, { "epoch": 2.2936753648827954, "grad_norm": 3.603523973972127, "learning_rate": 1.1967485971163115e-06, "loss": 1.0494, "step": 77790 }, { "epoch": 2.2938227922748045, "grad_norm": 3.5920059146975682, "learning_rate": 1.1962774941821383e-06, "loss": 1.0902, "step": 77795 }, { "epoch": 2.293970219666814, "grad_norm": 3.416283614554438, "learning_rate": 1.1958064663941662e-06, "loss": 1.0524, "step": 77800 }, { "epoch": 2.2941176470588234, "grad_norm": 3.5879139264959123, "learning_rate": 1.1953355137662565e-06, "loss": 1.0903, "step": 77805 }, { "epoch": 2.294265074450833, "grad_norm": 3.4798485478461196, "learning_rate": 1.194864636312264e-06, "loss": 1.0543, "step": 77810 }, { "epoch": 2.2944125018428423, "grad_norm": 3.4449393114990006, "learning_rate": 1.1943938340460494e-06, "loss": 1.06, "step": 77815 }, { "epoch": 2.2945599292348517, "grad_norm": 3.662203346879699, "learning_rate": 1.1939231069814627e-06, "loss": 1.1034, "step": 77820 }, { "epoch": 2.294707356626861, "grad_norm": 3.3998481574065913, "learning_rate": 1.1934524551323594e-06, "loss": 1.0136, "step": 77825 }, { "epoch": 2.2948547840188707, "grad_norm": 3.408676387930992, "learning_rate": 1.1929818785125853e-06, "loss": 1.0746, "step": 77830 }, { "epoch": 2.29500221141088, "grad_norm": 3.2851627132597403, "learning_rate": 1.1925113771359893e-06, "loss": 1.036, "step": 77835 }, { "epoch": 2.2951496388028896, "grad_norm": 3.632873576583587, "learning_rate": 1.192040951016417e-06, "loss": 1.0722, "step": 77840 }, { "epoch": 2.295297066194899, "grad_norm": 3.511062155411504, "learning_rate": 1.1915706001677102e-06, "loss": 1.0974, "step": 77845 }, { "epoch": 2.2954444935869085, "grad_norm": 3.319793345115476, "learning_rate": 1.191100324603711e-06, "loss": 1.0484, "step": 77850 }, { "epoch": 2.295591920978918, "grad_norm": 3.3540811941371036, "learning_rate": 1.1906301243382555e-06, "loss": 1.0212, "step": 77855 }, { "epoch": 2.2957393483709274, "grad_norm": 3.458773404269387, "learning_rate": 1.1901599993851814e-06, "loss": 1.035, "step": 77860 }, { "epoch": 2.295886775762937, "grad_norm": 3.524724980399442, "learning_rate": 1.1896899497583222e-06, "loss": 1.0191, "step": 77865 }, { "epoch": 2.2960342031549463, "grad_norm": 3.570240093540371, "learning_rate": 1.1892199754715097e-06, "loss": 1.0229, "step": 77870 }, { "epoch": 2.296181630546956, "grad_norm": 3.5933251414202094, "learning_rate": 1.1887500765385737e-06, "loss": 1.0128, "step": 77875 }, { "epoch": 2.2963290579389652, "grad_norm": 3.6100719658409384, "learning_rate": 1.1882802529733421e-06, "loss": 1.0812, "step": 77880 }, { "epoch": 2.2964764853309747, "grad_norm": 3.289705070744346, "learning_rate": 1.1878105047896372e-06, "loss": 1.0279, "step": 77885 }, { "epoch": 2.2966239127229837, "grad_norm": 3.3768035003396006, "learning_rate": 1.1873408320012864e-06, "loss": 1.0455, "step": 77890 }, { "epoch": 2.296771340114993, "grad_norm": 3.2923083342454267, "learning_rate": 1.1868712346221051e-06, "loss": 1.0518, "step": 77895 }, { "epoch": 2.2969187675070026, "grad_norm": 3.570445042084234, "learning_rate": 1.1864017126659172e-06, "loss": 1.1132, "step": 77900 }, { "epoch": 2.297066194899012, "grad_norm": 3.2936886600585633, "learning_rate": 1.1859322661465349e-06, "loss": 1.0377, "step": 77905 }, { "epoch": 2.2972136222910216, "grad_norm": 3.459278509158535, "learning_rate": 1.1854628950777738e-06, "loss": 1.0564, "step": 77910 }, { "epoch": 2.297361049683031, "grad_norm": 3.5764745021958024, "learning_rate": 1.1849935994734462e-06, "loss": 1.0357, "step": 77915 }, { "epoch": 2.2975084770750405, "grad_norm": 3.5488036399405907, "learning_rate": 1.1845243793473588e-06, "loss": 1.0295, "step": 77920 }, { "epoch": 2.29765590446705, "grad_norm": 3.4441664743626053, "learning_rate": 1.1840552347133231e-06, "loss": 1.0452, "step": 77925 }, { "epoch": 2.2978033318590594, "grad_norm": 3.4345994874618273, "learning_rate": 1.183586165585141e-06, "loss": 1.0791, "step": 77930 }, { "epoch": 2.297950759251069, "grad_norm": 3.7246114577274247, "learning_rate": 1.1831171719766168e-06, "loss": 1.0943, "step": 77935 }, { "epoch": 2.2980981866430783, "grad_norm": 3.3377471961949396, "learning_rate": 1.1826482539015507e-06, "loss": 1.028, "step": 77940 }, { "epoch": 2.2982456140350878, "grad_norm": 3.4197060425290564, "learning_rate": 1.1821794113737413e-06, "loss": 1.0474, "step": 77945 }, { "epoch": 2.298393041427097, "grad_norm": 3.559776380298634, "learning_rate": 1.1817106444069848e-06, "loss": 1.0863, "step": 77950 }, { "epoch": 2.2985404688191067, "grad_norm": 3.4908264630281853, "learning_rate": 1.1812419530150751e-06, "loss": 1.026, "step": 77955 }, { "epoch": 2.298687896211116, "grad_norm": 3.5205294502108297, "learning_rate": 1.1807733372118045e-06, "loss": 1.0634, "step": 77960 }, { "epoch": 2.2988353236031256, "grad_norm": 3.4146670759296667, "learning_rate": 1.1803047970109632e-06, "loss": 1.0246, "step": 77965 }, { "epoch": 2.298982750995135, "grad_norm": 3.597718713630338, "learning_rate": 1.1798363324263354e-06, "loss": 1.0675, "step": 77970 }, { "epoch": 2.2991301783871445, "grad_norm": 3.5455502991393826, "learning_rate": 1.179367943471711e-06, "loss": 1.0762, "step": 77975 }, { "epoch": 2.299277605779154, "grad_norm": 3.476349340780963, "learning_rate": 1.1788996301608685e-06, "loss": 1.0262, "step": 77980 }, { "epoch": 2.299425033171163, "grad_norm": 3.453876735276916, "learning_rate": 1.17843139250759e-06, "loss": 1.0475, "step": 77985 }, { "epoch": 2.299572460563173, "grad_norm": 3.251624426460169, "learning_rate": 1.1779632305256557e-06, "loss": 1.0064, "step": 77990 }, { "epoch": 2.299719887955182, "grad_norm": 3.3471406312964187, "learning_rate": 1.177495144228838e-06, "loss": 1.0481, "step": 77995 }, { "epoch": 2.2998673153471914, "grad_norm": 3.6072281131402, "learning_rate": 1.1770271336309156e-06, "loss": 1.0185, "step": 78000 }, { "epoch": 2.2998673153471914, "eval_loss": 1.0835130214691162, "eval_runtime": 4.2217, "eval_samples_per_second": 93.801, "eval_steps_per_second": 3.079, "step": 78000 }, { "epoch": 2.300014742739201, "grad_norm": 3.4719132264106216, "learning_rate": 1.1765591987456568e-06, "loss": 1.0064, "step": 78005 }, { "epoch": 2.3001621701312103, "grad_norm": 3.5251747090916115, "learning_rate": 1.1760913395868315e-06, "loss": 1.055, "step": 78010 }, { "epoch": 2.3003095975232197, "grad_norm": 3.6134282838758076, "learning_rate": 1.175623556168208e-06, "loss": 1.0512, "step": 78015 }, { "epoch": 2.300457024915229, "grad_norm": 3.484703452306829, "learning_rate": 1.1751558485035513e-06, "loss": 1.0337, "step": 78020 }, { "epoch": 2.3006044523072386, "grad_norm": 3.382418549639187, "learning_rate": 1.1746882166066234e-06, "loss": 1.0091, "step": 78025 }, { "epoch": 2.300751879699248, "grad_norm": 3.6688396442711957, "learning_rate": 1.1742206604911856e-06, "loss": 1.0389, "step": 78030 }, { "epoch": 2.3008993070912576, "grad_norm": 3.4335920899524135, "learning_rate": 1.1737531801709963e-06, "loss": 1.0517, "step": 78035 }, { "epoch": 2.301046734483267, "grad_norm": 3.4896233242973795, "learning_rate": 1.1732857756598124e-06, "loss": 1.0562, "step": 78040 }, { "epoch": 2.3011941618752765, "grad_norm": 3.4726878505852006, "learning_rate": 1.1728184469713857e-06, "loss": 1.0609, "step": 78045 }, { "epoch": 2.301341589267286, "grad_norm": 3.467960331366657, "learning_rate": 1.1723511941194693e-06, "loss": 1.0521, "step": 78050 }, { "epoch": 2.3014890166592954, "grad_norm": 3.6188217454608087, "learning_rate": 1.171884017117812e-06, "loss": 1.07, "step": 78055 }, { "epoch": 2.301636444051305, "grad_norm": 3.650019390462241, "learning_rate": 1.1714169159801615e-06, "loss": 1.0714, "step": 78060 }, { "epoch": 2.3017838714433143, "grad_norm": 3.38155575287846, "learning_rate": 1.170949890720264e-06, "loss": 1.0665, "step": 78065 }, { "epoch": 2.3019312988353238, "grad_norm": 3.347699052517672, "learning_rate": 1.1704829413518582e-06, "loss": 1.0478, "step": 78070 }, { "epoch": 2.3020787262273332, "grad_norm": 3.574953747576644, "learning_rate": 1.1700160678886902e-06, "loss": 1.0692, "step": 78075 }, { "epoch": 2.3022261536193422, "grad_norm": 3.5872652339438273, "learning_rate": 1.169549270344493e-06, "loss": 1.0654, "step": 78080 }, { "epoch": 2.302373581011352, "grad_norm": 3.4233321947844106, "learning_rate": 1.169082548733007e-06, "loss": 1.0771, "step": 78085 }, { "epoch": 2.302521008403361, "grad_norm": 3.515563144610301, "learning_rate": 1.1686159030679635e-06, "loss": 1.0987, "step": 78090 }, { "epoch": 2.3026684357953706, "grad_norm": 3.3249294644421674, "learning_rate": 1.1681493333630944e-06, "loss": 1.0351, "step": 78095 }, { "epoch": 2.30281586318738, "grad_norm": 3.281244448679736, "learning_rate": 1.1676828396321292e-06, "loss": 1.0834, "step": 78100 }, { "epoch": 2.3029632905793895, "grad_norm": 3.4284894205250924, "learning_rate": 1.1672164218887953e-06, "loss": 1.061, "step": 78105 }, { "epoch": 2.303110717971399, "grad_norm": 3.450352873745609, "learning_rate": 1.1667500801468185e-06, "loss": 1.0384, "step": 78110 }, { "epoch": 2.3032581453634084, "grad_norm": 3.6108862147147542, "learning_rate": 1.1662838144199194e-06, "loss": 1.06, "step": 78115 }, { "epoch": 2.303405572755418, "grad_norm": 3.619288934572965, "learning_rate": 1.1658176247218194e-06, "loss": 1.0462, "step": 78120 }, { "epoch": 2.3035530001474274, "grad_norm": 3.6250051315266614, "learning_rate": 1.1653515110662361e-06, "loss": 1.0556, "step": 78125 }, { "epoch": 2.303700427539437, "grad_norm": 3.465035137230864, "learning_rate": 1.1648854734668862e-06, "loss": 1.0751, "step": 78130 }, { "epoch": 2.3038478549314463, "grad_norm": 3.379372561499627, "learning_rate": 1.1644195119374833e-06, "loss": 1.0829, "step": 78135 }, { "epoch": 2.3039952823234557, "grad_norm": 3.465514444852436, "learning_rate": 1.16395362649174e-06, "loss": 1.0699, "step": 78140 }, { "epoch": 2.304142709715465, "grad_norm": 3.5431567514405353, "learning_rate": 1.1634878171433615e-06, "loss": 1.0493, "step": 78145 }, { "epoch": 2.3042901371074747, "grad_norm": 3.607984100713724, "learning_rate": 1.1630220839060598e-06, "loss": 1.0696, "step": 78150 }, { "epoch": 2.304437564499484, "grad_norm": 3.4110363139517332, "learning_rate": 1.162556426793535e-06, "loss": 1.0206, "step": 78155 }, { "epoch": 2.3045849918914936, "grad_norm": 3.307870082948728, "learning_rate": 1.1620908458194944e-06, "loss": 1.0414, "step": 78160 }, { "epoch": 2.304732419283503, "grad_norm": 3.5194755103644337, "learning_rate": 1.1616253409976346e-06, "loss": 1.0317, "step": 78165 }, { "epoch": 2.3048798466755125, "grad_norm": 3.3634170187522328, "learning_rate": 1.1611599123416546e-06, "loss": 1.0117, "step": 78170 }, { "epoch": 2.3050272740675215, "grad_norm": 3.522264059795253, "learning_rate": 1.1606945598652516e-06, "loss": 1.0581, "step": 78175 }, { "epoch": 2.3051747014595314, "grad_norm": 3.3826110705950465, "learning_rate": 1.160229283582115e-06, "loss": 1.0729, "step": 78180 }, { "epoch": 2.3053221288515404, "grad_norm": 3.406541640811323, "learning_rate": 1.159764083505942e-06, "loss": 1.0043, "step": 78185 }, { "epoch": 2.30546955624355, "grad_norm": 3.524739188874198, "learning_rate": 1.1592989596504173e-06, "loss": 1.0329, "step": 78190 }, { "epoch": 2.3056169836355593, "grad_norm": 3.51363636311614, "learning_rate": 1.1588339120292286e-06, "loss": 1.0827, "step": 78195 }, { "epoch": 2.305764411027569, "grad_norm": 3.535406707897665, "learning_rate": 1.158368940656061e-06, "loss": 1.0365, "step": 78200 }, { "epoch": 2.3059118384195783, "grad_norm": 3.4551459514904397, "learning_rate": 1.1579040455445968e-06, "loss": 1.0227, "step": 78205 }, { "epoch": 2.3060592658115877, "grad_norm": 3.3612981011980767, "learning_rate": 1.157439226708516e-06, "loss": 1.0792, "step": 78210 }, { "epoch": 2.306206693203597, "grad_norm": 3.6361575321266497, "learning_rate": 1.1569744841614962e-06, "loss": 1.0915, "step": 78215 }, { "epoch": 2.3063541205956066, "grad_norm": 3.4836487812815626, "learning_rate": 1.156509817917213e-06, "loss": 1.0232, "step": 78220 }, { "epoch": 2.306501547987616, "grad_norm": 3.624388503469822, "learning_rate": 1.1560452279893413e-06, "loss": 1.0974, "step": 78225 }, { "epoch": 2.3066489753796255, "grad_norm": 3.517832440933852, "learning_rate": 1.155580714391548e-06, "loss": 1.0878, "step": 78230 }, { "epoch": 2.306796402771635, "grad_norm": 3.5340371303206624, "learning_rate": 1.1551162771375076e-06, "loss": 1.0285, "step": 78235 }, { "epoch": 2.3069438301636445, "grad_norm": 3.6243932497833056, "learning_rate": 1.1546519162408822e-06, "loss": 1.0658, "step": 78240 }, { "epoch": 2.307091257555654, "grad_norm": 3.456875180659293, "learning_rate": 1.1541876317153378e-06, "loss": 1.0378, "step": 78245 }, { "epoch": 2.3072386849476634, "grad_norm": 3.4066183547158317, "learning_rate": 1.153723423574538e-06, "loss": 1.074, "step": 78250 }, { "epoch": 2.307386112339673, "grad_norm": 3.5687422048299906, "learning_rate": 1.1532592918321384e-06, "loss": 1.0335, "step": 78255 }, { "epoch": 2.3075335397316823, "grad_norm": 3.4228793826444006, "learning_rate": 1.1527952365018017e-06, "loss": 1.0311, "step": 78260 }, { "epoch": 2.3076809671236918, "grad_norm": 3.415252497814916, "learning_rate": 1.1523312575971794e-06, "loss": 1.0381, "step": 78265 }, { "epoch": 2.307828394515701, "grad_norm": 3.4188572590070496, "learning_rate": 1.1518673551319266e-06, "loss": 1.0653, "step": 78270 }, { "epoch": 2.3079758219077107, "grad_norm": 3.508496491824287, "learning_rate": 1.151403529119693e-06, "loss": 1.0098, "step": 78275 }, { "epoch": 2.3081232492997197, "grad_norm": 3.316234000443898, "learning_rate": 1.1509397795741285e-06, "loss": 1.0689, "step": 78280 }, { "epoch": 2.308270676691729, "grad_norm": 3.2363701027939427, "learning_rate": 1.1504761065088783e-06, "loss": 1.01, "step": 78285 }, { "epoch": 2.3084181040837386, "grad_norm": 3.3787961847465, "learning_rate": 1.1500125099375875e-06, "loss": 1.0665, "step": 78290 }, { "epoch": 2.308565531475748, "grad_norm": 3.5555717319182865, "learning_rate": 1.1495489898738971e-06, "loss": 1.0767, "step": 78295 }, { "epoch": 2.3087129588677575, "grad_norm": 3.448133958704905, "learning_rate": 1.1490855463314482e-06, "loss": 1.0292, "step": 78300 }, { "epoch": 2.308860386259767, "grad_norm": 3.520859351014346, "learning_rate": 1.1486221793238762e-06, "loss": 1.0964, "step": 78305 }, { "epoch": 2.3090078136517764, "grad_norm": 3.366223320813686, "learning_rate": 1.1481588888648166e-06, "loss": 1.0109, "step": 78310 }, { "epoch": 2.309155241043786, "grad_norm": 3.467954390813218, "learning_rate": 1.1476956749679029e-06, "loss": 1.0058, "step": 78315 }, { "epoch": 2.3093026684357953, "grad_norm": 3.4449019373869176, "learning_rate": 1.1472325376467655e-06, "loss": 1.0452, "step": 78320 }, { "epoch": 2.309450095827805, "grad_norm": 3.594273383217054, "learning_rate": 1.146769476915034e-06, "loss": 1.0539, "step": 78325 }, { "epoch": 2.3095975232198143, "grad_norm": 3.4403765095090675, "learning_rate": 1.146306492786331e-06, "loss": 1.042, "step": 78330 }, { "epoch": 2.3097449506118237, "grad_norm": 3.398769739379737, "learning_rate": 1.1458435852742847e-06, "loss": 1.0348, "step": 78335 }, { "epoch": 2.309892378003833, "grad_norm": 3.455078286046274, "learning_rate": 1.145380754392512e-06, "loss": 0.9945, "step": 78340 }, { "epoch": 2.3100398053958426, "grad_norm": 3.40875886299965, "learning_rate": 1.144918000154637e-06, "loss": 1.0717, "step": 78345 }, { "epoch": 2.310187232787852, "grad_norm": 3.4009346696282163, "learning_rate": 1.144455322574274e-06, "loss": 1.04, "step": 78350 }, { "epoch": 2.3103346601798616, "grad_norm": 3.3767039074553944, "learning_rate": 1.143992721665038e-06, "loss": 1.0476, "step": 78355 }, { "epoch": 2.310482087571871, "grad_norm": 3.6427066279910125, "learning_rate": 1.1435301974405417e-06, "loss": 1.0642, "step": 78360 }, { "epoch": 2.3106295149638805, "grad_norm": 3.443536207367442, "learning_rate": 1.143067749914396e-06, "loss": 1.06, "step": 78365 }, { "epoch": 2.31077694235589, "grad_norm": 3.3532466794760682, "learning_rate": 1.1426053791002095e-06, "loss": 1.0707, "step": 78370 }, { "epoch": 2.310924369747899, "grad_norm": 3.558136464871572, "learning_rate": 1.142143085011586e-06, "loss": 1.0837, "step": 78375 }, { "epoch": 2.3110717971399084, "grad_norm": 3.406496339820383, "learning_rate": 1.1416808676621298e-06, "loss": 1.0751, "step": 78380 }, { "epoch": 2.311219224531918, "grad_norm": 3.5218606986971146, "learning_rate": 1.1412187270654427e-06, "loss": 1.066, "step": 78385 }, { "epoch": 2.3113666519239273, "grad_norm": 3.3525234131811117, "learning_rate": 1.1407566632351238e-06, "loss": 1.0265, "step": 78390 }, { "epoch": 2.3115140793159368, "grad_norm": 3.5315103815437183, "learning_rate": 1.1402946761847689e-06, "loss": 1.0652, "step": 78395 }, { "epoch": 2.3116615067079462, "grad_norm": 3.511784648353444, "learning_rate": 1.1398327659279748e-06, "loss": 1.0854, "step": 78400 }, { "epoch": 2.3118089340999557, "grad_norm": 3.5245105300401662, "learning_rate": 1.1393709324783295e-06, "loss": 1.0456, "step": 78405 }, { "epoch": 2.311956361491965, "grad_norm": 3.418111009166487, "learning_rate": 1.1389091758494279e-06, "loss": 1.0962, "step": 78410 }, { "epoch": 2.3121037888839746, "grad_norm": 3.3463971083868844, "learning_rate": 1.138447496054853e-06, "loss": 1.0165, "step": 78415 }, { "epoch": 2.312251216275984, "grad_norm": 3.2771588493556822, "learning_rate": 1.1379858931081948e-06, "loss": 1.0228, "step": 78420 }, { "epoch": 2.3123986436679935, "grad_norm": 3.579658152558906, "learning_rate": 1.137524367023033e-06, "loss": 1.0531, "step": 78425 }, { "epoch": 2.312546071060003, "grad_norm": 3.404460394219248, "learning_rate": 1.1370629178129506e-06, "loss": 1.0609, "step": 78430 }, { "epoch": 2.3126934984520124, "grad_norm": 3.4096194750275317, "learning_rate": 1.136601545491526e-06, "loss": 1.0784, "step": 78435 }, { "epoch": 2.312840925844022, "grad_norm": 3.4420091410640112, "learning_rate": 1.1361402500723331e-06, "loss": 1.0375, "step": 78440 }, { "epoch": 2.3129883532360314, "grad_norm": 3.5726747424586476, "learning_rate": 1.1356790315689502e-06, "loss": 1.0256, "step": 78445 }, { "epoch": 2.313135780628041, "grad_norm": 3.456766790171712, "learning_rate": 1.1352178899949458e-06, "loss": 1.0645, "step": 78450 }, { "epoch": 2.3132832080200503, "grad_norm": 3.609139865484473, "learning_rate": 1.1347568253638912e-06, "loss": 1.0405, "step": 78455 }, { "epoch": 2.3134306354120597, "grad_norm": 3.6246830969668764, "learning_rate": 1.134295837689353e-06, "loss": 1.0273, "step": 78460 }, { "epoch": 2.313578062804069, "grad_norm": 3.5327774225812947, "learning_rate": 1.1338349269848964e-06, "loss": 1.0615, "step": 78465 }, { "epoch": 2.313725490196078, "grad_norm": 3.2689409787321138, "learning_rate": 1.1333740932640845e-06, "loss": 1.0292, "step": 78470 }, { "epoch": 2.313872917588088, "grad_norm": 3.547913946650412, "learning_rate": 1.1329133365404776e-06, "loss": 1.0298, "step": 78475 }, { "epoch": 2.314020344980097, "grad_norm": 3.4425465231049786, "learning_rate": 1.1324526568276348e-06, "loss": 1.0258, "step": 78480 }, { "epoch": 2.3141677723721066, "grad_norm": 3.4449418955033497, "learning_rate": 1.1319920541391124e-06, "loss": 1.0552, "step": 78485 }, { "epoch": 2.314315199764116, "grad_norm": 3.727625368834105, "learning_rate": 1.1315315284884607e-06, "loss": 1.0785, "step": 78490 }, { "epoch": 2.3144626271561255, "grad_norm": 3.59881072478597, "learning_rate": 1.131071079889236e-06, "loss": 1.0824, "step": 78495 }, { "epoch": 2.314610054548135, "grad_norm": 3.4600932436830942, "learning_rate": 1.1306107083549846e-06, "loss": 1.0673, "step": 78500 }, { "epoch": 2.314610054548135, "eval_loss": 1.083837866783142, "eval_runtime": 4.2468, "eval_samples_per_second": 93.247, "eval_steps_per_second": 3.061, "step": 78500 }, { "epoch": 2.3147574819401444, "grad_norm": 3.5824277330577083, "learning_rate": 1.1301504138992538e-06, "loss": 1.0337, "step": 78505 }, { "epoch": 2.314904909332154, "grad_norm": 3.3683259520826336, "learning_rate": 1.12969019653559e-06, "loss": 1.0524, "step": 78510 }, { "epoch": 2.3150523367241633, "grad_norm": 3.419122058160508, "learning_rate": 1.1292300562775317e-06, "loss": 1.0273, "step": 78515 }, { "epoch": 2.315199764116173, "grad_norm": 3.583259976299855, "learning_rate": 1.1287699931386241e-06, "loss": 1.0655, "step": 78520 }, { "epoch": 2.3153471915081822, "grad_norm": 3.391410384988411, "learning_rate": 1.1283100071324013e-06, "loss": 1.0452, "step": 78525 }, { "epoch": 2.3154946189001917, "grad_norm": 3.6321011543602655, "learning_rate": 1.1278500982724004e-06, "loss": 1.0603, "step": 78530 }, { "epoch": 2.315642046292201, "grad_norm": 3.478396869850459, "learning_rate": 1.1273902665721543e-06, "loss": 1.042, "step": 78535 }, { "epoch": 2.3157894736842106, "grad_norm": 3.523339867874543, "learning_rate": 1.1269305120451944e-06, "loss": 1.0516, "step": 78540 }, { "epoch": 2.31593690107622, "grad_norm": 3.4670603924254335, "learning_rate": 1.1264708347050493e-06, "loss": 1.0618, "step": 78545 }, { "epoch": 2.3160843284682295, "grad_norm": 3.525905815981814, "learning_rate": 1.1260112345652456e-06, "loss": 1.0482, "step": 78550 }, { "epoch": 2.316231755860239, "grad_norm": 3.5303513580724664, "learning_rate": 1.1255517116393078e-06, "loss": 1.0158, "step": 78555 }, { "epoch": 2.3163791832522485, "grad_norm": 3.478495077468392, "learning_rate": 1.1250922659407587e-06, "loss": 1.0622, "step": 78560 }, { "epoch": 2.3165266106442575, "grad_norm": 3.4684251953429395, "learning_rate": 1.124632897483116e-06, "loss": 1.0556, "step": 78565 }, { "epoch": 2.3166740380362674, "grad_norm": 3.5133351399123227, "learning_rate": 1.124173606279898e-06, "loss": 1.0306, "step": 78570 }, { "epoch": 2.3168214654282764, "grad_norm": 3.4654652167384343, "learning_rate": 1.1237143923446203e-06, "loss": 1.052, "step": 78575 }, { "epoch": 2.316968892820286, "grad_norm": 3.4551365869010877, "learning_rate": 1.123255255690795e-06, "loss": 1.0284, "step": 78580 }, { "epoch": 2.3171163202122953, "grad_norm": 3.30577380680796, "learning_rate": 1.1227961963319349e-06, "loss": 1.073, "step": 78585 }, { "epoch": 2.3172637476043048, "grad_norm": 3.5985480740373497, "learning_rate": 1.1223372142815444e-06, "loss": 1.0213, "step": 78590 }, { "epoch": 2.317411174996314, "grad_norm": 3.457914357172124, "learning_rate": 1.1218783095531337e-06, "loss": 1.0467, "step": 78595 }, { "epoch": 2.3175586023883237, "grad_norm": 3.2718508767861927, "learning_rate": 1.1214194821602024e-06, "loss": 1.0589, "step": 78600 }, { "epoch": 2.317706029780333, "grad_norm": 3.2412956663073476, "learning_rate": 1.1209607321162572e-06, "loss": 1.0274, "step": 78605 }, { "epoch": 2.3178534571723426, "grad_norm": 3.4316555372947173, "learning_rate": 1.1205020594347929e-06, "loss": 1.0663, "step": 78610 }, { "epoch": 2.318000884564352, "grad_norm": 3.421993852526412, "learning_rate": 1.1200434641293085e-06, "loss": 1.0445, "step": 78615 }, { "epoch": 2.3181483119563615, "grad_norm": 3.5472415897729177, "learning_rate": 1.119584946213298e-06, "loss": 1.0521, "step": 78620 }, { "epoch": 2.318295739348371, "grad_norm": 3.338627070069859, "learning_rate": 1.1191265057002541e-06, "loss": 1.0313, "step": 78625 }, { "epoch": 2.3184431667403804, "grad_norm": 3.523899282531896, "learning_rate": 1.118668142603668e-06, "loss": 1.0323, "step": 78630 }, { "epoch": 2.31859059413239, "grad_norm": 3.5230468459894198, "learning_rate": 1.118209856937025e-06, "loss": 1.0743, "step": 78635 }, { "epoch": 2.3187380215243993, "grad_norm": 3.621450793139277, "learning_rate": 1.1177516487138127e-06, "loss": 1.0337, "step": 78640 }, { "epoch": 2.318885448916409, "grad_norm": 3.473276446179934, "learning_rate": 1.1172935179475137e-06, "loss": 1.0456, "step": 78645 }, { "epoch": 2.3190328763084183, "grad_norm": 3.560080730139094, "learning_rate": 1.1168354646516088e-06, "loss": 1.0968, "step": 78650 }, { "epoch": 2.3191803037004277, "grad_norm": 3.4269177772178976, "learning_rate": 1.1163774888395777e-06, "loss": 1.0521, "step": 78655 }, { "epoch": 2.3193277310924367, "grad_norm": 3.5481049860830898, "learning_rate": 1.1159195905248963e-06, "loss": 1.0379, "step": 78660 }, { "epoch": 2.3194751584844466, "grad_norm": 3.408512465015311, "learning_rate": 1.1154617697210384e-06, "loss": 0.9972, "step": 78665 }, { "epoch": 2.3196225858764556, "grad_norm": 3.5711276986910354, "learning_rate": 1.1150040264414773e-06, "loss": 1.0771, "step": 78670 }, { "epoch": 2.319770013268465, "grad_norm": 3.393865924093063, "learning_rate": 1.1145463606996794e-06, "loss": 1.0653, "step": 78675 }, { "epoch": 2.3199174406604746, "grad_norm": 3.440241614890637, "learning_rate": 1.1140887725091168e-06, "loss": 1.0574, "step": 78680 }, { "epoch": 2.320064868052484, "grad_norm": 3.3165416741632874, "learning_rate": 1.1136312618832504e-06, "loss": 1.0339, "step": 78685 }, { "epoch": 2.3202122954444935, "grad_norm": 3.463699744361695, "learning_rate": 1.1131738288355448e-06, "loss": 1.0457, "step": 78690 }, { "epoch": 2.320359722836503, "grad_norm": 3.440654728671708, "learning_rate": 1.1127164733794613e-06, "loss": 1.0058, "step": 78695 }, { "epoch": 2.3205071502285124, "grad_norm": 3.3484329469615024, "learning_rate": 1.1122591955284544e-06, "loss": 1.0217, "step": 78700 }, { "epoch": 2.320654577620522, "grad_norm": 3.6087473593034822, "learning_rate": 1.1118019952959853e-06, "loss": 1.0497, "step": 78705 }, { "epoch": 2.3208020050125313, "grad_norm": 3.535343011846825, "learning_rate": 1.1113448726955038e-06, "loss": 1.0148, "step": 78710 }, { "epoch": 2.3209494324045408, "grad_norm": 3.5191759132057774, "learning_rate": 1.1108878277404625e-06, "loss": 1.0331, "step": 78715 }, { "epoch": 2.3210968597965502, "grad_norm": 3.4852181820048083, "learning_rate": 1.1104308604443102e-06, "loss": 1.0607, "step": 78720 }, { "epoch": 2.3212442871885597, "grad_norm": 3.406205075271683, "learning_rate": 1.1099739708204943e-06, "loss": 1.0236, "step": 78725 }, { "epoch": 2.321391714580569, "grad_norm": 3.602333324862917, "learning_rate": 1.1095171588824585e-06, "loss": 1.037, "step": 78730 }, { "epoch": 2.3215391419725786, "grad_norm": 3.5207782207689555, "learning_rate": 1.1090604246436456e-06, "loss": 1.0263, "step": 78735 }, { "epoch": 2.321686569364588, "grad_norm": 3.485211632229619, "learning_rate": 1.1086037681174957e-06, "loss": 1.0688, "step": 78740 }, { "epoch": 2.3218339967565975, "grad_norm": 3.564524835167166, "learning_rate": 1.1081471893174468e-06, "loss": 1.0211, "step": 78745 }, { "epoch": 2.321981424148607, "grad_norm": 3.400335242020021, "learning_rate": 1.107690688256931e-06, "loss": 1.0581, "step": 78750 }, { "epoch": 2.3221288515406164, "grad_norm": 3.355080155695219, "learning_rate": 1.1072342649493867e-06, "loss": 1.0683, "step": 78755 }, { "epoch": 2.322276278932626, "grad_norm": 3.581328740199798, "learning_rate": 1.1067779194082404e-06, "loss": 1.0688, "step": 78760 }, { "epoch": 2.322423706324635, "grad_norm": 3.288282198316038, "learning_rate": 1.106321651646922e-06, "loss": 1.0564, "step": 78765 }, { "epoch": 2.3225711337166444, "grad_norm": 3.2699128581010464, "learning_rate": 1.1058654616788595e-06, "loss": 1.017, "step": 78770 }, { "epoch": 2.322718561108654, "grad_norm": 3.4123928815162685, "learning_rate": 1.1054093495174723e-06, "loss": 1.0202, "step": 78775 }, { "epoch": 2.3228659885006633, "grad_norm": 3.351922653145361, "learning_rate": 1.1049533151761873e-06, "loss": 1.0608, "step": 78780 }, { "epoch": 2.3230134158926727, "grad_norm": 3.425749074750199, "learning_rate": 1.1044973586684199e-06, "loss": 1.0343, "step": 78785 }, { "epoch": 2.323160843284682, "grad_norm": 3.398330887607979, "learning_rate": 1.1040414800075888e-06, "loss": 1.0299, "step": 78790 }, { "epoch": 2.3233082706766917, "grad_norm": 3.5404054233979583, "learning_rate": 1.1035856792071084e-06, "loss": 1.0147, "step": 78795 }, { "epoch": 2.323455698068701, "grad_norm": 3.6192392392429995, "learning_rate": 1.1031299562803913e-06, "loss": 1.0618, "step": 78800 }, { "epoch": 2.3236031254607106, "grad_norm": 3.336831379082711, "learning_rate": 1.1026743112408474e-06, "loss": 1.0464, "step": 78805 }, { "epoch": 2.32375055285272, "grad_norm": 3.249354969403763, "learning_rate": 1.1022187441018856e-06, "loss": 0.9905, "step": 78810 }, { "epoch": 2.3238979802447295, "grad_norm": 3.4232113458547113, "learning_rate": 1.10176325487691e-06, "loss": 1.0736, "step": 78815 }, { "epoch": 2.324045407636739, "grad_norm": 3.34496465345952, "learning_rate": 1.1013078435793258e-06, "loss": 1.0149, "step": 78820 }, { "epoch": 2.3241928350287484, "grad_norm": 3.3937444983223983, "learning_rate": 1.1008525102225317e-06, "loss": 1.0326, "step": 78825 }, { "epoch": 2.324340262420758, "grad_norm": 3.34402358207738, "learning_rate": 1.1003972548199278e-06, "loss": 1.033, "step": 78830 }, { "epoch": 2.3244876898127673, "grad_norm": 3.4841287059379544, "learning_rate": 1.0999420773849099e-06, "loss": 1.0474, "step": 78835 }, { "epoch": 2.324635117204777, "grad_norm": 3.490509775882418, "learning_rate": 1.0994869779308726e-06, "loss": 1.0731, "step": 78840 }, { "epoch": 2.3247825445967862, "grad_norm": 3.553439135780328, "learning_rate": 1.0990319564712085e-06, "loss": 1.0373, "step": 78845 }, { "epoch": 2.3249299719887957, "grad_norm": 3.5699178512444183, "learning_rate": 1.098577013019304e-06, "loss": 1.061, "step": 78850 }, { "epoch": 2.325077399380805, "grad_norm": 3.3503685362984106, "learning_rate": 1.0981221475885508e-06, "loss": 1.0496, "step": 78855 }, { "epoch": 2.325224826772814, "grad_norm": 3.4528967511985003, "learning_rate": 1.097667360192329e-06, "loss": 1.0639, "step": 78860 }, { "epoch": 2.325372254164824, "grad_norm": 3.384040354103103, "learning_rate": 1.0972126508440266e-06, "loss": 1.0182, "step": 78865 }, { "epoch": 2.325519681556833, "grad_norm": 3.4460857698808343, "learning_rate": 1.0967580195570193e-06, "loss": 1.0459, "step": 78870 }, { "epoch": 2.3256671089488425, "grad_norm": 3.491849026316907, "learning_rate": 1.0963034663446874e-06, "loss": 1.0277, "step": 78875 }, { "epoch": 2.325814536340852, "grad_norm": 3.465414990630029, "learning_rate": 1.095848991220406e-06, "loss": 1.0575, "step": 78880 }, { "epoch": 2.3259619637328615, "grad_norm": 3.5415175339663847, "learning_rate": 1.0953945941975488e-06, "loss": 1.066, "step": 78885 }, { "epoch": 2.326109391124871, "grad_norm": 3.487118396894947, "learning_rate": 1.0949402752894883e-06, "loss": 1.0208, "step": 78890 }, { "epoch": 2.3262568185168804, "grad_norm": 3.5746313575772857, "learning_rate": 1.0944860345095907e-06, "loss": 1.0294, "step": 78895 }, { "epoch": 2.32640424590889, "grad_norm": 3.713417108658972, "learning_rate": 1.0940318718712237e-06, "loss": 1.0564, "step": 78900 }, { "epoch": 2.3265516733008993, "grad_norm": 3.329819051094513, "learning_rate": 1.0935777873877513e-06, "loss": 1.0503, "step": 78905 }, { "epoch": 2.3266991006929088, "grad_norm": 3.332518035981055, "learning_rate": 1.0931237810725364e-06, "loss": 1.049, "step": 78910 }, { "epoch": 2.326846528084918, "grad_norm": 3.413912807842122, "learning_rate": 1.092669852938938e-06, "loss": 1.0524, "step": 78915 }, { "epoch": 2.3269939554769277, "grad_norm": 3.3632678601583144, "learning_rate": 1.0922160030003134e-06, "loss": 1.0507, "step": 78920 }, { "epoch": 2.327141382868937, "grad_norm": 3.5084226388698774, "learning_rate": 1.0917622312700178e-06, "loss": 1.0235, "step": 78925 }, { "epoch": 2.3272888102609466, "grad_norm": 3.40552053053882, "learning_rate": 1.091308537761405e-06, "loss": 1.0545, "step": 78930 }, { "epoch": 2.327436237652956, "grad_norm": 3.361795353229186, "learning_rate": 1.0908549224878223e-06, "loss": 1.0471, "step": 78935 }, { "epoch": 2.3275836650449655, "grad_norm": 3.4274655372699376, "learning_rate": 1.090401385462622e-06, "loss": 1.0473, "step": 78940 }, { "epoch": 2.327731092436975, "grad_norm": 3.417294409096818, "learning_rate": 1.0899479266991469e-06, "loss": 1.0721, "step": 78945 }, { "epoch": 2.3278785198289844, "grad_norm": 3.533895299563903, "learning_rate": 1.0894945462107414e-06, "loss": 1.0365, "step": 78950 }, { "epoch": 2.3280259472209934, "grad_norm": 3.322851930728371, "learning_rate": 1.089041244010748e-06, "loss": 1.03, "step": 78955 }, { "epoch": 2.3281733746130033, "grad_norm": 3.4788406285233657, "learning_rate": 1.0885880201125025e-06, "loss": 1.036, "step": 78960 }, { "epoch": 2.3283208020050123, "grad_norm": 3.29107386216725, "learning_rate": 1.0881348745293455e-06, "loss": 1.0081, "step": 78965 }, { "epoch": 2.328468229397022, "grad_norm": 3.5423235019753623, "learning_rate": 1.0876818072746082e-06, "loss": 1.06, "step": 78970 }, { "epoch": 2.3286156567890313, "grad_norm": 3.5390912212635173, "learning_rate": 1.087228818361624e-06, "loss": 1.0798, "step": 78975 }, { "epoch": 2.3287630841810407, "grad_norm": 3.4940596704931735, "learning_rate": 1.0867759078037221e-06, "loss": 1.0273, "step": 78980 }, { "epoch": 2.32891051157305, "grad_norm": 3.4440107918097893, "learning_rate": 1.08632307561423e-06, "loss": 1.0513, "step": 78985 }, { "epoch": 2.3290579389650596, "grad_norm": 3.488902842251115, "learning_rate": 1.0858703218064737e-06, "loss": 1.042, "step": 78990 }, { "epoch": 2.329205366357069, "grad_norm": 3.5871787765796657, "learning_rate": 1.0854176463937747e-06, "loss": 1.0294, "step": 78995 }, { "epoch": 2.3293527937490786, "grad_norm": 3.60643861436279, "learning_rate": 1.0849650493894543e-06, "loss": 1.0243, "step": 79000 }, { "epoch": 2.3293527937490786, "eval_loss": 1.083800196647644, "eval_runtime": 4.1474, "eval_samples_per_second": 95.481, "eval_steps_per_second": 3.134, "step": 79000 }, { "epoch": 2.329500221141088, "grad_norm": 3.413124495307767, "learning_rate": 1.0845125308068318e-06, "loss": 1.0161, "step": 79005 }, { "epoch": 2.3296476485330975, "grad_norm": 3.4273908488600835, "learning_rate": 1.084060090659219e-06, "loss": 1.0663, "step": 79010 }, { "epoch": 2.329795075925107, "grad_norm": 3.5992676266366934, "learning_rate": 1.083607728959935e-06, "loss": 1.0704, "step": 79015 }, { "epoch": 2.3299425033171164, "grad_norm": 3.510859345459362, "learning_rate": 1.083155445722287e-06, "loss": 1.0414, "step": 79020 }, { "epoch": 2.330089930709126, "grad_norm": 3.3753064864038262, "learning_rate": 1.0827032409595851e-06, "loss": 1.0509, "step": 79025 }, { "epoch": 2.3302373581011353, "grad_norm": 3.51939278788501, "learning_rate": 1.0822511146851373e-06, "loss": 1.0552, "step": 79030 }, { "epoch": 2.3303847854931448, "grad_norm": 3.595213011057218, "learning_rate": 1.0817990669122445e-06, "loss": 1.0393, "step": 79035 }, { "epoch": 2.330532212885154, "grad_norm": 3.3816658480708406, "learning_rate": 1.0813470976542129e-06, "loss": 1.0197, "step": 79040 }, { "epoch": 2.3306796402771637, "grad_norm": 3.554295940208269, "learning_rate": 1.0808952069243392e-06, "loss": 1.0595, "step": 79045 }, { "epoch": 2.3308270676691727, "grad_norm": 3.473757475833243, "learning_rate": 1.080443394735922e-06, "loss": 1.0634, "step": 79050 }, { "epoch": 2.3309744950611826, "grad_norm": 3.44278275121134, "learning_rate": 1.0799916611022561e-06, "loss": 1.0203, "step": 79055 }, { "epoch": 2.3311219224531916, "grad_norm": 3.498185935190055, "learning_rate": 1.0795400060366345e-06, "loss": 1.0628, "step": 79060 }, { "epoch": 2.331269349845201, "grad_norm": 3.4374072745692152, "learning_rate": 1.0790884295523472e-06, "loss": 1.042, "step": 79065 }, { "epoch": 2.3314167772372105, "grad_norm": 3.3970216802807145, "learning_rate": 1.0786369316626832e-06, "loss": 1.0267, "step": 79070 }, { "epoch": 2.33156420462922, "grad_norm": 3.56694959729988, "learning_rate": 1.0781855123809277e-06, "loss": 1.0443, "step": 79075 }, { "epoch": 2.3317116320212294, "grad_norm": 3.4709307579118414, "learning_rate": 1.0777341717203656e-06, "loss": 1.036, "step": 79080 }, { "epoch": 2.331859059413239, "grad_norm": 3.6046168767673405, "learning_rate": 1.0772829096942756e-06, "loss": 1.0155, "step": 79085 }, { "epoch": 2.3320064868052484, "grad_norm": 3.4512724389238203, "learning_rate": 1.076831726315938e-06, "loss": 1.054, "step": 79090 }, { "epoch": 2.332153914197258, "grad_norm": 3.3965407610998426, "learning_rate": 1.0763806215986292e-06, "loss": 1.056, "step": 79095 }, { "epoch": 2.3323013415892673, "grad_norm": 3.671486405737946, "learning_rate": 1.0759295955556236e-06, "loss": 1.0606, "step": 79100 }, { "epoch": 2.3324487689812767, "grad_norm": 3.4599898991139217, "learning_rate": 1.0754786482001945e-06, "loss": 1.0308, "step": 79105 }, { "epoch": 2.332596196373286, "grad_norm": 3.4120804425479974, "learning_rate": 1.0750277795456075e-06, "loss": 1.036, "step": 79110 }, { "epoch": 2.3327436237652956, "grad_norm": 3.2971302854316535, "learning_rate": 1.0745769896051353e-06, "loss": 1.0085, "step": 79115 }, { "epoch": 2.332891051157305, "grad_norm": 3.335999793463928, "learning_rate": 1.0741262783920372e-06, "loss": 1.014, "step": 79120 }, { "epoch": 2.3330384785493146, "grad_norm": 3.4776201047895383, "learning_rate": 1.073675645919582e-06, "loss": 1.0252, "step": 79125 }, { "epoch": 2.333185905941324, "grad_norm": 3.434865749058255, "learning_rate": 1.0732250922010254e-06, "loss": 1.0403, "step": 79130 }, { "epoch": 2.3333333333333335, "grad_norm": 3.213556891966232, "learning_rate": 1.0727746172496268e-06, "loss": 1.0711, "step": 79135 }, { "epoch": 2.333480760725343, "grad_norm": 3.4030586776337026, "learning_rate": 1.0723242210786423e-06, "loss": 1.0509, "step": 79140 }, { "epoch": 2.3336281881173524, "grad_norm": 3.6386334240286202, "learning_rate": 1.0718739037013254e-06, "loss": 1.044, "step": 79145 }, { "epoch": 2.333775615509362, "grad_norm": 3.424609794275727, "learning_rate": 1.0714236651309282e-06, "loss": 0.9955, "step": 79150 }, { "epoch": 2.333923042901371, "grad_norm": 3.587408310950824, "learning_rate": 1.0709735053806968e-06, "loss": 1.0263, "step": 79155 }, { "epoch": 2.3340704702933803, "grad_norm": 3.3699168511715296, "learning_rate": 1.0705234244638793e-06, "loss": 1.0647, "step": 79160 }, { "epoch": 2.33421789768539, "grad_norm": 3.4404491058085127, "learning_rate": 1.0700734223937198e-06, "loss": 1.0466, "step": 79165 }, { "epoch": 2.3343653250773992, "grad_norm": 3.2882513172275836, "learning_rate": 1.0696234991834598e-06, "loss": 1.0569, "step": 79170 }, { "epoch": 2.3345127524694087, "grad_norm": 3.4955668572576046, "learning_rate": 1.069173654846339e-06, "loss": 1.0559, "step": 79175 }, { "epoch": 2.334660179861418, "grad_norm": 3.447118787443471, "learning_rate": 1.0687238893955949e-06, "loss": 1.0607, "step": 79180 }, { "epoch": 2.3348076072534276, "grad_norm": 3.513850567931013, "learning_rate": 1.0682742028444618e-06, "loss": 1.1311, "step": 79185 }, { "epoch": 2.334955034645437, "grad_norm": 3.562854842176707, "learning_rate": 1.0678245952061735e-06, "loss": 1.0836, "step": 79190 }, { "epoch": 2.3351024620374465, "grad_norm": 3.3125823503922227, "learning_rate": 1.067375066493957e-06, "loss": 1.0398, "step": 79195 }, { "epoch": 2.335249889429456, "grad_norm": 3.4731299647657035, "learning_rate": 1.066925616721045e-06, "loss": 1.0386, "step": 79200 }, { "epoch": 2.3353973168214655, "grad_norm": 3.5219744809667537, "learning_rate": 1.0664762459006588e-06, "loss": 1.0237, "step": 79205 }, { "epoch": 2.335544744213475, "grad_norm": 3.488055438445309, "learning_rate": 1.0660269540460235e-06, "loss": 1.0118, "step": 79210 }, { "epoch": 2.3356921716054844, "grad_norm": 3.720449995996349, "learning_rate": 1.0655777411703612e-06, "loss": 1.0938, "step": 79215 }, { "epoch": 2.335839598997494, "grad_norm": 3.716452930982112, "learning_rate": 1.0651286072868868e-06, "loss": 1.0299, "step": 79220 }, { "epoch": 2.3359870263895033, "grad_norm": 3.396307978127981, "learning_rate": 1.0646795524088212e-06, "loss": 1.0584, "step": 79225 }, { "epoch": 2.3361344537815127, "grad_norm": 3.455653767128534, "learning_rate": 1.0642305765493748e-06, "loss": 1.0243, "step": 79230 }, { "epoch": 2.336281881173522, "grad_norm": 3.5091656824890087, "learning_rate": 1.0637816797217608e-06, "loss": 1.0681, "step": 79235 }, { "epoch": 2.3364293085655317, "grad_norm": 3.4757668941071787, "learning_rate": 1.063332861939188e-06, "loss": 1.009, "step": 79240 }, { "epoch": 2.336576735957541, "grad_norm": 3.4483145127460837, "learning_rate": 1.0628841232148636e-06, "loss": 1.0263, "step": 79245 }, { "epoch": 2.33672416334955, "grad_norm": 3.404023406482852, "learning_rate": 1.0624354635619922e-06, "loss": 1.0435, "step": 79250 }, { "epoch": 2.3368715907415596, "grad_norm": 3.2941799318674247, "learning_rate": 1.0619868829937763e-06, "loss": 1.0306, "step": 79255 }, { "epoch": 2.337019018133569, "grad_norm": 3.4824853122984316, "learning_rate": 1.0615383815234159e-06, "loss": 1.0514, "step": 79260 }, { "epoch": 2.3371664455255785, "grad_norm": 3.43127515254736, "learning_rate": 1.0610899591641092e-06, "loss": 1.019, "step": 79265 }, { "epoch": 2.337313872917588, "grad_norm": 3.622906874024979, "learning_rate": 1.0606416159290492e-06, "loss": 1.0619, "step": 79270 }, { "epoch": 2.3374613003095974, "grad_norm": 3.563102028498317, "learning_rate": 1.0601933518314322e-06, "loss": 1.004, "step": 79275 }, { "epoch": 2.337608727701607, "grad_norm": 3.5883264044886753, "learning_rate": 1.0597451668844467e-06, "loss": 1.0732, "step": 79280 }, { "epoch": 2.3377561550936163, "grad_norm": 3.3733736744256824, "learning_rate": 1.0592970611012812e-06, "loss": 1.0583, "step": 79285 }, { "epoch": 2.337903582485626, "grad_norm": 3.334878542526572, "learning_rate": 1.0588490344951233e-06, "loss": 1.0111, "step": 79290 }, { "epoch": 2.3380510098776353, "grad_norm": 3.649750156056738, "learning_rate": 1.0584010870791537e-06, "loss": 1.0742, "step": 79295 }, { "epoch": 2.3381984372696447, "grad_norm": 3.6863788715087082, "learning_rate": 1.0579532188665573e-06, "loss": 1.0047, "step": 79300 }, { "epoch": 2.338345864661654, "grad_norm": 3.4892042922757205, "learning_rate": 1.0575054298705092e-06, "loss": 1.0471, "step": 79305 }, { "epoch": 2.3384932920536636, "grad_norm": 3.6207121925005628, "learning_rate": 1.057057720104191e-06, "loss": 1.0635, "step": 79310 }, { "epoch": 2.338640719445673, "grad_norm": 3.4315617124332394, "learning_rate": 1.056610089580773e-06, "loss": 1.0193, "step": 79315 }, { "epoch": 2.3387881468376825, "grad_norm": 3.3394738618720243, "learning_rate": 1.0561625383134289e-06, "loss": 1.0062, "step": 79320 }, { "epoch": 2.338935574229692, "grad_norm": 3.554680069371118, "learning_rate": 1.0557150663153278e-06, "loss": 1.0619, "step": 79325 }, { "epoch": 2.3390830016217015, "grad_norm": 3.413782701409359, "learning_rate": 1.0552676735996375e-06, "loss": 1.0356, "step": 79330 }, { "epoch": 2.339230429013711, "grad_norm": 3.389444869618907, "learning_rate": 1.0548203601795231e-06, "loss": 1.0432, "step": 79335 }, { "epoch": 2.3393778564057204, "grad_norm": 3.3819924938501043, "learning_rate": 1.0543731260681482e-06, "loss": 1.041, "step": 79340 }, { "epoch": 2.3395252837977294, "grad_norm": 3.529296082036836, "learning_rate": 1.0539259712786714e-06, "loss": 1.0528, "step": 79345 }, { "epoch": 2.3396727111897393, "grad_norm": 3.4086119566215283, "learning_rate": 1.053478895824251e-06, "loss": 1.0164, "step": 79350 }, { "epoch": 2.3398201385817483, "grad_norm": 3.570174481489913, "learning_rate": 1.0530318997180435e-06, "loss": 1.0382, "step": 79355 }, { "epoch": 2.3399675659737578, "grad_norm": 3.5099083776992677, "learning_rate": 1.0525849829732017e-06, "loss": 1.0751, "step": 79360 }, { "epoch": 2.3401149933657672, "grad_norm": 3.4644720256653905, "learning_rate": 1.0521381456028783e-06, "loss": 1.0335, "step": 79365 }, { "epoch": 2.3402624207577767, "grad_norm": 3.4320308888057256, "learning_rate": 1.051691387620218e-06, "loss": 1.0848, "step": 79370 }, { "epoch": 2.340409848149786, "grad_norm": 3.400398398493297, "learning_rate": 1.0512447090383724e-06, "loss": 1.0188, "step": 79375 }, { "epoch": 2.3405572755417956, "grad_norm": 3.43260435410351, "learning_rate": 1.0507981098704803e-06, "loss": 1.0645, "step": 79380 }, { "epoch": 2.340704702933805, "grad_norm": 3.7353269136907237, "learning_rate": 1.0503515901296882e-06, "loss": 1.0638, "step": 79385 }, { "epoch": 2.3408521303258145, "grad_norm": 3.3238792102970875, "learning_rate": 1.0499051498291326e-06, "loss": 1.0209, "step": 79390 }, { "epoch": 2.340999557717824, "grad_norm": 3.368164433305097, "learning_rate": 1.0494587889819508e-06, "loss": 1.0, "step": 79395 }, { "epoch": 2.3411469851098334, "grad_norm": 3.4896370499018405, "learning_rate": 1.0490125076012778e-06, "loss": 1.0942, "step": 79400 }, { "epoch": 2.341294412501843, "grad_norm": 3.326167654228733, "learning_rate": 1.048566305700246e-06, "loss": 1.0459, "step": 79405 }, { "epoch": 2.3414418398938524, "grad_norm": 3.484739784390428, "learning_rate": 1.0481201832919867e-06, "loss": 1.0015, "step": 79410 }, { "epoch": 2.341589267285862, "grad_norm": 3.3614362321202567, "learning_rate": 1.047674140389625e-06, "loss": 1.018, "step": 79415 }, { "epoch": 2.3417366946778713, "grad_norm": 3.6141337040668295, "learning_rate": 1.0472281770062875e-06, "loss": 1.0672, "step": 79420 }, { "epoch": 2.3418841220698807, "grad_norm": 3.3418486393504825, "learning_rate": 1.0467822931550967e-06, "loss": 1.0626, "step": 79425 }, { "epoch": 2.34203154946189, "grad_norm": 3.550010088596773, "learning_rate": 1.0463364888491743e-06, "loss": 1.0597, "step": 79430 }, { "epoch": 2.3421789768538996, "grad_norm": 3.2500107088023626, "learning_rate": 1.0458907641016376e-06, "loss": 1.0803, "step": 79435 }, { "epoch": 2.3423264042459087, "grad_norm": 3.418941430366901, "learning_rate": 1.045445118925603e-06, "loss": 1.0421, "step": 79440 }, { "epoch": 2.3424738316379186, "grad_norm": 3.5077333691757104, "learning_rate": 1.0449995533341843e-06, "loss": 1.0214, "step": 79445 }, { "epoch": 2.3426212590299276, "grad_norm": 3.5516166331069314, "learning_rate": 1.0445540673404933e-06, "loss": 1.0882, "step": 79450 }, { "epoch": 2.342768686421937, "grad_norm": 3.3441198458912234, "learning_rate": 1.044108660957636e-06, "loss": 1.0314, "step": 79455 }, { "epoch": 2.3429161138139465, "grad_norm": 3.44406407136471, "learning_rate": 1.0436633341987233e-06, "loss": 1.0215, "step": 79460 }, { "epoch": 2.343063541205956, "grad_norm": 3.4008825476395326, "learning_rate": 1.0432180870768563e-06, "loss": 0.9959, "step": 79465 }, { "epoch": 2.3432109685979654, "grad_norm": 3.5784238210602437, "learning_rate": 1.042772919605138e-06, "loss": 1.022, "step": 79470 }, { "epoch": 2.343358395989975, "grad_norm": 3.5153389658894034, "learning_rate": 1.042327831796669e-06, "loss": 1.039, "step": 79475 }, { "epoch": 2.3435058233819843, "grad_norm": 3.6170625443931868, "learning_rate": 1.041882823664543e-06, "loss": 1.059, "step": 79480 }, { "epoch": 2.343653250773994, "grad_norm": 3.3954910759866626, "learning_rate": 1.0414378952218596e-06, "loss": 1.0503, "step": 79485 }, { "epoch": 2.3438006781660032, "grad_norm": 3.4145328364520355, "learning_rate": 1.0409930464817079e-06, "loss": 1.0267, "step": 79490 }, { "epoch": 2.3439481055580127, "grad_norm": 3.3304212640471267, "learning_rate": 1.040548277457179e-06, "loss": 1.0023, "step": 79495 }, { "epoch": 2.344095532950022, "grad_norm": 3.5208097322876957, "learning_rate": 1.0401035881613611e-06, "loss": 1.0381, "step": 79500 }, { "epoch": 2.344095532950022, "eval_loss": 1.083081603050232, "eval_runtime": 4.2728, "eval_samples_per_second": 92.679, "eval_steps_per_second": 3.043, "step": 79500 }, { "epoch": 2.3442429603420316, "grad_norm": 3.3531337000778323, "learning_rate": 1.0396589786073394e-06, "loss": 1.0085, "step": 79505 }, { "epoch": 2.344390387734041, "grad_norm": 3.5423282455534437, "learning_rate": 1.0392144488081972e-06, "loss": 1.0327, "step": 79510 }, { "epoch": 2.3445378151260505, "grad_norm": 3.5458534390522463, "learning_rate": 1.038769998777015e-06, "loss": 1.0475, "step": 79515 }, { "epoch": 2.34468524251806, "grad_norm": 3.5916420396957216, "learning_rate": 1.0383256285268717e-06, "loss": 1.0341, "step": 79520 }, { "epoch": 2.3448326699100694, "grad_norm": 3.50440760661455, "learning_rate": 1.0378813380708443e-06, "loss": 1.0872, "step": 79525 }, { "epoch": 2.344980097302079, "grad_norm": 3.517583589532331, "learning_rate": 1.0374371274220032e-06, "loss": 1.0824, "step": 79530 }, { "epoch": 2.345127524694088, "grad_norm": 3.5592410379287993, "learning_rate": 1.0369929965934245e-06, "loss": 1.059, "step": 79535 }, { "epoch": 2.345274952086098, "grad_norm": 3.507258321029101, "learning_rate": 1.0365489455981731e-06, "loss": 1.0864, "step": 79540 }, { "epoch": 2.345422379478107, "grad_norm": 3.4526855871113225, "learning_rate": 1.0361049744493177e-06, "loss": 1.009, "step": 79545 }, { "epoch": 2.3455698068701163, "grad_norm": 3.326047567354546, "learning_rate": 1.0356610831599237e-06, "loss": 1.0377, "step": 79550 }, { "epoch": 2.3457172342621257, "grad_norm": 3.6302063661743245, "learning_rate": 1.0352172717430492e-06, "loss": 1.0422, "step": 79555 }, { "epoch": 2.345864661654135, "grad_norm": 3.514253578332904, "learning_rate": 1.0347735402117593e-06, "loss": 1.0122, "step": 79560 }, { "epoch": 2.3460120890461447, "grad_norm": 3.4219729811377864, "learning_rate": 1.0343298885791056e-06, "loss": 1.0776, "step": 79565 }, { "epoch": 2.346159516438154, "grad_norm": 3.6396339091076215, "learning_rate": 1.0338863168581484e-06, "loss": 1.0537, "step": 79570 }, { "epoch": 2.3463069438301636, "grad_norm": 3.634176829211458, "learning_rate": 1.0334428250619363e-06, "loss": 1.0739, "step": 79575 }, { "epoch": 2.346454371222173, "grad_norm": 3.4741422038203686, "learning_rate": 1.0329994132035215e-06, "loss": 1.0616, "step": 79580 }, { "epoch": 2.3466017986141825, "grad_norm": 3.3541338119262663, "learning_rate": 1.0325560812959513e-06, "loss": 1.0272, "step": 79585 }, { "epoch": 2.346749226006192, "grad_norm": 3.445009050535526, "learning_rate": 1.0321128293522711e-06, "loss": 1.0435, "step": 79590 }, { "epoch": 2.3468966533982014, "grad_norm": 3.4231212644148905, "learning_rate": 1.031669657385525e-06, "loss": 1.0079, "step": 79595 }, { "epoch": 2.347044080790211, "grad_norm": 3.6123909426083927, "learning_rate": 1.0312265654087543e-06, "loss": 1.0519, "step": 79600 }, { "epoch": 2.3471915081822203, "grad_norm": 3.4939248966026137, "learning_rate": 1.0307835534349947e-06, "loss": 1.0688, "step": 79605 }, { "epoch": 2.34733893557423, "grad_norm": 3.5334890865359596, "learning_rate": 1.0303406214772846e-06, "loss": 1.0528, "step": 79610 }, { "epoch": 2.3474863629662392, "grad_norm": 3.562110432494837, "learning_rate": 1.0298977695486573e-06, "loss": 1.065, "step": 79615 }, { "epoch": 2.3476337903582487, "grad_norm": 3.4648254468140185, "learning_rate": 1.0294549976621436e-06, "loss": 1.0799, "step": 79620 }, { "epoch": 2.347781217750258, "grad_norm": 3.5715672625224473, "learning_rate": 1.0290123058307747e-06, "loss": 1.0386, "step": 79625 }, { "epoch": 2.3479286451422676, "grad_norm": 3.5720185533532316, "learning_rate": 1.0285696940675733e-06, "loss": 1.0536, "step": 79630 }, { "epoch": 2.348076072534277, "grad_norm": 3.624845134307419, "learning_rate": 1.0281271623855686e-06, "loss": 1.0906, "step": 79635 }, { "epoch": 2.348223499926286, "grad_norm": 3.442354271520297, "learning_rate": 1.027684710797778e-06, "loss": 1.0493, "step": 79640 }, { "epoch": 2.3483709273182956, "grad_norm": 3.3945259429773573, "learning_rate": 1.0272423393172253e-06, "loss": 1.0375, "step": 79645 }, { "epoch": 2.348518354710305, "grad_norm": 3.5364987822162974, "learning_rate": 1.0268000479569248e-06, "loss": 1.0689, "step": 79650 }, { "epoch": 2.3486657821023145, "grad_norm": 3.558743788681179, "learning_rate": 1.0263578367298925e-06, "loss": 1.0459, "step": 79655 }, { "epoch": 2.348813209494324, "grad_norm": 3.4598044350705175, "learning_rate": 1.0259157056491407e-06, "loss": 1.041, "step": 79660 }, { "epoch": 2.3489606368863334, "grad_norm": 3.5529308016866366, "learning_rate": 1.0254736547276804e-06, "loss": 1.0534, "step": 79665 }, { "epoch": 2.349108064278343, "grad_norm": 3.260410632824712, "learning_rate": 1.0250316839785195e-06, "loss": 1.0427, "step": 79670 }, { "epoch": 2.3492554916703523, "grad_norm": 3.5926929182293095, "learning_rate": 1.024589793414662e-06, "loss": 1.0866, "step": 79675 }, { "epoch": 2.3494029190623618, "grad_norm": 3.3677267432555333, "learning_rate": 1.0241479830491122e-06, "loss": 1.0606, "step": 79680 }, { "epoch": 2.349550346454371, "grad_norm": 3.5238247261071662, "learning_rate": 1.02370625289487e-06, "loss": 1.0825, "step": 79685 }, { "epoch": 2.3496977738463807, "grad_norm": 3.5618898984389813, "learning_rate": 1.023264602964935e-06, "loss": 1.0635, "step": 79690 }, { "epoch": 2.34984520123839, "grad_norm": 3.427406353697334, "learning_rate": 1.022823033272302e-06, "loss": 1.0481, "step": 79695 }, { "epoch": 2.3499926286303996, "grad_norm": 3.533889523675658, "learning_rate": 1.022381543829966e-06, "loss": 1.0497, "step": 79700 }, { "epoch": 2.350140056022409, "grad_norm": 3.5785089831817407, "learning_rate": 1.0219401346509173e-06, "loss": 1.0914, "step": 79705 }, { "epoch": 2.3502874834144185, "grad_norm": 3.4550230549134864, "learning_rate": 1.0214988057481466e-06, "loss": 1.0044, "step": 79710 }, { "epoch": 2.350434910806428, "grad_norm": 3.3639549106705458, "learning_rate": 1.0210575571346367e-06, "loss": 1.0412, "step": 79715 }, { "epoch": 2.3505823381984374, "grad_norm": 3.457863179432258, "learning_rate": 1.0206163888233766e-06, "loss": 1.0442, "step": 79720 }, { "epoch": 2.350729765590447, "grad_norm": 3.4144810128635004, "learning_rate": 1.0201753008273447e-06, "loss": 1.0469, "step": 79725 }, { "epoch": 2.3508771929824563, "grad_norm": 3.3895778316913585, "learning_rate": 1.0197342931595219e-06, "loss": 1.069, "step": 79730 }, { "epoch": 2.3510246203744654, "grad_norm": 3.4737181934596806, "learning_rate": 1.0192933658328865e-06, "loss": 1.0351, "step": 79735 }, { "epoch": 2.3511720477664753, "grad_norm": 3.57943299282404, "learning_rate": 1.018852518860409e-06, "loss": 1.0344, "step": 79740 }, { "epoch": 2.3513194751584843, "grad_norm": 3.499868386230626, "learning_rate": 1.0184117522550674e-06, "loss": 1.0602, "step": 79745 }, { "epoch": 2.3514669025504937, "grad_norm": 3.5753171725829977, "learning_rate": 1.0179710660298284e-06, "loss": 1.0469, "step": 79750 }, { "epoch": 2.351614329942503, "grad_norm": 3.579402600429488, "learning_rate": 1.01753046019766e-06, "loss": 1.082, "step": 79755 }, { "epoch": 2.3517617573345126, "grad_norm": 3.4251420842130087, "learning_rate": 1.0170899347715279e-06, "loss": 1.0992, "step": 79760 }, { "epoch": 2.351909184726522, "grad_norm": 3.5088796247505156, "learning_rate": 1.0166494897643951e-06, "loss": 1.0045, "step": 79765 }, { "epoch": 2.3520566121185316, "grad_norm": 3.489912595951346, "learning_rate": 1.0162091251892228e-06, "loss": 1.0169, "step": 79770 }, { "epoch": 2.352204039510541, "grad_norm": 3.3500714765129604, "learning_rate": 1.0157688410589681e-06, "loss": 1.0547, "step": 79775 }, { "epoch": 2.3523514669025505, "grad_norm": 3.49663092151877, "learning_rate": 1.0153286373865876e-06, "loss": 1.0473, "step": 79780 }, { "epoch": 2.35249889429456, "grad_norm": 3.3191190272434077, "learning_rate": 1.0148885141850355e-06, "loss": 1.0557, "step": 79785 }, { "epoch": 2.3526463216865694, "grad_norm": 3.4135403466771512, "learning_rate": 1.01444847146726e-06, "loss": 1.054, "step": 79790 }, { "epoch": 2.352793749078579, "grad_norm": 3.4235654054546876, "learning_rate": 1.0140085092462143e-06, "loss": 1.0564, "step": 79795 }, { "epoch": 2.3529411764705883, "grad_norm": 3.6258636406582063, "learning_rate": 1.013568627534841e-06, "loss": 1.0523, "step": 79800 }, { "epoch": 2.3530886038625978, "grad_norm": 3.5148852068272944, "learning_rate": 1.0131288263460854e-06, "loss": 1.0392, "step": 79805 }, { "epoch": 2.3532360312546072, "grad_norm": 3.359012616598829, "learning_rate": 1.0126891056928908e-06, "loss": 0.9878, "step": 79810 }, { "epoch": 2.3533834586466167, "grad_norm": 3.4908750710972205, "learning_rate": 1.0122494655881924e-06, "loss": 1.0286, "step": 79815 }, { "epoch": 2.353530886038626, "grad_norm": 3.551587977388942, "learning_rate": 1.0118099060449319e-06, "loss": 1.0352, "step": 79820 }, { "epoch": 2.3536783134306356, "grad_norm": 3.400860011543939, "learning_rate": 1.0113704270760394e-06, "loss": 1.0217, "step": 79825 }, { "epoch": 2.3538257408226446, "grad_norm": 3.3365071616750095, "learning_rate": 1.0109310286944519e-06, "loss": 1.0517, "step": 79830 }, { "epoch": 2.3539731682146545, "grad_norm": 3.446543409893217, "learning_rate": 1.010491710913095e-06, "loss": 1.0229, "step": 79835 }, { "epoch": 2.3541205956066635, "grad_norm": 3.459154939658518, "learning_rate": 1.0100524737448979e-06, "loss": 1.0232, "step": 79840 }, { "epoch": 2.354268022998673, "grad_norm": 3.518324878834656, "learning_rate": 1.0096133172027854e-06, "loss": 1.0649, "step": 79845 }, { "epoch": 2.3544154503906825, "grad_norm": 3.630399156490573, "learning_rate": 1.00917424129968e-06, "loss": 1.0612, "step": 79850 }, { "epoch": 2.354562877782692, "grad_norm": 3.4875710711509202, "learning_rate": 1.0087352460485025e-06, "loss": 1.05, "step": 79855 }, { "epoch": 2.3547103051747014, "grad_norm": 3.3981143039077155, "learning_rate": 1.0082963314621715e-06, "loss": 1.0538, "step": 79860 }, { "epoch": 2.354857732566711, "grad_norm": 3.451413897312544, "learning_rate": 1.0078574975536008e-06, "loss": 1.0376, "step": 79865 }, { "epoch": 2.3550051599587203, "grad_norm": 3.499606849168255, "learning_rate": 1.0074187443357045e-06, "loss": 1.0424, "step": 79870 }, { "epoch": 2.3551525873507297, "grad_norm": 3.578555754890898, "learning_rate": 1.006980071821393e-06, "loss": 1.0567, "step": 79875 }, { "epoch": 2.355300014742739, "grad_norm": 3.40446002008608, "learning_rate": 1.006541480023575e-06, "loss": 1.0276, "step": 79880 }, { "epoch": 2.3554474421347487, "grad_norm": 3.5225158391949454, "learning_rate": 1.0061029689551581e-06, "loss": 1.0253, "step": 79885 }, { "epoch": 2.355594869526758, "grad_norm": 3.4450582130913134, "learning_rate": 1.0056645386290425e-06, "loss": 1.0494, "step": 79890 }, { "epoch": 2.3557422969187676, "grad_norm": 3.411976799561073, "learning_rate": 1.0052261890581335e-06, "loss": 1.0676, "step": 79895 }, { "epoch": 2.355889724310777, "grad_norm": 3.484616475453257, "learning_rate": 1.0047879202553255e-06, "loss": 1.0375, "step": 79900 }, { "epoch": 2.3560371517027865, "grad_norm": 3.5968448824587793, "learning_rate": 1.0043497322335207e-06, "loss": 1.0526, "step": 79905 }, { "epoch": 2.356184579094796, "grad_norm": 3.5149873896306865, "learning_rate": 1.0039116250056086e-06, "loss": 1.0315, "step": 79910 }, { "epoch": 2.3563320064868054, "grad_norm": 3.655462198596131, "learning_rate": 1.0034735985844823e-06, "loss": 1.0739, "step": 79915 }, { "epoch": 2.356479433878815, "grad_norm": 3.431778408041996, "learning_rate": 1.003035652983032e-06, "loss": 1.0466, "step": 79920 }, { "epoch": 2.356626861270824, "grad_norm": 3.404798092922349, "learning_rate": 1.0025977882141445e-06, "loss": 1.0445, "step": 79925 }, { "epoch": 2.356774288662834, "grad_norm": 3.4919384308320556, "learning_rate": 1.002160004290704e-06, "loss": 1.0805, "step": 79930 }, { "epoch": 2.356921716054843, "grad_norm": 3.320150769657985, "learning_rate": 1.0017223012255943e-06, "loss": 1.0549, "step": 79935 }, { "epoch": 2.3570691434468523, "grad_norm": 3.5361784423222566, "learning_rate": 1.0012846790316926e-06, "loss": 1.0523, "step": 79940 }, { "epoch": 2.3572165708388617, "grad_norm": 3.489286849605307, "learning_rate": 1.0008471377218783e-06, "loss": 1.054, "step": 79945 }, { "epoch": 2.357363998230871, "grad_norm": 3.6094307847624685, "learning_rate": 1.000409677309026e-06, "loss": 1.0669, "step": 79950 }, { "epoch": 2.3575114256228806, "grad_norm": 3.743233204563706, "learning_rate": 9.99972297806009e-07, "loss": 1.0725, "step": 79955 }, { "epoch": 2.35765885301489, "grad_norm": 3.5294704971785222, "learning_rate": 9.995349992256971e-07, "loss": 1.0135, "step": 79960 }, { "epoch": 2.3578062804068995, "grad_norm": 3.3933242751283563, "learning_rate": 9.990977815809587e-07, "loss": 1.0103, "step": 79965 }, { "epoch": 2.357953707798909, "grad_norm": 3.615017352400456, "learning_rate": 9.9866064488466e-07, "loss": 1.0565, "step": 79970 }, { "epoch": 2.3581011351909185, "grad_norm": 3.4327513320686314, "learning_rate": 9.982235891496616e-07, "loss": 1.0244, "step": 79975 }, { "epoch": 2.358248562582928, "grad_norm": 3.4244809773620313, "learning_rate": 9.977866143888285e-07, "loss": 1.0377, "step": 79980 }, { "epoch": 2.3583959899749374, "grad_norm": 3.5003579203966115, "learning_rate": 9.973497206150161e-07, "loss": 1.0544, "step": 79985 }, { "epoch": 2.358543417366947, "grad_norm": 3.405904883281468, "learning_rate": 9.96912907841081e-07, "loss": 1.0684, "step": 79990 }, { "epoch": 2.3586908447589563, "grad_norm": 3.394222859627472, "learning_rate": 9.964761760798772e-07, "loss": 1.0497, "step": 79995 }, { "epoch": 2.3588382721509658, "grad_norm": 3.322128573397365, "learning_rate": 9.960395253442566e-07, "loss": 1.0179, "step": 80000 }, { "epoch": 2.3588382721509658, "eval_loss": 1.0821781158447266, "eval_runtime": 4.1974, "eval_samples_per_second": 94.343, "eval_steps_per_second": 3.097, "step": 80000 }, { "epoch": 2.358985699542975, "grad_norm": 3.4626907701493788, "learning_rate": 9.95602955647068e-07, "loss": 1.053, "step": 80005 }, { "epoch": 2.3591331269349847, "grad_norm": 3.486421559445419, "learning_rate": 9.951664670011573e-07, "loss": 1.0243, "step": 80010 }, { "epoch": 2.359280554326994, "grad_norm": 3.69546094448709, "learning_rate": 9.947300594193686e-07, "loss": 1.0628, "step": 80015 }, { "epoch": 2.359427981719003, "grad_norm": 3.5804087399119977, "learning_rate": 9.942937329145439e-07, "loss": 1.0551, "step": 80020 }, { "epoch": 2.359575409111013, "grad_norm": 3.377702708542423, "learning_rate": 9.938574874995233e-07, "loss": 0.9986, "step": 80025 }, { "epoch": 2.359722836503022, "grad_norm": 3.4858237839245616, "learning_rate": 9.934213231871429e-07, "loss": 1.0451, "step": 80030 }, { "epoch": 2.3598702638950315, "grad_norm": 3.4941592924765503, "learning_rate": 9.929852399902378e-07, "loss": 1.026, "step": 80035 }, { "epoch": 2.360017691287041, "grad_norm": 3.5203358661073882, "learning_rate": 9.925492379216405e-07, "loss": 1.0318, "step": 80040 }, { "epoch": 2.3601651186790504, "grad_norm": 3.3781299776900013, "learning_rate": 9.921133169941816e-07, "loss": 1.0606, "step": 80045 }, { "epoch": 2.36031254607106, "grad_norm": 3.3112008365652366, "learning_rate": 9.91677477220685e-07, "loss": 1.0393, "step": 80050 }, { "epoch": 2.3604599734630693, "grad_norm": 3.6078540504760914, "learning_rate": 9.91241718613981e-07, "loss": 1.0577, "step": 80055 }, { "epoch": 2.360607400855079, "grad_norm": 3.557328952208101, "learning_rate": 9.908060411868882e-07, "loss": 1.0745, "step": 80060 }, { "epoch": 2.3607548282470883, "grad_norm": 3.471100437830449, "learning_rate": 9.90370444952228e-07, "loss": 1.095, "step": 80065 }, { "epoch": 2.3609022556390977, "grad_norm": 3.5693193440147137, "learning_rate": 9.899349299228202e-07, "loss": 1.0599, "step": 80070 }, { "epoch": 2.361049683031107, "grad_norm": 3.3352968726501686, "learning_rate": 9.894994961114764e-07, "loss": 1.0312, "step": 80075 }, { "epoch": 2.3611971104231166, "grad_norm": 3.7005991183730407, "learning_rate": 9.890641435310139e-07, "loss": 1.087, "step": 80080 }, { "epoch": 2.361344537815126, "grad_norm": 3.469268666857058, "learning_rate": 9.8862887219424e-07, "loss": 1.0402, "step": 80085 }, { "epoch": 2.3614919652071356, "grad_norm": 3.4675894856213456, "learning_rate": 9.881936821139669e-07, "loss": 1.044, "step": 80090 }, { "epoch": 2.361639392599145, "grad_norm": 3.300533836015407, "learning_rate": 9.87758573302997e-07, "loss": 1.0305, "step": 80095 }, { "epoch": 2.3617868199911545, "grad_norm": 3.286553168301474, "learning_rate": 9.87323545774135e-07, "loss": 1.008, "step": 80100 }, { "epoch": 2.361934247383164, "grad_norm": 3.610117924181888, "learning_rate": 9.868885995401828e-07, "loss": 1.0568, "step": 80105 }, { "epoch": 2.3620816747751734, "grad_norm": 3.4529892965195805, "learning_rate": 9.864537346139383e-07, "loss": 1.0807, "step": 80110 }, { "epoch": 2.362229102167183, "grad_norm": 3.489661326478058, "learning_rate": 9.860189510081986e-07, "loss": 1.0684, "step": 80115 }, { "epoch": 2.3623765295591923, "grad_norm": 3.6891517301298715, "learning_rate": 9.855842487357584e-07, "loss": 1.0683, "step": 80120 }, { "epoch": 2.3625239569512013, "grad_norm": 3.526879390713227, "learning_rate": 9.85149627809406e-07, "loss": 1.0523, "step": 80125 }, { "epoch": 2.362671384343211, "grad_norm": 3.6251628549342723, "learning_rate": 9.847150882419354e-07, "loss": 1.0938, "step": 80130 }, { "epoch": 2.3628188117352202, "grad_norm": 3.474096463027331, "learning_rate": 9.842806300461293e-07, "loss": 1.0169, "step": 80135 }, { "epoch": 2.3629662391272297, "grad_norm": 3.588730615259956, "learning_rate": 9.838462532347742e-07, "loss": 1.0594, "step": 80140 }, { "epoch": 2.363113666519239, "grad_norm": 3.4489560844025045, "learning_rate": 9.83411957820653e-07, "loss": 1.0513, "step": 80145 }, { "epoch": 2.3632610939112486, "grad_norm": 3.6723501386076585, "learning_rate": 9.829777438165417e-07, "loss": 1.0254, "step": 80150 }, { "epoch": 2.363408521303258, "grad_norm": 3.5220959685428386, "learning_rate": 9.82543611235222e-07, "loss": 0.9968, "step": 80155 }, { "epoch": 2.3635559486952675, "grad_norm": 3.602265026229189, "learning_rate": 9.82109560089464e-07, "loss": 1.0457, "step": 80160 }, { "epoch": 2.363703376087277, "grad_norm": 3.4496939587732762, "learning_rate": 9.816755903920458e-07, "loss": 1.0873, "step": 80165 }, { "epoch": 2.3638508034792864, "grad_norm": 3.4292405370723325, "learning_rate": 9.81241702155733e-07, "loss": 1.0141, "step": 80170 }, { "epoch": 2.363998230871296, "grad_norm": 3.2916511548150114, "learning_rate": 9.808078953932947e-07, "loss": 1.0243, "step": 80175 }, { "epoch": 2.3641456582633054, "grad_norm": 3.4448528191339993, "learning_rate": 9.803741701174963e-07, "loss": 1.0307, "step": 80180 }, { "epoch": 2.364293085655315, "grad_norm": 3.539174273453698, "learning_rate": 9.799405263411001e-07, "loss": 1.1062, "step": 80185 }, { "epoch": 2.3644405130473243, "grad_norm": 3.480215297635197, "learning_rate": 9.795069640768676e-07, "loss": 1.0818, "step": 80190 }, { "epoch": 2.3645879404393337, "grad_norm": 3.4765768235880103, "learning_rate": 9.79073483337557e-07, "loss": 1.0733, "step": 80195 }, { "epoch": 2.364735367831343, "grad_norm": 3.4945206556782242, "learning_rate": 9.78640084135922e-07, "loss": 1.0686, "step": 80200 }, { "epoch": 2.3648827952233527, "grad_norm": 3.4237877034050994, "learning_rate": 9.782067664847173e-07, "loss": 1.0572, "step": 80205 }, { "epoch": 2.365030222615362, "grad_norm": 3.590493265762038, "learning_rate": 9.777735303966934e-07, "loss": 1.0545, "step": 80210 }, { "epoch": 2.3651776500073716, "grad_norm": 3.4577722432887086, "learning_rate": 9.773403758845993e-07, "loss": 1.0513, "step": 80215 }, { "epoch": 2.3653250773993806, "grad_norm": 3.5041866988320383, "learning_rate": 9.769073029611803e-07, "loss": 1.033, "step": 80220 }, { "epoch": 2.3654725047913905, "grad_norm": 3.5626420920973296, "learning_rate": 9.764743116391806e-07, "loss": 1.0463, "step": 80225 }, { "epoch": 2.3656199321833995, "grad_norm": 3.5890494564165834, "learning_rate": 9.760414019313424e-07, "loss": 1.0606, "step": 80230 }, { "epoch": 2.365767359575409, "grad_norm": 3.3725141382966854, "learning_rate": 9.75608573850401e-07, "loss": 1.0286, "step": 80235 }, { "epoch": 2.3659147869674184, "grad_norm": 3.3756997717297725, "learning_rate": 9.75175827409098e-07, "loss": 1.0596, "step": 80240 }, { "epoch": 2.366062214359428, "grad_norm": 3.359981864105495, "learning_rate": 9.747431626201633e-07, "loss": 1.0454, "step": 80245 }, { "epoch": 2.3662096417514373, "grad_norm": 3.4296475712132657, "learning_rate": 9.743105794963303e-07, "loss": 1.0404, "step": 80250 }, { "epoch": 2.366357069143447, "grad_norm": 3.5607739904162212, "learning_rate": 9.738780780503282e-07, "loss": 1.001, "step": 80255 }, { "epoch": 2.3665044965354562, "grad_norm": 3.549989758419855, "learning_rate": 9.734456582948832e-07, "loss": 1.0733, "step": 80260 }, { "epoch": 2.3666519239274657, "grad_norm": 3.6179653899741644, "learning_rate": 9.730133202427216e-07, "loss": 1.0392, "step": 80265 }, { "epoch": 2.366799351319475, "grad_norm": 3.454038095786911, "learning_rate": 9.725810639065627e-07, "loss": 1.0502, "step": 80270 }, { "epoch": 2.3669467787114846, "grad_norm": 3.421075844780292, "learning_rate": 9.721488892991275e-07, "loss": 1.0477, "step": 80275 }, { "epoch": 2.367094206103494, "grad_norm": 3.39454765838549, "learning_rate": 9.717167964331331e-07, "loss": 1.0339, "step": 80280 }, { "epoch": 2.3672416334955035, "grad_norm": 3.48804485353952, "learning_rate": 9.712847853212947e-07, "loss": 1.0491, "step": 80285 }, { "epoch": 2.367389060887513, "grad_norm": 3.4013215654019433, "learning_rate": 9.708528559763246e-07, "loss": 1.0523, "step": 80290 }, { "epoch": 2.3675364882795225, "grad_norm": 3.5274259156700234, "learning_rate": 9.704210084109324e-07, "loss": 1.077, "step": 80295 }, { "epoch": 2.367683915671532, "grad_norm": 3.399626193564494, "learning_rate": 9.699892426378257e-07, "loss": 1.0431, "step": 80300 }, { "epoch": 2.3678313430635414, "grad_norm": 3.575235438311426, "learning_rate": 9.695575586697115e-07, "loss": 1.081, "step": 80305 }, { "epoch": 2.367978770455551, "grad_norm": 3.5054924693717386, "learning_rate": 9.69125956519289e-07, "loss": 1.0407, "step": 80310 }, { "epoch": 2.36812619784756, "grad_norm": 3.4345729059996857, "learning_rate": 9.686944361992625e-07, "loss": 1.0504, "step": 80315 }, { "epoch": 2.3682736252395697, "grad_norm": 3.553098316069917, "learning_rate": 9.68262997722327e-07, "loss": 1.0745, "step": 80320 }, { "epoch": 2.3684210526315788, "grad_norm": 3.369196587515558, "learning_rate": 9.678316411011795e-07, "loss": 1.0141, "step": 80325 }, { "epoch": 2.368568480023588, "grad_norm": 3.413744607626567, "learning_rate": 9.67400366348514e-07, "loss": 1.0523, "step": 80330 }, { "epoch": 2.3687159074155977, "grad_norm": 3.4589203803704485, "learning_rate": 9.669691734770175e-07, "loss": 1.0378, "step": 80335 }, { "epoch": 2.368863334807607, "grad_norm": 3.439438132734044, "learning_rate": 9.665380624993833e-07, "loss": 1.0596, "step": 80340 }, { "epoch": 2.3690107621996166, "grad_norm": 3.416855830972754, "learning_rate": 9.66107033428293e-07, "loss": 1.0715, "step": 80345 }, { "epoch": 2.369158189591626, "grad_norm": 3.5078631119000074, "learning_rate": 9.656760862764345e-07, "loss": 1.0262, "step": 80350 }, { "epoch": 2.3693056169836355, "grad_norm": 3.4898817336120866, "learning_rate": 9.652452210564847e-07, "loss": 1.0947, "step": 80355 }, { "epoch": 2.369453044375645, "grad_norm": 3.5878863928047173, "learning_rate": 9.648144377811247e-07, "loss": 1.0211, "step": 80360 }, { "epoch": 2.3696004717676544, "grad_norm": 3.480976167806603, "learning_rate": 9.6438373646303e-07, "loss": 1.0402, "step": 80365 }, { "epoch": 2.369747899159664, "grad_norm": 3.4205719869372944, "learning_rate": 9.639531171148747e-07, "loss": 1.0731, "step": 80370 }, { "epoch": 2.3698953265516733, "grad_norm": 3.5590264362018678, "learning_rate": 9.635225797493303e-07, "loss": 1.0767, "step": 80375 }, { "epoch": 2.370042753943683, "grad_norm": 3.457095564736253, "learning_rate": 9.63092124379067e-07, "loss": 1.0573, "step": 80380 }, { "epoch": 2.3701901813356923, "grad_norm": 3.2925966678510505, "learning_rate": 9.62661751016748e-07, "loss": 1.0483, "step": 80385 }, { "epoch": 2.3703376087277017, "grad_norm": 3.410732348257185, "learning_rate": 9.62231459675042e-07, "loss": 1.0442, "step": 80390 }, { "epoch": 2.370485036119711, "grad_norm": 3.392914081761678, "learning_rate": 9.618012503666074e-07, "loss": 1.0676, "step": 80395 }, { "epoch": 2.3706324635117206, "grad_norm": 3.4710589772230427, "learning_rate": 9.613711231041052e-07, "loss": 1.0498, "step": 80400 }, { "epoch": 2.37077989090373, "grad_norm": 3.520655937578775, "learning_rate": 9.609410779001928e-07, "loss": 1.0386, "step": 80405 }, { "epoch": 2.370927318295739, "grad_norm": 3.7319104308252466, "learning_rate": 9.605111147675214e-07, "loss": 1.097, "step": 80410 }, { "epoch": 2.371074745687749, "grad_norm": 3.4638309227002257, "learning_rate": 9.600812337187485e-07, "loss": 1.0376, "step": 80415 }, { "epoch": 2.371222173079758, "grad_norm": 3.4231763141658687, "learning_rate": 9.596514347665183e-07, "loss": 1.0578, "step": 80420 }, { "epoch": 2.3713696004717675, "grad_norm": 3.5029516209993328, "learning_rate": 9.592217179234831e-07, "loss": 1.0387, "step": 80425 }, { "epoch": 2.371517027863777, "grad_norm": 3.59008674681986, "learning_rate": 9.587920832022848e-07, "loss": 1.0404, "step": 80430 }, { "epoch": 2.3716644552557864, "grad_norm": 3.442975375313745, "learning_rate": 9.583625306155668e-07, "loss": 1.0637, "step": 80435 }, { "epoch": 2.371811882647796, "grad_norm": 3.4215983530318823, "learning_rate": 9.57933060175969e-07, "loss": 1.0408, "step": 80440 }, { "epoch": 2.3719593100398053, "grad_norm": 3.3119192204308856, "learning_rate": 9.575036718961287e-07, "loss": 1.0498, "step": 80445 }, { "epoch": 2.3721067374318148, "grad_norm": 3.3780169035705625, "learning_rate": 9.570743657886819e-07, "loss": 1.0306, "step": 80450 }, { "epoch": 2.3722541648238242, "grad_norm": 3.606124858692991, "learning_rate": 9.56645141866262e-07, "loss": 1.0741, "step": 80455 }, { "epoch": 2.3724015922158337, "grad_norm": 3.3767527700620845, "learning_rate": 9.562160001414978e-07, "loss": 1.0333, "step": 80460 }, { "epoch": 2.372549019607843, "grad_norm": 3.3882931069593076, "learning_rate": 9.557869406270175e-07, "loss": 1.0815, "step": 80465 }, { "epoch": 2.3726964469998526, "grad_norm": 3.618465632709425, "learning_rate": 9.553579633354474e-07, "loss": 1.0678, "step": 80470 }, { "epoch": 2.372843874391862, "grad_norm": 3.480504012027553, "learning_rate": 9.5492906827941e-07, "loss": 1.0378, "step": 80475 }, { "epoch": 2.3729913017838715, "grad_norm": 3.682127519315504, "learning_rate": 9.545002554715266e-07, "loss": 1.0705, "step": 80480 }, { "epoch": 2.373138729175881, "grad_norm": 3.2617429135016156, "learning_rate": 9.54071524924415e-07, "loss": 1.0724, "step": 80485 }, { "epoch": 2.3732861565678904, "grad_norm": 3.434768459288996, "learning_rate": 9.53642876650693e-07, "loss": 1.0237, "step": 80490 }, { "epoch": 2.3734335839599, "grad_norm": 3.4858557390897946, "learning_rate": 9.532143106629701e-07, "loss": 1.0295, "step": 80495 }, { "epoch": 2.3735810113519094, "grad_norm": 3.756592323909315, "learning_rate": 9.527858269738622e-07, "loss": 1.0524, "step": 80500 }, { "epoch": 2.3735810113519094, "eval_loss": 1.0820695161819458, "eval_runtime": 4.2747, "eval_samples_per_second": 92.638, "eval_steps_per_second": 3.041, "step": 80500 }, { "epoch": 2.373728438743919, "grad_norm": 3.5214628333059097, "learning_rate": 9.523574255959739e-07, "loss": 1.0392, "step": 80505 }, { "epoch": 2.3738758661359283, "grad_norm": 3.63075708064557, "learning_rate": 9.519291065419132e-07, "loss": 1.1152, "step": 80510 }, { "epoch": 2.3740232935279373, "grad_norm": 3.4441655833197187, "learning_rate": 9.515008698242831e-07, "loss": 1.0657, "step": 80515 }, { "epoch": 2.3741707209199467, "grad_norm": 3.2805226468261286, "learning_rate": 9.510727154556862e-07, "loss": 1.0036, "step": 80520 }, { "epoch": 2.374318148311956, "grad_norm": 3.400359130138543, "learning_rate": 9.506446434487212e-07, "loss": 1.0413, "step": 80525 }, { "epoch": 2.3744655757039657, "grad_norm": 3.393452026452076, "learning_rate": 9.502166538159833e-07, "loss": 1.0674, "step": 80530 }, { "epoch": 2.374613003095975, "grad_norm": 3.5173429986333726, "learning_rate": 9.497887465700669e-07, "loss": 1.0573, "step": 80535 }, { "epoch": 2.3747604304879846, "grad_norm": 3.482074594533421, "learning_rate": 9.493609217235645e-07, "loss": 1.0243, "step": 80540 }, { "epoch": 2.374907857879994, "grad_norm": 3.4989836879952287, "learning_rate": 9.489331792890651e-07, "loss": 0.9836, "step": 80545 }, { "epoch": 2.3750552852720035, "grad_norm": 3.5451637176781565, "learning_rate": 9.485055192791554e-07, "loss": 1.0248, "step": 80550 }, { "epoch": 2.375202712664013, "grad_norm": 3.382595664771664, "learning_rate": 9.480779417064194e-07, "loss": 0.9925, "step": 80555 }, { "epoch": 2.3753501400560224, "grad_norm": 3.5641527505707997, "learning_rate": 9.476504465834398e-07, "loss": 1.0769, "step": 80560 }, { "epoch": 2.375497567448032, "grad_norm": 3.584165618150038, "learning_rate": 9.472230339227969e-07, "loss": 1.0562, "step": 80565 }, { "epoch": 2.3756449948400413, "grad_norm": 3.4137380828748842, "learning_rate": 9.467957037370639e-07, "loss": 1.0299, "step": 80570 }, { "epoch": 2.375792422232051, "grad_norm": 3.585079768023344, "learning_rate": 9.463684560388211e-07, "loss": 1.0865, "step": 80575 }, { "epoch": 2.3759398496240602, "grad_norm": 3.5466062259457796, "learning_rate": 9.459412908406363e-07, "loss": 1.0266, "step": 80580 }, { "epoch": 2.3760872770160697, "grad_norm": 3.749471540190886, "learning_rate": 9.455142081550812e-07, "loss": 1.0513, "step": 80585 }, { "epoch": 2.376234704408079, "grad_norm": 3.474800266201657, "learning_rate": 9.45087207994724e-07, "loss": 1.0357, "step": 80590 }, { "epoch": 2.3763821318000886, "grad_norm": 3.569185492848024, "learning_rate": 9.446602903721263e-07, "loss": 1.0909, "step": 80595 }, { "epoch": 2.376529559192098, "grad_norm": 3.453219423840314, "learning_rate": 9.442334552998552e-07, "loss": 1.0304, "step": 80600 }, { "epoch": 2.3766769865841075, "grad_norm": 3.5131211378976333, "learning_rate": 9.438067027904661e-07, "loss": 1.0267, "step": 80605 }, { "epoch": 2.3768244139761165, "grad_norm": 3.474156132637099, "learning_rate": 9.433800328565213e-07, "loss": 1.0216, "step": 80610 }, { "epoch": 2.376971841368126, "grad_norm": 3.664479213284997, "learning_rate": 9.42953445510573e-07, "loss": 1.0545, "step": 80615 }, { "epoch": 2.3771192687601355, "grad_norm": 3.552370034123213, "learning_rate": 9.425269407651749e-07, "loss": 1.0366, "step": 80620 }, { "epoch": 2.377266696152145, "grad_norm": 3.455921768386126, "learning_rate": 9.421005186328772e-07, "loss": 1.0412, "step": 80625 }, { "epoch": 2.3774141235441544, "grad_norm": 3.3696139524096247, "learning_rate": 9.41674179126228e-07, "loss": 1.0643, "step": 80630 }, { "epoch": 2.377561550936164, "grad_norm": 3.5052629668359816, "learning_rate": 9.412479222577731e-07, "loss": 1.0678, "step": 80635 }, { "epoch": 2.3777089783281733, "grad_norm": 3.268465169662659, "learning_rate": 9.408217480400564e-07, "loss": 1.0848, "step": 80640 }, { "epoch": 2.3778564057201828, "grad_norm": 3.428925323613676, "learning_rate": 9.40395656485615e-07, "loss": 1.066, "step": 80645 }, { "epoch": 2.378003833112192, "grad_norm": 3.5175406743247346, "learning_rate": 9.399696476069921e-07, "loss": 1.0232, "step": 80650 }, { "epoch": 2.3781512605042017, "grad_norm": 3.49805931538501, "learning_rate": 9.395437214167197e-07, "loss": 1.0424, "step": 80655 }, { "epoch": 2.378298687896211, "grad_norm": 3.3914820894101103, "learning_rate": 9.391178779273328e-07, "loss": 1.0243, "step": 80660 }, { "epoch": 2.3784461152882206, "grad_norm": 3.498538153584086, "learning_rate": 9.386921171513632e-07, "loss": 1.053, "step": 80665 }, { "epoch": 2.37859354268023, "grad_norm": 3.4331146622241726, "learning_rate": 9.382664391013355e-07, "loss": 1.0404, "step": 80670 }, { "epoch": 2.3787409700722395, "grad_norm": 3.3129086703135826, "learning_rate": 9.37840843789781e-07, "loss": 0.9869, "step": 80675 }, { "epoch": 2.378888397464249, "grad_norm": 3.5704609508073757, "learning_rate": 9.374153312292188e-07, "loss": 1.0472, "step": 80680 }, { "epoch": 2.3790358248562584, "grad_norm": 3.3376049636465024, "learning_rate": 9.369899014321744e-07, "loss": 1.0289, "step": 80685 }, { "epoch": 2.379183252248268, "grad_norm": 3.6436776580243175, "learning_rate": 9.365645544111625e-07, "loss": 1.0374, "step": 80690 }, { "epoch": 2.3793306796402773, "grad_norm": 3.5623658334431547, "learning_rate": 9.361392901787018e-07, "loss": 1.0587, "step": 80695 }, { "epoch": 2.379478107032287, "grad_norm": 3.4837968975698197, "learning_rate": 9.357141087473053e-07, "loss": 1.0723, "step": 80700 }, { "epoch": 2.379625534424296, "grad_norm": 3.3976080114459766, "learning_rate": 9.352890101294849e-07, "loss": 1.0594, "step": 80705 }, { "epoch": 2.3797729618163057, "grad_norm": 3.779585361157395, "learning_rate": 9.348639943377496e-07, "loss": 1.1139, "step": 80710 }, { "epoch": 2.3799203892083147, "grad_norm": 3.3463924068159296, "learning_rate": 9.344390613846066e-07, "loss": 1.0203, "step": 80715 }, { "epoch": 2.380067816600324, "grad_norm": 3.5012240060782664, "learning_rate": 9.340142112825582e-07, "loss": 1.0762, "step": 80720 }, { "epoch": 2.3802152439923336, "grad_norm": 3.562821096134668, "learning_rate": 9.335894440441071e-07, "loss": 1.07, "step": 80725 }, { "epoch": 2.380362671384343, "grad_norm": 3.2767520633079164, "learning_rate": 9.331647596817529e-07, "loss": 1.0058, "step": 80730 }, { "epoch": 2.3805100987763526, "grad_norm": 3.5289104160393943, "learning_rate": 9.327401582079917e-07, "loss": 1.0732, "step": 80735 }, { "epoch": 2.380657526168362, "grad_norm": 3.5849193639899894, "learning_rate": 9.323156396353186e-07, "loss": 1.0605, "step": 80740 }, { "epoch": 2.3808049535603715, "grad_norm": 3.5156110057584877, "learning_rate": 9.318912039762253e-07, "loss": 1.0561, "step": 80745 }, { "epoch": 2.380952380952381, "grad_norm": 3.3221534009001283, "learning_rate": 9.31466851243202e-07, "loss": 1.0443, "step": 80750 }, { "epoch": 2.3810998083443904, "grad_norm": 3.627621217820648, "learning_rate": 9.310425814487332e-07, "loss": 1.0257, "step": 80755 }, { "epoch": 2.3812472357364, "grad_norm": 3.396205986810908, "learning_rate": 9.306183946053075e-07, "loss": 1.0758, "step": 80760 }, { "epoch": 2.3813946631284093, "grad_norm": 3.5867521015123507, "learning_rate": 9.301942907254036e-07, "loss": 1.0633, "step": 80765 }, { "epoch": 2.3815420905204188, "grad_norm": 3.515241878767095, "learning_rate": 9.29770269821503e-07, "loss": 1.0671, "step": 80770 }, { "epoch": 2.3816895179124282, "grad_norm": 3.6287121273512843, "learning_rate": 9.293463319060821e-07, "loss": 1.0512, "step": 80775 }, { "epoch": 2.3818369453044377, "grad_norm": 3.6334684821901395, "learning_rate": 9.289224769916166e-07, "loss": 1.1099, "step": 80780 }, { "epoch": 2.381984372696447, "grad_norm": 3.5409190674103677, "learning_rate": 9.284987050905796e-07, "loss": 1.0587, "step": 80785 }, { "epoch": 2.3821318000884566, "grad_norm": 3.568452609204336, "learning_rate": 9.280750162154389e-07, "loss": 1.0092, "step": 80790 }, { "epoch": 2.382279227480466, "grad_norm": 3.5280540244745984, "learning_rate": 9.276514103786631e-07, "loss": 1.076, "step": 80795 }, { "epoch": 2.382426654872475, "grad_norm": 3.575922158724645, "learning_rate": 9.272278875927172e-07, "loss": 1.0595, "step": 80800 }, { "epoch": 2.382574082264485, "grad_norm": 3.4319148873657537, "learning_rate": 9.268044478700642e-07, "loss": 1.0722, "step": 80805 }, { "epoch": 2.382721509656494, "grad_norm": 3.449825619045595, "learning_rate": 9.263810912231642e-07, "loss": 1.0365, "step": 80810 }, { "epoch": 2.3828689370485034, "grad_norm": 3.607739444230565, "learning_rate": 9.259578176644742e-07, "loss": 1.0591, "step": 80815 }, { "epoch": 2.383016364440513, "grad_norm": 3.417939711281949, "learning_rate": 9.255346272064506e-07, "loss": 1.0764, "step": 80820 }, { "epoch": 2.3831637918325224, "grad_norm": 3.434481229400677, "learning_rate": 9.251115198615468e-07, "loss": 1.0649, "step": 80825 }, { "epoch": 2.383311219224532, "grad_norm": 3.5658515998059905, "learning_rate": 9.246884956422101e-07, "loss": 1.0713, "step": 80830 }, { "epoch": 2.3834586466165413, "grad_norm": 3.470195023068104, "learning_rate": 9.242655545608929e-07, "loss": 1.0149, "step": 80835 }, { "epoch": 2.3836060740085507, "grad_norm": 3.378119830061914, "learning_rate": 9.238426966300371e-07, "loss": 1.0413, "step": 80840 }, { "epoch": 2.38375350140056, "grad_norm": 3.2879160311294413, "learning_rate": 9.234199218620871e-07, "loss": 1.0584, "step": 80845 }, { "epoch": 2.3839009287925697, "grad_norm": 3.606840079242774, "learning_rate": 9.229972302694846e-07, "loss": 1.0711, "step": 80850 }, { "epoch": 2.384048356184579, "grad_norm": 3.6983079947032045, "learning_rate": 9.225746218646646e-07, "loss": 1.0496, "step": 80855 }, { "epoch": 2.3841957835765886, "grad_norm": 3.4178122238721422, "learning_rate": 9.221520966600671e-07, "loss": 1.0444, "step": 80860 }, { "epoch": 2.384343210968598, "grad_norm": 3.500029320290205, "learning_rate": 9.217296546681208e-07, "loss": 1.0582, "step": 80865 }, { "epoch": 2.3844906383606075, "grad_norm": 3.443939755937822, "learning_rate": 9.213072959012615e-07, "loss": 1.0495, "step": 80870 }, { "epoch": 2.384638065752617, "grad_norm": 3.4724290555764576, "learning_rate": 9.208850203719137e-07, "loss": 1.0331, "step": 80875 }, { "epoch": 2.3847854931446264, "grad_norm": 3.3902492575036023, "learning_rate": 9.204628280925049e-07, "loss": 1.0329, "step": 80880 }, { "epoch": 2.384932920536636, "grad_norm": 3.3884976921682517, "learning_rate": 9.20040719075458e-07, "loss": 1.0451, "step": 80885 }, { "epoch": 2.3850803479286453, "grad_norm": 3.4093796144912005, "learning_rate": 9.196186933331949e-07, "loss": 1.0532, "step": 80890 }, { "epoch": 2.3852277753206543, "grad_norm": 3.417635859415735, "learning_rate": 9.191967508781334e-07, "loss": 1.0025, "step": 80895 }, { "epoch": 2.3853752027126642, "grad_norm": 3.5676863950063167, "learning_rate": 9.18774891722691e-07, "loss": 1.0406, "step": 80900 }, { "epoch": 2.3855226301046732, "grad_norm": 3.506186146295422, "learning_rate": 9.183531158792783e-07, "loss": 1.0606, "step": 80905 }, { "epoch": 2.3856700574966827, "grad_norm": 3.459418598531864, "learning_rate": 9.179314233603108e-07, "loss": 1.09, "step": 80910 }, { "epoch": 2.385817484888692, "grad_norm": 3.4956708565810173, "learning_rate": 9.175098141781936e-07, "loss": 1.0117, "step": 80915 }, { "epoch": 2.3859649122807016, "grad_norm": 3.5680283716342096, "learning_rate": 9.170882883453347e-07, "loss": 1.0038, "step": 80920 }, { "epoch": 2.386112339672711, "grad_norm": 3.3457979979205317, "learning_rate": 9.166668458741389e-07, "loss": 1.0411, "step": 80925 }, { "epoch": 2.3862597670647205, "grad_norm": 3.659869690178984, "learning_rate": 9.162454867770046e-07, "loss": 1.0673, "step": 80930 }, { "epoch": 2.38640719445673, "grad_norm": 3.426098157037799, "learning_rate": 9.158242110663345e-07, "loss": 1.038, "step": 80935 }, { "epoch": 2.3865546218487395, "grad_norm": 3.5737224841314243, "learning_rate": 9.154030187545211e-07, "loss": 1.0763, "step": 80940 }, { "epoch": 2.386702049240749, "grad_norm": 3.510918116514063, "learning_rate": 9.149819098539626e-07, "loss": 1.0185, "step": 80945 }, { "epoch": 2.3868494766327584, "grad_norm": 3.4809445382375577, "learning_rate": 9.145608843770479e-07, "loss": 1.0284, "step": 80950 }, { "epoch": 2.386996904024768, "grad_norm": 3.663842722009281, "learning_rate": 9.141399423361668e-07, "loss": 1.0402, "step": 80955 }, { "epoch": 2.3871443314167773, "grad_norm": 3.6234170983850373, "learning_rate": 9.137190837437064e-07, "loss": 1.0523, "step": 80960 }, { "epoch": 2.3872917588087867, "grad_norm": 3.4467650206885825, "learning_rate": 9.132983086120505e-07, "loss": 1.0476, "step": 80965 }, { "epoch": 2.387439186200796, "grad_norm": 3.457763957088949, "learning_rate": 9.128776169535808e-07, "loss": 1.034, "step": 80970 }, { "epoch": 2.3875866135928057, "grad_norm": 3.563802343545609, "learning_rate": 9.124570087806778e-07, "loss": 1.0708, "step": 80975 }, { "epoch": 2.387734040984815, "grad_norm": 3.52282347058998, "learning_rate": 9.120364841057169e-07, "loss": 1.0549, "step": 80980 }, { "epoch": 2.3878814683768246, "grad_norm": 3.6351374352785766, "learning_rate": 9.116160429410729e-07, "loss": 1.0326, "step": 80985 }, { "epoch": 2.388028895768834, "grad_norm": 3.492633999575312, "learning_rate": 9.111956852991177e-07, "loss": 1.0526, "step": 80990 }, { "epoch": 2.3881763231608435, "grad_norm": 3.3516295565687413, "learning_rate": 9.107754111922213e-07, "loss": 1.0267, "step": 80995 }, { "epoch": 2.3883237505528525, "grad_norm": 3.426038939015407, "learning_rate": 9.103552206327502e-07, "loss": 1.0364, "step": 81000 }, { "epoch": 2.3883237505528525, "eval_loss": 1.0818567276000977, "eval_runtime": 4.1614, "eval_samples_per_second": 95.159, "eval_steps_per_second": 3.124, "step": 81000 }, { "epoch": 2.388471177944862, "grad_norm": 3.237171986005418, "learning_rate": 9.099351136330698e-07, "loss": 1.0215, "step": 81005 }, { "epoch": 2.3886186053368714, "grad_norm": 3.6413721448588574, "learning_rate": 9.095150902055424e-07, "loss": 1.0301, "step": 81010 }, { "epoch": 2.388766032728881, "grad_norm": 3.480831354690745, "learning_rate": 9.090951503625247e-07, "loss": 1.0561, "step": 81015 }, { "epoch": 2.3889134601208903, "grad_norm": 3.3773908733315317, "learning_rate": 9.086752941163786e-07, "loss": 0.976, "step": 81020 }, { "epoch": 2.3890608875129, "grad_norm": 3.5392717408538923, "learning_rate": 9.082555214794552e-07, "loss": 1.0206, "step": 81025 }, { "epoch": 2.3892083149049093, "grad_norm": 3.6141521882552605, "learning_rate": 9.078358324641079e-07, "loss": 1.0767, "step": 81030 }, { "epoch": 2.3893557422969187, "grad_norm": 3.5382145106694125, "learning_rate": 9.074162270826869e-07, "loss": 1.0434, "step": 81035 }, { "epoch": 2.389503169688928, "grad_norm": 3.452742991274539, "learning_rate": 9.069967053475392e-07, "loss": 1.0226, "step": 81040 }, { "epoch": 2.3896505970809376, "grad_norm": 3.5533156811571156, "learning_rate": 9.065772672710107e-07, "loss": 1.0737, "step": 81045 }, { "epoch": 2.389798024472947, "grad_norm": 3.4315820869736107, "learning_rate": 9.061579128654419e-07, "loss": 1.0383, "step": 81050 }, { "epoch": 2.3899454518649565, "grad_norm": 3.5876391415632147, "learning_rate": 9.057386421431739e-07, "loss": 1.0665, "step": 81055 }, { "epoch": 2.390092879256966, "grad_norm": 3.5613321420503152, "learning_rate": 9.053194551165446e-07, "loss": 1.0611, "step": 81060 }, { "epoch": 2.3902403066489755, "grad_norm": 3.495730835534142, "learning_rate": 9.049003517978879e-07, "loss": 1.0536, "step": 81065 }, { "epoch": 2.390387734040985, "grad_norm": 3.549623223249114, "learning_rate": 9.044813321995378e-07, "loss": 1.0811, "step": 81070 }, { "epoch": 2.3905351614329944, "grad_norm": 3.546744177867147, "learning_rate": 9.040623963338231e-07, "loss": 1.0428, "step": 81075 }, { "epoch": 2.390682588825004, "grad_norm": 3.3899763970667403, "learning_rate": 9.036435442130726e-07, "loss": 1.0286, "step": 81080 }, { "epoch": 2.3908300162170133, "grad_norm": 3.478365493731305, "learning_rate": 9.03224775849612e-07, "loss": 1.0273, "step": 81085 }, { "epoch": 2.3909774436090228, "grad_norm": 3.4066090917373564, "learning_rate": 9.028060912557607e-07, "loss": 1.094, "step": 81090 }, { "epoch": 2.3911248710010318, "grad_norm": 3.4459923820398823, "learning_rate": 9.023874904438435e-07, "loss": 1.0686, "step": 81095 }, { "epoch": 2.3912722983930417, "grad_norm": 3.396357319217634, "learning_rate": 9.01968973426175e-07, "loss": 1.018, "step": 81100 }, { "epoch": 2.3914197257850507, "grad_norm": 3.383190399649581, "learning_rate": 9.015505402150715e-07, "loss": 1.0297, "step": 81105 }, { "epoch": 2.39156715317706, "grad_norm": 3.648156696304034, "learning_rate": 9.011321908228474e-07, "loss": 1.0587, "step": 81110 }, { "epoch": 2.3917145805690696, "grad_norm": 3.7191068483040852, "learning_rate": 9.007139252618091e-07, "loss": 1.0439, "step": 81115 }, { "epoch": 2.391862007961079, "grad_norm": 3.533574337454092, "learning_rate": 9.002957435442695e-07, "loss": 1.0014, "step": 81120 }, { "epoch": 2.3920094353530885, "grad_norm": 3.545937865025644, "learning_rate": 8.998776456825293e-07, "loss": 1.0559, "step": 81125 }, { "epoch": 2.392156862745098, "grad_norm": 3.5420732769597696, "learning_rate": 8.994596316888962e-07, "loss": 1.1066, "step": 81130 }, { "epoch": 2.3923042901371074, "grad_norm": 3.640626487416378, "learning_rate": 8.99041701575667e-07, "loss": 1.0724, "step": 81135 }, { "epoch": 2.392451717529117, "grad_norm": 3.382112341075995, "learning_rate": 8.986238553551412e-07, "loss": 1.0541, "step": 81140 }, { "epoch": 2.3925991449211264, "grad_norm": 3.388075935383932, "learning_rate": 8.982060930396148e-07, "loss": 1.0345, "step": 81145 }, { "epoch": 2.392746572313136, "grad_norm": 3.408912503081193, "learning_rate": 8.977884146413802e-07, "loss": 1.0158, "step": 81150 }, { "epoch": 2.3928939997051453, "grad_norm": 3.5727607265825156, "learning_rate": 8.973708201727284e-07, "loss": 1.0552, "step": 81155 }, { "epoch": 2.3930414270971547, "grad_norm": 3.5103824194297224, "learning_rate": 8.96953309645949e-07, "loss": 1.0482, "step": 81160 }, { "epoch": 2.393188854489164, "grad_norm": 3.5756051208895894, "learning_rate": 8.965358830733239e-07, "loss": 1.061, "step": 81165 }, { "epoch": 2.3933362818811736, "grad_norm": 3.4677183152751545, "learning_rate": 8.96118540467141e-07, "loss": 1.0344, "step": 81170 }, { "epoch": 2.393483709273183, "grad_norm": 3.4425000896284836, "learning_rate": 8.957012818396775e-07, "loss": 1.0377, "step": 81175 }, { "epoch": 2.3936311366651926, "grad_norm": 3.4735624735842845, "learning_rate": 8.952841072032135e-07, "loss": 1.0743, "step": 81180 }, { "epoch": 2.393778564057202, "grad_norm": 3.3976621157093834, "learning_rate": 8.948670165700251e-07, "loss": 1.0631, "step": 81185 }, { "epoch": 2.393925991449211, "grad_norm": 3.3922929699226314, "learning_rate": 8.94450009952383e-07, "loss": 1.0456, "step": 81190 }, { "epoch": 2.394073418841221, "grad_norm": 3.500685493966033, "learning_rate": 8.940330873625618e-07, "loss": 1.0948, "step": 81195 }, { "epoch": 2.39422084623323, "grad_norm": 3.5750527188085464, "learning_rate": 8.936162488128267e-07, "loss": 1.0548, "step": 81200 }, { "epoch": 2.3943682736252394, "grad_norm": 3.6240046309314704, "learning_rate": 8.931994943154466e-07, "loss": 0.9941, "step": 81205 }, { "epoch": 2.394515701017249, "grad_norm": 3.4983972526292915, "learning_rate": 8.927828238826827e-07, "loss": 1.0249, "step": 81210 }, { "epoch": 2.3946631284092583, "grad_norm": 3.399432711542034, "learning_rate": 8.923662375267967e-07, "loss": 1.0601, "step": 81215 }, { "epoch": 2.394810555801268, "grad_norm": 3.5405712608974866, "learning_rate": 8.919497352600477e-07, "loss": 1.0647, "step": 81220 }, { "epoch": 2.3949579831932772, "grad_norm": 3.3441978524329707, "learning_rate": 8.915333170946907e-07, "loss": 1.0539, "step": 81225 }, { "epoch": 2.3951054105852867, "grad_norm": 3.5188162284757287, "learning_rate": 8.911169830429803e-07, "loss": 1.016, "step": 81230 }, { "epoch": 2.395252837977296, "grad_norm": 3.6379918720015825, "learning_rate": 8.907007331171681e-07, "loss": 1.0986, "step": 81235 }, { "epoch": 2.3954002653693056, "grad_norm": 3.387951919190866, "learning_rate": 8.902845673295007e-07, "loss": 1.0128, "step": 81240 }, { "epoch": 2.395547692761315, "grad_norm": 3.5221072032657887, "learning_rate": 8.898684856922254e-07, "loss": 0.995, "step": 81245 }, { "epoch": 2.3956951201533245, "grad_norm": 3.316038093362681, "learning_rate": 8.894524882175858e-07, "loss": 1.0319, "step": 81250 }, { "epoch": 2.395842547545334, "grad_norm": 3.5193967792638596, "learning_rate": 8.890365749178231e-07, "loss": 1.0343, "step": 81255 }, { "epoch": 2.3959899749373434, "grad_norm": 3.533619930435554, "learning_rate": 8.886207458051761e-07, "loss": 1.0291, "step": 81260 }, { "epoch": 2.396137402329353, "grad_norm": 3.567456854588209, "learning_rate": 8.882050008918812e-07, "loss": 1.0525, "step": 81265 }, { "epoch": 2.3962848297213624, "grad_norm": 3.6516520587814774, "learning_rate": 8.877893401901726e-07, "loss": 1.0515, "step": 81270 }, { "epoch": 2.396432257113372, "grad_norm": 3.3766298154914716, "learning_rate": 8.873737637122792e-07, "loss": 1.0572, "step": 81275 }, { "epoch": 2.3965796845053813, "grad_norm": 3.4815623225895296, "learning_rate": 8.869582714704334e-07, "loss": 0.9948, "step": 81280 }, { "epoch": 2.3967271118973903, "grad_norm": 3.851397353299758, "learning_rate": 8.86542863476859e-07, "loss": 1.0671, "step": 81285 }, { "epoch": 2.3968745392894, "grad_norm": 3.3804550912688773, "learning_rate": 8.861275397437805e-07, "loss": 1.0379, "step": 81290 }, { "epoch": 2.397021966681409, "grad_norm": 3.581725125329875, "learning_rate": 8.857123002834193e-07, "loss": 1.048, "step": 81295 }, { "epoch": 2.3971693940734187, "grad_norm": 3.5934608813229665, "learning_rate": 8.852971451079941e-07, "loss": 1.0234, "step": 81300 }, { "epoch": 2.397316821465428, "grad_norm": 3.4756789828616244, "learning_rate": 8.848820742297228e-07, "loss": 1.0725, "step": 81305 }, { "epoch": 2.3974642488574376, "grad_norm": 3.337615265646357, "learning_rate": 8.844670876608173e-07, "loss": 1.0115, "step": 81310 }, { "epoch": 2.397611676249447, "grad_norm": 3.510275574202713, "learning_rate": 8.840521854134893e-07, "loss": 1.0439, "step": 81315 }, { "epoch": 2.3977591036414565, "grad_norm": 3.3290250250681255, "learning_rate": 8.836373674999488e-07, "loss": 0.9815, "step": 81320 }, { "epoch": 2.397906531033466, "grad_norm": 3.4466109126621483, "learning_rate": 8.832226339324015e-07, "loss": 1.0777, "step": 81325 }, { "epoch": 2.3980539584254754, "grad_norm": 3.3214498321559933, "learning_rate": 8.828079847230517e-07, "loss": 1.0577, "step": 81330 }, { "epoch": 2.398201385817485, "grad_norm": 3.6777370229316935, "learning_rate": 8.823934198841012e-07, "loss": 1.0326, "step": 81335 }, { "epoch": 2.3983488132094943, "grad_norm": 3.574030180079041, "learning_rate": 8.819789394277487e-07, "loss": 1.0365, "step": 81340 }, { "epoch": 2.398496240601504, "grad_norm": 3.4999958149932087, "learning_rate": 8.815645433661916e-07, "loss": 1.0465, "step": 81345 }, { "epoch": 2.3986436679935133, "grad_norm": 3.569535479614587, "learning_rate": 8.811502317116213e-07, "loss": 1.0752, "step": 81350 }, { "epoch": 2.3987910953855227, "grad_norm": 3.6515687369662557, "learning_rate": 8.807360044762334e-07, "loss": 1.0601, "step": 81355 }, { "epoch": 2.398938522777532, "grad_norm": 3.394111930244475, "learning_rate": 8.803218616722142e-07, "loss": 0.9998, "step": 81360 }, { "epoch": 2.3990859501695416, "grad_norm": 3.5446706709742934, "learning_rate": 8.799078033117508e-07, "loss": 0.9939, "step": 81365 }, { "epoch": 2.399233377561551, "grad_norm": 3.398326123294782, "learning_rate": 8.794938294070286e-07, "loss": 1.0126, "step": 81370 }, { "epoch": 2.3993808049535605, "grad_norm": 3.6930667948568243, "learning_rate": 8.790799399702261e-07, "loss": 1.0404, "step": 81375 }, { "epoch": 2.39952823234557, "grad_norm": 3.405101132862097, "learning_rate": 8.786661350135269e-07, "loss": 1.045, "step": 81380 }, { "epoch": 2.3996756597375795, "grad_norm": 3.5041545406935066, "learning_rate": 8.782524145491031e-07, "loss": 1.0537, "step": 81385 }, { "epoch": 2.3998230871295885, "grad_norm": 3.518469458475304, "learning_rate": 8.778387785891334e-07, "loss": 1.0756, "step": 81390 }, { "epoch": 2.399970514521598, "grad_norm": 3.271035811148213, "learning_rate": 8.774252271457862e-07, "loss": 1.0135, "step": 81395 }, { "epoch": 2.4001179419136074, "grad_norm": 3.581831459559767, "learning_rate": 8.770117602312318e-07, "loss": 1.0207, "step": 81400 }, { "epoch": 2.400265369305617, "grad_norm": 3.431833033917316, "learning_rate": 8.76598377857637e-07, "loss": 0.992, "step": 81405 }, { "epoch": 2.4004127966976263, "grad_norm": 3.384300864294236, "learning_rate": 8.761850800371663e-07, "loss": 1.0414, "step": 81410 }, { "epoch": 2.4005602240896358, "grad_norm": 3.579161663305183, "learning_rate": 8.757718667819807e-07, "loss": 1.0258, "step": 81415 }, { "epoch": 2.400707651481645, "grad_norm": 3.553820782736957, "learning_rate": 8.75358738104241e-07, "loss": 1.0318, "step": 81420 }, { "epoch": 2.4008550788736547, "grad_norm": 3.5193960346263005, "learning_rate": 8.749456940161013e-07, "loss": 1.0497, "step": 81425 }, { "epoch": 2.401002506265664, "grad_norm": 3.491149616048605, "learning_rate": 8.745327345297193e-07, "loss": 1.0498, "step": 81430 }, { "epoch": 2.4011499336576736, "grad_norm": 3.5138722925973274, "learning_rate": 8.741198596572438e-07, "loss": 1.0294, "step": 81435 }, { "epoch": 2.401297361049683, "grad_norm": 3.269595603440306, "learning_rate": 8.737070694108255e-07, "loss": 1.0425, "step": 81440 }, { "epoch": 2.4014447884416925, "grad_norm": 3.5270827062526133, "learning_rate": 8.732943638026124e-07, "loss": 1.0838, "step": 81445 }, { "epoch": 2.401592215833702, "grad_norm": 3.443263879665292, "learning_rate": 8.728817428447451e-07, "loss": 1.0608, "step": 81450 }, { "epoch": 2.4017396432257114, "grad_norm": 3.586492657674182, "learning_rate": 8.724692065493703e-07, "loss": 1.0634, "step": 81455 }, { "epoch": 2.401887070617721, "grad_norm": 3.4952163730758743, "learning_rate": 8.720567549286224e-07, "loss": 1.0788, "step": 81460 }, { "epoch": 2.4020344980097303, "grad_norm": 3.6050218614457714, "learning_rate": 8.716443879946429e-07, "loss": 1.0299, "step": 81465 }, { "epoch": 2.40218192540174, "grad_norm": 3.3583862468706758, "learning_rate": 8.712321057595629e-07, "loss": 1.0358, "step": 81470 }, { "epoch": 2.4023293527937493, "grad_norm": 3.2963915466126372, "learning_rate": 8.708199082355157e-07, "loss": 1.0454, "step": 81475 }, { "epoch": 2.4024767801857587, "grad_norm": 3.630142121408721, "learning_rate": 8.704077954346299e-07, "loss": 1.0854, "step": 81480 }, { "epoch": 2.4026242075777677, "grad_norm": 3.44020946329592, "learning_rate": 8.699957673690334e-07, "loss": 1.0656, "step": 81485 }, { "epoch": 2.402771634969777, "grad_norm": 3.3776317098554456, "learning_rate": 8.695838240508497e-07, "loss": 1.0404, "step": 81490 }, { "epoch": 2.4029190623617867, "grad_norm": 3.5829229902417805, "learning_rate": 8.691719654922024e-07, "loss": 1.0611, "step": 81495 }, { "epoch": 2.403066489753796, "grad_norm": 3.5466888808311654, "learning_rate": 8.687601917052082e-07, "loss": 1.0421, "step": 81500 }, { "epoch": 2.403066489753796, "eval_loss": 1.0810171365737915, "eval_runtime": 4.244, "eval_samples_per_second": 93.307, "eval_steps_per_second": 3.063, "step": 81500 }, { "epoch": 2.4032139171458056, "grad_norm": 3.4396162341868024, "learning_rate": 8.683485027019856e-07, "loss": 1.0126, "step": 81505 }, { "epoch": 2.403361344537815, "grad_norm": 3.279491673894432, "learning_rate": 8.679368984946488e-07, "loss": 1.0012, "step": 81510 }, { "epoch": 2.4035087719298245, "grad_norm": 3.598681738135966, "learning_rate": 8.675253790953097e-07, "loss": 1.0258, "step": 81515 }, { "epoch": 2.403656199321834, "grad_norm": 3.27296853899026, "learning_rate": 8.671139445160778e-07, "loss": 0.9966, "step": 81520 }, { "epoch": 2.4038036267138434, "grad_norm": 3.446915799410377, "learning_rate": 8.667025947690601e-07, "loss": 1.0495, "step": 81525 }, { "epoch": 2.403951054105853, "grad_norm": 3.4475610167998685, "learning_rate": 8.662913298663618e-07, "loss": 1.065, "step": 81530 }, { "epoch": 2.4040984814978623, "grad_norm": 3.7093230858964366, "learning_rate": 8.658801498200821e-07, "loss": 1.1009, "step": 81535 }, { "epoch": 2.4042459088898718, "grad_norm": 3.489358399270032, "learning_rate": 8.654690546423241e-07, "loss": 1.0137, "step": 81540 }, { "epoch": 2.4043933362818812, "grad_norm": 3.516008447228858, "learning_rate": 8.650580443451821e-07, "loss": 1.0161, "step": 81545 }, { "epoch": 2.4045407636738907, "grad_norm": 3.444194645405494, "learning_rate": 8.646471189407512e-07, "loss": 1.0599, "step": 81550 }, { "epoch": 2.4046881910659, "grad_norm": 3.469601200081407, "learning_rate": 8.642362784411235e-07, "loss": 1.0278, "step": 81555 }, { "epoch": 2.4048356184579096, "grad_norm": 3.2730456329603093, "learning_rate": 8.638255228583889e-07, "loss": 1.0086, "step": 81560 }, { "epoch": 2.404983045849919, "grad_norm": 3.575600431187224, "learning_rate": 8.63414852204635e-07, "loss": 1.0904, "step": 81565 }, { "epoch": 2.4051304732419285, "grad_norm": 3.4668018873745874, "learning_rate": 8.630042664919438e-07, "loss": 1.0307, "step": 81570 }, { "epoch": 2.405277900633938, "grad_norm": 3.466837470157543, "learning_rate": 8.625937657323988e-07, "loss": 1.065, "step": 81575 }, { "epoch": 2.405425328025947, "grad_norm": 3.6110185080890864, "learning_rate": 8.621833499380792e-07, "loss": 1.0377, "step": 81580 }, { "epoch": 2.405572755417957, "grad_norm": 3.4605342268178254, "learning_rate": 8.617730191210622e-07, "loss": 1.0407, "step": 81585 }, { "epoch": 2.405720182809966, "grad_norm": 3.524777562070408, "learning_rate": 8.61362773293422e-07, "loss": 1.0681, "step": 81590 }, { "epoch": 2.4058676102019754, "grad_norm": 3.6240593224836823, "learning_rate": 8.60952612467231e-07, "loss": 1.0476, "step": 81595 }, { "epoch": 2.406015037593985, "grad_norm": 3.5323027544521577, "learning_rate": 8.60542536654558e-07, "loss": 0.9968, "step": 81600 }, { "epoch": 2.4061624649859943, "grad_norm": 3.4799808602409277, "learning_rate": 8.601325458674715e-07, "loss": 1.0437, "step": 81605 }, { "epoch": 2.4063098923780037, "grad_norm": 3.4337136776680683, "learning_rate": 8.597226401180327e-07, "loss": 1.0199, "step": 81610 }, { "epoch": 2.406457319770013, "grad_norm": 3.487536885097157, "learning_rate": 8.593128194183075e-07, "loss": 1.0249, "step": 81615 }, { "epoch": 2.4066047471620227, "grad_norm": 3.307947151165586, "learning_rate": 8.589030837803524e-07, "loss": 1.0576, "step": 81620 }, { "epoch": 2.406752174554032, "grad_norm": 3.363802146907851, "learning_rate": 8.584934332162253e-07, "loss": 1.0357, "step": 81625 }, { "epoch": 2.4068996019460416, "grad_norm": 3.477287997945358, "learning_rate": 8.580838677379815e-07, "loss": 1.0607, "step": 81630 }, { "epoch": 2.407047029338051, "grad_norm": 3.611579746786804, "learning_rate": 8.576743873576706e-07, "loss": 1.0658, "step": 81635 }, { "epoch": 2.4071944567300605, "grad_norm": 3.5026754285815853, "learning_rate": 8.572649920873454e-07, "loss": 1.0178, "step": 81640 }, { "epoch": 2.40734188412207, "grad_norm": 3.492852144382359, "learning_rate": 8.568556819390484e-07, "loss": 1.0362, "step": 81645 }, { "epoch": 2.4074893115140794, "grad_norm": 3.6275165081060194, "learning_rate": 8.564464569248291e-07, "loss": 1.0478, "step": 81650 }, { "epoch": 2.407636738906089, "grad_norm": 3.3357254488147294, "learning_rate": 8.560373170567258e-07, "loss": 1.0541, "step": 81655 }, { "epoch": 2.4077841662980983, "grad_norm": 3.5473222400458813, "learning_rate": 8.556282623467792e-07, "loss": 1.0981, "step": 81660 }, { "epoch": 2.407931593690108, "grad_norm": 3.5057169470604808, "learning_rate": 8.552192928070258e-07, "loss": 1.0684, "step": 81665 }, { "epoch": 2.4080790210821172, "grad_norm": 3.507620471655813, "learning_rate": 8.548104084495001e-07, "loss": 1.0391, "step": 81670 }, { "epoch": 2.4082264484741263, "grad_norm": 3.3060961685008103, "learning_rate": 8.544016092862345e-07, "loss": 1.0019, "step": 81675 }, { "epoch": 2.408373875866136, "grad_norm": 3.4993608614989706, "learning_rate": 8.539928953292587e-07, "loss": 1.0786, "step": 81680 }, { "epoch": 2.408521303258145, "grad_norm": 3.4626273762328132, "learning_rate": 8.535842665905968e-07, "loss": 1.0394, "step": 81685 }, { "epoch": 2.4086687306501546, "grad_norm": 3.4445473776357667, "learning_rate": 8.53175723082277e-07, "loss": 1.0447, "step": 81690 }, { "epoch": 2.408816158042164, "grad_norm": 3.5055394645201354, "learning_rate": 8.527672648163189e-07, "loss": 1.084, "step": 81695 }, { "epoch": 2.4089635854341735, "grad_norm": 3.4620187153873925, "learning_rate": 8.523588918047421e-07, "loss": 1.0512, "step": 81700 }, { "epoch": 2.409111012826183, "grad_norm": 3.3967712530183563, "learning_rate": 8.519506040595644e-07, "loss": 1.0728, "step": 81705 }, { "epoch": 2.4092584402181925, "grad_norm": 3.4428513797413283, "learning_rate": 8.515424015927977e-07, "loss": 1.0125, "step": 81710 }, { "epoch": 2.409405867610202, "grad_norm": 3.4468747369299946, "learning_rate": 8.511342844164577e-07, "loss": 1.0927, "step": 81715 }, { "epoch": 2.4095532950022114, "grad_norm": 3.501617907204212, "learning_rate": 8.507262525425491e-07, "loss": 1.0305, "step": 81720 }, { "epoch": 2.409700722394221, "grad_norm": 3.556767097942223, "learning_rate": 8.503183059830835e-07, "loss": 1.0822, "step": 81725 }, { "epoch": 2.4098481497862303, "grad_norm": 3.394695968922247, "learning_rate": 8.499104447500613e-07, "loss": 1.0432, "step": 81730 }, { "epoch": 2.4099955771782398, "grad_norm": 3.600370946584797, "learning_rate": 8.495026688554864e-07, "loss": 1.0561, "step": 81735 }, { "epoch": 2.410143004570249, "grad_norm": 3.5200454541722435, "learning_rate": 8.490949783113572e-07, "loss": 1.0359, "step": 81740 }, { "epoch": 2.4102904319622587, "grad_norm": 3.4619973488942466, "learning_rate": 8.48687373129671e-07, "loss": 1.0505, "step": 81745 }, { "epoch": 2.410437859354268, "grad_norm": 3.491768461893665, "learning_rate": 8.482798533224215e-07, "loss": 1.068, "step": 81750 }, { "epoch": 2.4105852867462776, "grad_norm": 3.3935292964444383, "learning_rate": 8.47872418901602e-07, "loss": 1.0481, "step": 81755 }, { "epoch": 2.410732714138287, "grad_norm": 3.4963035587462827, "learning_rate": 8.474650698791991e-07, "loss": 1.058, "step": 81760 }, { "epoch": 2.4108801415302965, "grad_norm": 3.5362873365066627, "learning_rate": 8.470578062672014e-07, "loss": 1.0464, "step": 81765 }, { "epoch": 2.4110275689223055, "grad_norm": 3.4657602679125086, "learning_rate": 8.466506280775918e-07, "loss": 1.1045, "step": 81770 }, { "epoch": 2.4111749963143154, "grad_norm": 3.5801859298459617, "learning_rate": 8.462435353223535e-07, "loss": 1.0365, "step": 81775 }, { "epoch": 2.4113224237063244, "grad_norm": 3.548947604181605, "learning_rate": 8.458365280134644e-07, "loss": 1.0346, "step": 81780 }, { "epoch": 2.411469851098334, "grad_norm": 3.5836136343352263, "learning_rate": 8.45429606162902e-07, "loss": 1.0427, "step": 81785 }, { "epoch": 2.4116172784903434, "grad_norm": 3.470375659984306, "learning_rate": 8.450227697826408e-07, "loss": 1.0476, "step": 81790 }, { "epoch": 2.411764705882353, "grad_norm": 3.5564748836004467, "learning_rate": 8.446160188846501e-07, "loss": 1.0296, "step": 81795 }, { "epoch": 2.4119121332743623, "grad_norm": 3.546850725529361, "learning_rate": 8.442093534809023e-07, "loss": 1.063, "step": 81800 }, { "epoch": 2.4120595606663717, "grad_norm": 3.483651234066878, "learning_rate": 8.438027735833616e-07, "loss": 1.0472, "step": 81805 }, { "epoch": 2.412206988058381, "grad_norm": 3.589569998510135, "learning_rate": 8.433962792039926e-07, "loss": 1.0317, "step": 81810 }, { "epoch": 2.4123544154503906, "grad_norm": 3.49780214700514, "learning_rate": 8.429898703547574e-07, "loss": 1.0269, "step": 81815 }, { "epoch": 2.4125018428424, "grad_norm": 3.435213284817521, "learning_rate": 8.425835470476144e-07, "loss": 1.0426, "step": 81820 }, { "epoch": 2.4126492702344096, "grad_norm": 3.543423398232076, "learning_rate": 8.421773092945214e-07, "loss": 1.0793, "step": 81825 }, { "epoch": 2.412796697626419, "grad_norm": 3.5094641885546403, "learning_rate": 8.417711571074308e-07, "loss": 1.0301, "step": 81830 }, { "epoch": 2.4129441250184285, "grad_norm": 3.6894769873901865, "learning_rate": 8.413650904982948e-07, "loss": 1.0446, "step": 81835 }, { "epoch": 2.413091552410438, "grad_norm": 3.5896521192388415, "learning_rate": 8.409591094790618e-07, "loss": 1.0646, "step": 81840 }, { "epoch": 2.4132389798024474, "grad_norm": 3.4322735150792596, "learning_rate": 8.405532140616791e-07, "loss": 1.0376, "step": 81845 }, { "epoch": 2.413386407194457, "grad_norm": 3.640909653268257, "learning_rate": 8.401474042580903e-07, "loss": 1.0938, "step": 81850 }, { "epoch": 2.4135338345864663, "grad_norm": 3.5498032473905576, "learning_rate": 8.397416800802369e-07, "loss": 1.0545, "step": 81855 }, { "epoch": 2.4136812619784758, "grad_norm": 3.5316843206200055, "learning_rate": 8.393360415400578e-07, "loss": 1.048, "step": 81860 }, { "epoch": 2.4138286893704852, "grad_norm": 3.3217142312266486, "learning_rate": 8.389304886494899e-07, "loss": 1.0422, "step": 81865 }, { "epoch": 2.4139761167624947, "grad_norm": 3.4862026789339713, "learning_rate": 8.385250214204648e-07, "loss": 1.0157, "step": 81870 }, { "epoch": 2.4141235441545037, "grad_norm": 3.654038463624531, "learning_rate": 8.381196398649177e-07, "loss": 1.0102, "step": 81875 }, { "epoch": 2.414270971546513, "grad_norm": 3.4748176493498844, "learning_rate": 8.377143439947739e-07, "loss": 1.0776, "step": 81880 }, { "epoch": 2.4144183989385226, "grad_norm": 3.444222079765325, "learning_rate": 8.37309133821961e-07, "loss": 1.0392, "step": 81885 }, { "epoch": 2.414565826330532, "grad_norm": 3.4376989999945144, "learning_rate": 8.369040093584036e-07, "loss": 1.0471, "step": 81890 }, { "epoch": 2.4147132537225415, "grad_norm": 3.6343365119698543, "learning_rate": 8.364989706160206e-07, "loss": 1.0315, "step": 81895 }, { "epoch": 2.414860681114551, "grad_norm": 3.489739334572094, "learning_rate": 8.36094017606734e-07, "loss": 1.0353, "step": 81900 }, { "epoch": 2.4150081085065604, "grad_norm": 3.5613348171611454, "learning_rate": 8.35689150342456e-07, "loss": 1.0344, "step": 81905 }, { "epoch": 2.41515553589857, "grad_norm": 3.65835389946263, "learning_rate": 8.35284368835105e-07, "loss": 1.0561, "step": 81910 }, { "epoch": 2.4153029632905794, "grad_norm": 3.5616874305335338, "learning_rate": 8.348796730965885e-07, "loss": 1.0623, "step": 81915 }, { "epoch": 2.415450390682589, "grad_norm": 3.55843833253903, "learning_rate": 8.344750631388166e-07, "loss": 1.1022, "step": 81920 }, { "epoch": 2.4155978180745983, "grad_norm": 3.4783008028353457, "learning_rate": 8.340705389736948e-07, "loss": 1.0484, "step": 81925 }, { "epoch": 2.4157452454666077, "grad_norm": 3.6791053781268324, "learning_rate": 8.336661006131272e-07, "loss": 1.0806, "step": 81930 }, { "epoch": 2.415892672858617, "grad_norm": 3.591935075899503, "learning_rate": 8.332617480690145e-07, "loss": 1.0723, "step": 81935 }, { "epoch": 2.4160401002506267, "grad_norm": 3.5913734248282076, "learning_rate": 8.328574813532566e-07, "loss": 1.0013, "step": 81940 }, { "epoch": 2.416187527642636, "grad_norm": 3.5104125928807206, "learning_rate": 8.324533004777466e-07, "loss": 1.0675, "step": 81945 }, { "epoch": 2.4163349550346456, "grad_norm": 3.4958987888911772, "learning_rate": 8.320492054543814e-07, "loss": 1.0526, "step": 81950 }, { "epoch": 2.416482382426655, "grad_norm": 3.479969306349517, "learning_rate": 8.316451962950493e-07, "loss": 1.0613, "step": 81955 }, { "epoch": 2.4166298098186645, "grad_norm": 3.485595368301287, "learning_rate": 8.312412730116398e-07, "loss": 1.0096, "step": 81960 }, { "epoch": 2.416777237210674, "grad_norm": 3.335506934701413, "learning_rate": 8.308374356160397e-07, "loss": 1.0471, "step": 81965 }, { "epoch": 2.416924664602683, "grad_norm": 3.5644913322683713, "learning_rate": 8.304336841201289e-07, "loss": 1.0953, "step": 81970 }, { "epoch": 2.417072091994693, "grad_norm": 3.5027074001957996, "learning_rate": 8.300300185357936e-07, "loss": 1.0833, "step": 81975 }, { "epoch": 2.417219519386702, "grad_norm": 3.2788672634179794, "learning_rate": 8.296264388749065e-07, "loss": 1.0529, "step": 81980 }, { "epoch": 2.4173669467787113, "grad_norm": 3.219283378429575, "learning_rate": 8.292229451493484e-07, "loss": 1.0099, "step": 81985 }, { "epoch": 2.417514374170721, "grad_norm": 3.377634684481497, "learning_rate": 8.288195373709896e-07, "loss": 1.0032, "step": 81990 }, { "epoch": 2.4176618015627303, "grad_norm": 3.667860529819075, "learning_rate": 8.284162155517015e-07, "loss": 1.1026, "step": 81995 }, { "epoch": 2.4178092289547397, "grad_norm": 3.39569720850103, "learning_rate": 8.280129797033521e-07, "loss": 1.0878, "step": 82000 }, { "epoch": 2.4178092289547397, "eval_loss": 1.0809013843536377, "eval_runtime": 4.1872, "eval_samples_per_second": 94.574, "eval_steps_per_second": 3.105, "step": 82000 }, { "epoch": 2.417956656346749, "grad_norm": 3.5104457826953444, "learning_rate": 8.276098298378074e-07, "loss": 1.0118, "step": 82005 }, { "epoch": 2.4181040837387586, "grad_norm": 3.486475503262239, "learning_rate": 8.272067659669306e-07, "loss": 1.0536, "step": 82010 }, { "epoch": 2.418251511130768, "grad_norm": 3.4365370569344527, "learning_rate": 8.268037881025829e-07, "loss": 1.0421, "step": 82015 }, { "epoch": 2.4183989385227775, "grad_norm": 3.4039797395575486, "learning_rate": 8.264008962566201e-07, "loss": 1.0494, "step": 82020 }, { "epoch": 2.418546365914787, "grad_norm": 3.6258267127457655, "learning_rate": 8.25998090440901e-07, "loss": 1.0488, "step": 82025 }, { "epoch": 2.4186937933067965, "grad_norm": 3.444021667935859, "learning_rate": 8.255953706672763e-07, "loss": 1.0346, "step": 82030 }, { "epoch": 2.418841220698806, "grad_norm": 3.373949724240744, "learning_rate": 8.251927369475967e-07, "loss": 1.0552, "step": 82035 }, { "epoch": 2.4189886480908154, "grad_norm": 3.5481282590830503, "learning_rate": 8.247901892937108e-07, "loss": 1.0642, "step": 82040 }, { "epoch": 2.419136075482825, "grad_norm": 3.519896711958141, "learning_rate": 8.243877277174637e-07, "loss": 1.0308, "step": 82045 }, { "epoch": 2.4192835028748343, "grad_norm": 3.3825397127374113, "learning_rate": 8.239853522306997e-07, "loss": 1.0412, "step": 82050 }, { "epoch": 2.4194309302668437, "grad_norm": 3.5304508316274252, "learning_rate": 8.235830628452558e-07, "loss": 1.062, "step": 82055 }, { "epoch": 2.419578357658853, "grad_norm": 3.5400441810687027, "learning_rate": 8.231808595729736e-07, "loss": 1.0397, "step": 82060 }, { "epoch": 2.419725785050862, "grad_norm": 3.3073380705830147, "learning_rate": 8.227787424256859e-07, "loss": 1.039, "step": 82065 }, { "epoch": 2.419873212442872, "grad_norm": 3.453161125475636, "learning_rate": 8.22376711415226e-07, "loss": 1.0321, "step": 82070 }, { "epoch": 2.420020639834881, "grad_norm": 3.4518958015302226, "learning_rate": 8.219747665534247e-07, "loss": 1.06, "step": 82075 }, { "epoch": 2.4201680672268906, "grad_norm": 3.4862912942394795, "learning_rate": 8.215729078521092e-07, "loss": 1.0385, "step": 82080 }, { "epoch": 2.4203154946189, "grad_norm": 3.351929459931964, "learning_rate": 8.211711353231051e-07, "loss": 1.0035, "step": 82085 }, { "epoch": 2.4204629220109095, "grad_norm": 3.4347011253529978, "learning_rate": 8.207694489782354e-07, "loss": 1.0253, "step": 82090 }, { "epoch": 2.420610349402919, "grad_norm": 3.5423588194077573, "learning_rate": 8.203678488293186e-07, "loss": 1.0634, "step": 82095 }, { "epoch": 2.4207577767949284, "grad_norm": 3.6086687425569446, "learning_rate": 8.199663348881726e-07, "loss": 1.0164, "step": 82100 }, { "epoch": 2.420905204186938, "grad_norm": 3.5176732479588773, "learning_rate": 8.195649071666136e-07, "loss": 1.0144, "step": 82105 }, { "epoch": 2.4210526315789473, "grad_norm": 3.490674665320309, "learning_rate": 8.191635656764533e-07, "loss": 1.0158, "step": 82110 }, { "epoch": 2.421200058970957, "grad_norm": 3.444072512324404, "learning_rate": 8.187623104295018e-07, "loss": 1.0424, "step": 82115 }, { "epoch": 2.4213474863629663, "grad_norm": 3.443104440180337, "learning_rate": 8.183611414375659e-07, "loss": 1.0546, "step": 82120 }, { "epoch": 2.4214949137549757, "grad_norm": 3.494249279696892, "learning_rate": 8.179600587124525e-07, "loss": 1.0546, "step": 82125 }, { "epoch": 2.421642341146985, "grad_norm": 3.620830002147392, "learning_rate": 8.175590622659604e-07, "loss": 1.076, "step": 82130 }, { "epoch": 2.4217897685389946, "grad_norm": 3.5882052536798206, "learning_rate": 8.171581521098935e-07, "loss": 1.0674, "step": 82135 }, { "epoch": 2.421937195931004, "grad_norm": 3.420621903931893, "learning_rate": 8.167573282560457e-07, "loss": 1.0082, "step": 82140 }, { "epoch": 2.4220846233230136, "grad_norm": 3.374645865551017, "learning_rate": 8.163565907162126e-07, "loss": 1.0071, "step": 82145 }, { "epoch": 2.422232050715023, "grad_norm": 3.492175786637005, "learning_rate": 8.159559395021874e-07, "loss": 1.0038, "step": 82150 }, { "epoch": 2.4223794781070325, "grad_norm": 3.7139739513416683, "learning_rate": 8.155553746257587e-07, "loss": 1.0847, "step": 82155 }, { "epoch": 2.4225269054990415, "grad_norm": 3.4979480711620137, "learning_rate": 8.151548960987147e-07, "loss": 1.0567, "step": 82160 }, { "epoch": 2.4226743328910514, "grad_norm": 3.404187579297589, "learning_rate": 8.147545039328373e-07, "loss": 1.0511, "step": 82165 }, { "epoch": 2.4228217602830604, "grad_norm": 3.445600113828345, "learning_rate": 8.14354198139912e-07, "loss": 1.0127, "step": 82170 }, { "epoch": 2.42296918767507, "grad_norm": 3.5574172717233212, "learning_rate": 8.139539787317162e-07, "loss": 1.0417, "step": 82175 }, { "epoch": 2.4231166150670793, "grad_norm": 3.380827866213443, "learning_rate": 8.135538457200268e-07, "loss": 1.0131, "step": 82180 }, { "epoch": 2.4232640424590888, "grad_norm": 3.7089527282796686, "learning_rate": 8.131537991166186e-07, "loss": 1.0954, "step": 82185 }, { "epoch": 2.4234114698510982, "grad_norm": 3.6748086667318796, "learning_rate": 8.127538389332634e-07, "loss": 1.0724, "step": 82190 }, { "epoch": 2.4235588972431077, "grad_norm": 3.435905413927928, "learning_rate": 8.123539651817303e-07, "loss": 1.0081, "step": 82195 }, { "epoch": 2.423706324635117, "grad_norm": 3.669421514407055, "learning_rate": 8.119541778737876e-07, "loss": 1.0463, "step": 82200 }, { "epoch": 2.4238537520271266, "grad_norm": 3.527186988925894, "learning_rate": 8.115544770211955e-07, "loss": 1.1035, "step": 82205 }, { "epoch": 2.424001179419136, "grad_norm": 3.5666920077241624, "learning_rate": 8.111548626357207e-07, "loss": 1.0373, "step": 82210 }, { "epoch": 2.4241486068111455, "grad_norm": 3.5011814686915335, "learning_rate": 8.107553347291191e-07, "loss": 1.0574, "step": 82215 }, { "epoch": 2.424296034203155, "grad_norm": 3.5038200406482916, "learning_rate": 8.103558933131476e-07, "loss": 1.0144, "step": 82220 }, { "epoch": 2.4244434615951644, "grad_norm": 3.432278923791677, "learning_rate": 8.099565383995617e-07, "loss": 1.0502, "step": 82225 }, { "epoch": 2.424590888987174, "grad_norm": 3.361002832042888, "learning_rate": 8.095572700001102e-07, "loss": 1.0551, "step": 82230 }, { "epoch": 2.4247383163791834, "grad_norm": 3.6475131326170747, "learning_rate": 8.091580881265454e-07, "loss": 1.0508, "step": 82235 }, { "epoch": 2.424885743771193, "grad_norm": 3.3141292196746712, "learning_rate": 8.087589927906102e-07, "loss": 0.9932, "step": 82240 }, { "epoch": 2.4250331711632023, "grad_norm": 3.3550380420414037, "learning_rate": 8.083599840040517e-07, "loss": 1.0395, "step": 82245 }, { "epoch": 2.4251805985552117, "grad_norm": 3.4761645956586826, "learning_rate": 8.079610617786091e-07, "loss": 1.0303, "step": 82250 }, { "epoch": 2.4253280259472207, "grad_norm": 3.4154868506492004, "learning_rate": 8.075622261260212e-07, "loss": 1.0477, "step": 82255 }, { "epoch": 2.4254754533392306, "grad_norm": 3.289474639125218, "learning_rate": 8.071634770580254e-07, "loss": 1.0051, "step": 82260 }, { "epoch": 2.4256228807312397, "grad_norm": 3.5374478827723195, "learning_rate": 8.067648145863542e-07, "loss": 1.0363, "step": 82265 }, { "epoch": 2.425770308123249, "grad_norm": 3.5440640598332775, "learning_rate": 8.063662387227392e-07, "loss": 1.0117, "step": 82270 }, { "epoch": 2.4259177355152586, "grad_norm": 3.548473761738834, "learning_rate": 8.0596774947891e-07, "loss": 1.0387, "step": 82275 }, { "epoch": 2.426065162907268, "grad_norm": 3.5615614663482233, "learning_rate": 8.055693468665895e-07, "loss": 1.0808, "step": 82280 }, { "epoch": 2.4262125902992775, "grad_norm": 3.5617664515186007, "learning_rate": 8.05171030897505e-07, "loss": 1.0401, "step": 82285 }, { "epoch": 2.426360017691287, "grad_norm": 3.3559605352752606, "learning_rate": 8.047728015833748e-07, "loss": 1.0497, "step": 82290 }, { "epoch": 2.4265074450832964, "grad_norm": 3.6060763764695776, "learning_rate": 8.043746589359177e-07, "loss": 1.0755, "step": 82295 }, { "epoch": 2.426654872475306, "grad_norm": 3.4686902655537413, "learning_rate": 8.039766029668501e-07, "loss": 1.0092, "step": 82300 }, { "epoch": 2.4268022998673153, "grad_norm": 3.5636833025596, "learning_rate": 8.03578633687885e-07, "loss": 1.0478, "step": 82305 }, { "epoch": 2.426949727259325, "grad_norm": 3.5346921021552578, "learning_rate": 8.03180751110734e-07, "loss": 1.0422, "step": 82310 }, { "epoch": 2.4270971546513342, "grad_norm": 3.2604929161168714, "learning_rate": 8.027829552471019e-07, "loss": 1.0313, "step": 82315 }, { "epoch": 2.4272445820433437, "grad_norm": 3.47715449650859, "learning_rate": 8.023852461086993e-07, "loss": 1.0452, "step": 82320 }, { "epoch": 2.427392009435353, "grad_norm": 3.5443208020255357, "learning_rate": 8.019876237072256e-07, "loss": 1.039, "step": 82325 }, { "epoch": 2.4275394368273626, "grad_norm": 3.527364268341522, "learning_rate": 8.015900880543818e-07, "loss": 0.9846, "step": 82330 }, { "epoch": 2.427686864219372, "grad_norm": 3.318908608132563, "learning_rate": 8.01192639161867e-07, "loss": 1.0306, "step": 82335 }, { "epoch": 2.4278342916113815, "grad_norm": 3.5710824567265282, "learning_rate": 8.007952770413758e-07, "loss": 1.0625, "step": 82340 }, { "epoch": 2.427981719003391, "grad_norm": 3.5888071840293656, "learning_rate": 8.003980017046011e-07, "loss": 1.0294, "step": 82345 }, { "epoch": 2.4281291463954005, "grad_norm": 3.6055011023192565, "learning_rate": 8.000008131632344e-07, "loss": 1.0565, "step": 82350 }, { "epoch": 2.42827657378741, "grad_norm": 3.381323309139179, "learning_rate": 7.996037114289608e-07, "loss": 1.0117, "step": 82355 }, { "epoch": 2.428424001179419, "grad_norm": 3.4850874849419013, "learning_rate": 7.992066965134678e-07, "loss": 1.0294, "step": 82360 }, { "epoch": 2.4285714285714284, "grad_norm": 3.43056056162376, "learning_rate": 7.988097684284368e-07, "loss": 1.0201, "step": 82365 }, { "epoch": 2.428718855963438, "grad_norm": 3.56191836892392, "learning_rate": 7.984129271855483e-07, "loss": 1.0811, "step": 82370 }, { "epoch": 2.4288662833554473, "grad_norm": 3.4389731937237484, "learning_rate": 7.980161727964799e-07, "loss": 1.0463, "step": 82375 }, { "epoch": 2.4290137107474568, "grad_norm": 3.3819561689281348, "learning_rate": 7.976195052729063e-07, "loss": 1.0092, "step": 82380 }, { "epoch": 2.429161138139466, "grad_norm": 3.531161540711138, "learning_rate": 7.97222924626501e-07, "loss": 1.0602, "step": 82385 }, { "epoch": 2.4293085655314757, "grad_norm": 3.4331859092951427, "learning_rate": 7.968264308689306e-07, "loss": 1.0419, "step": 82390 }, { "epoch": 2.429455992923485, "grad_norm": 3.3226844788850207, "learning_rate": 7.96430024011867e-07, "loss": 1.0149, "step": 82395 }, { "epoch": 2.4296034203154946, "grad_norm": 3.43866637063219, "learning_rate": 7.960337040669717e-07, "loss": 1.0712, "step": 82400 }, { "epoch": 2.429750847707504, "grad_norm": 3.7472855417133584, "learning_rate": 7.956374710459074e-07, "loss": 1.0788, "step": 82405 }, { "epoch": 2.4298982750995135, "grad_norm": 3.363906649092083, "learning_rate": 7.952413249603345e-07, "loss": 1.0116, "step": 82410 }, { "epoch": 2.430045702491523, "grad_norm": 3.4111199036624797, "learning_rate": 7.94845265821909e-07, "loss": 1.0615, "step": 82415 }, { "epoch": 2.4301931298835324, "grad_norm": 3.708224936152888, "learning_rate": 7.944492936422873e-07, "loss": 1.0952, "step": 82420 }, { "epoch": 2.430340557275542, "grad_norm": 3.506917170950262, "learning_rate": 7.940534084331181e-07, "loss": 1.0282, "step": 82425 }, { "epoch": 2.4304879846675513, "grad_norm": 3.512758656391096, "learning_rate": 7.936576102060547e-07, "loss": 1.0598, "step": 82430 }, { "epoch": 2.430635412059561, "grad_norm": 3.2780772506062483, "learning_rate": 7.932618989727405e-07, "loss": 1.0046, "step": 82435 }, { "epoch": 2.4307828394515703, "grad_norm": 3.5034441011653503, "learning_rate": 7.928662747448218e-07, "loss": 1.0461, "step": 82440 }, { "epoch": 2.4309302668435797, "grad_norm": 3.5555517059112294, "learning_rate": 7.92470737533939e-07, "loss": 1.0602, "step": 82445 }, { "epoch": 2.431077694235589, "grad_norm": 3.6400334161693597, "learning_rate": 7.920752873517323e-07, "loss": 1.0745, "step": 82450 }, { "epoch": 2.431225121627598, "grad_norm": 3.3827895218890958, "learning_rate": 7.916799242098376e-07, "loss": 1.0299, "step": 82455 }, { "epoch": 2.431372549019608, "grad_norm": 3.6248622509255277, "learning_rate": 7.912846481198906e-07, "loss": 1.0859, "step": 82460 }, { "epoch": 2.431519976411617, "grad_norm": 3.580176899675481, "learning_rate": 7.908894590935191e-07, "loss": 1.0454, "step": 82465 }, { "epoch": 2.4316674038036266, "grad_norm": 3.636157629407185, "learning_rate": 7.904943571423559e-07, "loss": 1.0595, "step": 82470 }, { "epoch": 2.431814831195636, "grad_norm": 3.482945184274835, "learning_rate": 7.90099342278025e-07, "loss": 1.0291, "step": 82475 }, { "epoch": 2.4319622585876455, "grad_norm": 3.354119853201055, "learning_rate": 7.89704414512151e-07, "loss": 1.0365, "step": 82480 }, { "epoch": 2.432109685979655, "grad_norm": 3.459630227171038, "learning_rate": 7.893095738563556e-07, "loss": 1.0686, "step": 82485 }, { "epoch": 2.4322571133716644, "grad_norm": 3.476031024399733, "learning_rate": 7.889148203222547e-07, "loss": 1.0126, "step": 82490 }, { "epoch": 2.432404540763674, "grad_norm": 3.2632799340915333, "learning_rate": 7.885201539214686e-07, "loss": 1.0306, "step": 82495 }, { "epoch": 2.4325519681556833, "grad_norm": 3.524015210953621, "learning_rate": 7.881255746656062e-07, "loss": 1.061, "step": 82500 }, { "epoch": 2.4325519681556833, "eval_loss": 1.0806916952133179, "eval_runtime": 4.2549, "eval_samples_per_second": 93.068, "eval_steps_per_second": 3.055, "step": 82500 }, { "epoch": 2.4326993955476928, "grad_norm": 3.483318146103341, "learning_rate": 7.877310825662832e-07, "loss": 1.0531, "step": 82505 }, { "epoch": 2.4328468229397022, "grad_norm": 3.467824574233095, "learning_rate": 7.873366776351047e-07, "loss": 1.0748, "step": 82510 }, { "epoch": 2.4329942503317117, "grad_norm": 3.478804569142394, "learning_rate": 7.869423598836776e-07, "loss": 1.0364, "step": 82515 }, { "epoch": 2.433141677723721, "grad_norm": 3.4888292304256794, "learning_rate": 7.865481293236049e-07, "loss": 1.0785, "step": 82520 }, { "epoch": 2.4332891051157306, "grad_norm": 3.4811794202329107, "learning_rate": 7.861539859664871e-07, "loss": 1.0618, "step": 82525 }, { "epoch": 2.43343653250774, "grad_norm": 3.5881036000333477, "learning_rate": 7.857599298239228e-07, "loss": 1.0335, "step": 82530 }, { "epoch": 2.4335839598997495, "grad_norm": 3.622304437568039, "learning_rate": 7.853659609075086e-07, "loss": 1.0249, "step": 82535 }, { "epoch": 2.433731387291759, "grad_norm": 3.5676116378900984, "learning_rate": 7.849720792288343e-07, "loss": 1.0626, "step": 82540 }, { "epoch": 2.4338788146837684, "grad_norm": 3.33601556316479, "learning_rate": 7.845782847994939e-07, "loss": 1.0541, "step": 82545 }, { "epoch": 2.4340262420757774, "grad_norm": 3.394518867016104, "learning_rate": 7.841845776310726e-07, "loss": 1.0197, "step": 82550 }, { "epoch": 2.4341736694677873, "grad_norm": 3.482518866188129, "learning_rate": 7.837909577351571e-07, "loss": 1.0108, "step": 82555 }, { "epoch": 2.4343210968597964, "grad_norm": 3.3344911286208383, "learning_rate": 7.833974251233294e-07, "loss": 1.0315, "step": 82560 }, { "epoch": 2.434468524251806, "grad_norm": 3.541173852898199, "learning_rate": 7.830039798071701e-07, "loss": 1.0205, "step": 82565 }, { "epoch": 2.4346159516438153, "grad_norm": 3.5299862911776874, "learning_rate": 7.826106217982578e-07, "loss": 1.0322, "step": 82570 }, { "epoch": 2.4347633790358247, "grad_norm": 3.5860753920172725, "learning_rate": 7.822173511081641e-07, "loss": 1.0702, "step": 82575 }, { "epoch": 2.434910806427834, "grad_norm": 3.5595040653159895, "learning_rate": 7.818241677484658e-07, "loss": 1.0505, "step": 82580 }, { "epoch": 2.4350582338198437, "grad_norm": 3.6519022284642912, "learning_rate": 7.814310717307295e-07, "loss": 1.0276, "step": 82585 }, { "epoch": 2.435205661211853, "grad_norm": 3.5248285733410305, "learning_rate": 7.810380630665239e-07, "loss": 1.04, "step": 82590 }, { "epoch": 2.4353530886038626, "grad_norm": 3.6707962688403684, "learning_rate": 7.806451417674134e-07, "loss": 1.0463, "step": 82595 }, { "epoch": 2.435500515995872, "grad_norm": 3.559786731807217, "learning_rate": 7.802523078449604e-07, "loss": 1.0505, "step": 82600 }, { "epoch": 2.4356479433878815, "grad_norm": 3.4388462807704614, "learning_rate": 7.798595613107242e-07, "loss": 1.0324, "step": 82605 }, { "epoch": 2.435795370779891, "grad_norm": 3.3985862453652853, "learning_rate": 7.794669021762628e-07, "loss": 1.0042, "step": 82610 }, { "epoch": 2.4359427981719004, "grad_norm": 3.39164585461171, "learning_rate": 7.790743304531292e-07, "loss": 1.0285, "step": 82615 }, { "epoch": 2.43609022556391, "grad_norm": 3.539234708088716, "learning_rate": 7.786818461528754e-07, "loss": 1.0145, "step": 82620 }, { "epoch": 2.4362376529559193, "grad_norm": 3.4743669375926802, "learning_rate": 7.782894492870517e-07, "loss": 1.0525, "step": 82625 }, { "epoch": 2.4363850803479288, "grad_norm": 3.4624813131090577, "learning_rate": 7.778971398672039e-07, "loss": 1.0814, "step": 82630 }, { "epoch": 2.4365325077399382, "grad_norm": 3.4797436492309495, "learning_rate": 7.775049179048764e-07, "loss": 1.0449, "step": 82635 }, { "epoch": 2.4366799351319477, "grad_norm": 3.562360682452563, "learning_rate": 7.771127834116114e-07, "loss": 1.0474, "step": 82640 }, { "epoch": 2.4368273625239567, "grad_norm": 3.3802472277585265, "learning_rate": 7.767207363989478e-07, "loss": 1.0661, "step": 82645 }, { "epoch": 2.4369747899159666, "grad_norm": 3.462435732176961, "learning_rate": 7.763287768784204e-07, "loss": 1.08, "step": 82650 }, { "epoch": 2.4371222173079756, "grad_norm": 3.437628194069259, "learning_rate": 7.75936904861566e-07, "loss": 1.0573, "step": 82655 }, { "epoch": 2.437269644699985, "grad_norm": 3.4375905057169076, "learning_rate": 7.755451203599129e-07, "loss": 1.0574, "step": 82660 }, { "epoch": 2.4374170720919945, "grad_norm": 3.6713229426604173, "learning_rate": 7.751534233849914e-07, "loss": 1.0569, "step": 82665 }, { "epoch": 2.437564499484004, "grad_norm": 3.5004724347136267, "learning_rate": 7.747618139483271e-07, "loss": 1.0261, "step": 82670 }, { "epoch": 2.4377119268760135, "grad_norm": 3.508190454476717, "learning_rate": 7.74370292061444e-07, "loss": 1.0977, "step": 82675 }, { "epoch": 2.437859354268023, "grad_norm": 3.547746625798048, "learning_rate": 7.739788577358641e-07, "loss": 1.0248, "step": 82680 }, { "epoch": 2.4380067816600324, "grad_norm": 3.597375003286409, "learning_rate": 7.735875109831022e-07, "loss": 1.027, "step": 82685 }, { "epoch": 2.438154209052042, "grad_norm": 3.670668528032529, "learning_rate": 7.731962518146786e-07, "loss": 1.0454, "step": 82690 }, { "epoch": 2.4383016364440513, "grad_norm": 3.515979262804736, "learning_rate": 7.728050802421034e-07, "loss": 1.0563, "step": 82695 }, { "epoch": 2.4384490638360607, "grad_norm": 3.6580805705561974, "learning_rate": 7.724139962768884e-07, "loss": 1.0406, "step": 82700 }, { "epoch": 2.43859649122807, "grad_norm": 3.440644352601745, "learning_rate": 7.720229999305417e-07, "loss": 1.0185, "step": 82705 }, { "epoch": 2.4387439186200797, "grad_norm": 3.406940308375541, "learning_rate": 7.716320912145685e-07, "loss": 1.0543, "step": 82710 }, { "epoch": 2.438891346012089, "grad_norm": 3.6058147263910607, "learning_rate": 7.712412701404723e-07, "loss": 1.0553, "step": 82715 }, { "epoch": 2.4390387734040986, "grad_norm": 3.3169623623291167, "learning_rate": 7.708505367197539e-07, "loss": 1.0021, "step": 82720 }, { "epoch": 2.439186200796108, "grad_norm": 3.6716915812914275, "learning_rate": 7.704598909639081e-07, "loss": 1.0596, "step": 82725 }, { "epoch": 2.4393336281881175, "grad_norm": 3.5271111544004263, "learning_rate": 7.700693328844346e-07, "loss": 1.0684, "step": 82730 }, { "epoch": 2.439481055580127, "grad_norm": 3.6102607159272444, "learning_rate": 7.696788624928224e-07, "loss": 1.0505, "step": 82735 }, { "epoch": 2.4396284829721364, "grad_norm": 3.4395329373617383, "learning_rate": 7.692884798005628e-07, "loss": 1.0341, "step": 82740 }, { "epoch": 2.439775910364146, "grad_norm": 3.4369664594528313, "learning_rate": 7.688981848191449e-07, "loss": 1.0352, "step": 82745 }, { "epoch": 2.439923337756155, "grad_norm": 3.464029600281127, "learning_rate": 7.685079775600496e-07, "loss": 1.0443, "step": 82750 }, { "epoch": 2.4400707651481643, "grad_norm": 3.5157864902925304, "learning_rate": 7.681178580347633e-07, "loss": 1.0527, "step": 82755 }, { "epoch": 2.440218192540174, "grad_norm": 3.370964254925802, "learning_rate": 7.677278262547621e-07, "loss": 1.0822, "step": 82760 }, { "epoch": 2.4403656199321833, "grad_norm": 3.523609209622491, "learning_rate": 7.673378822315271e-07, "loss": 1.0184, "step": 82765 }, { "epoch": 2.4405130473241927, "grad_norm": 3.565559603349046, "learning_rate": 7.6694802597653e-07, "loss": 1.071, "step": 82770 }, { "epoch": 2.440660474716202, "grad_norm": 3.4151836770386614, "learning_rate": 7.665582575012433e-07, "loss": 1.0333, "step": 82775 }, { "epoch": 2.4408079021082116, "grad_norm": 3.523990783283991, "learning_rate": 7.661685768171366e-07, "loss": 1.0482, "step": 82780 }, { "epoch": 2.440955329500221, "grad_norm": 3.3130440061988375, "learning_rate": 7.657789839356767e-07, "loss": 1.0478, "step": 82785 }, { "epoch": 2.4411027568922306, "grad_norm": 3.403170435470196, "learning_rate": 7.653894788683278e-07, "loss": 1.0466, "step": 82790 }, { "epoch": 2.44125018428424, "grad_norm": 3.5185469760986594, "learning_rate": 7.650000616265525e-07, "loss": 1.0279, "step": 82795 }, { "epoch": 2.4413976116762495, "grad_norm": 3.5710578599638136, "learning_rate": 7.646107322218068e-07, "loss": 1.025, "step": 82800 }, { "epoch": 2.441545039068259, "grad_norm": 3.453329743137671, "learning_rate": 7.642214906655517e-07, "loss": 1.0561, "step": 82805 }, { "epoch": 2.4416924664602684, "grad_norm": 3.4820486062286515, "learning_rate": 7.638323369692372e-07, "loss": 1.0427, "step": 82810 }, { "epoch": 2.441839893852278, "grad_norm": 3.570672278165887, "learning_rate": 7.634432711443162e-07, "loss": 1.0549, "step": 82815 }, { "epoch": 2.4419873212442873, "grad_norm": 3.476533425770265, "learning_rate": 7.630542932022374e-07, "loss": 1.0445, "step": 82820 }, { "epoch": 2.4421347486362968, "grad_norm": 3.4934956519098663, "learning_rate": 7.626654031544464e-07, "loss": 1.0383, "step": 82825 }, { "epoch": 2.442282176028306, "grad_norm": 3.4517793384288535, "learning_rate": 7.622766010123884e-07, "loss": 1.0402, "step": 82830 }, { "epoch": 2.4424296034203157, "grad_norm": 3.5563283925907108, "learning_rate": 7.618878867875011e-07, "loss": 1.0163, "step": 82835 }, { "epoch": 2.442577030812325, "grad_norm": 3.476755178241961, "learning_rate": 7.614992604912265e-07, "loss": 1.0547, "step": 82840 }, { "epoch": 2.442724458204334, "grad_norm": 3.516333632195224, "learning_rate": 7.611107221349979e-07, "loss": 1.0703, "step": 82845 }, { "epoch": 2.442871885596344, "grad_norm": 3.5608322816018707, "learning_rate": 7.607222717302492e-07, "loss": 1.0583, "step": 82850 }, { "epoch": 2.443019312988353, "grad_norm": 3.3817934371284832, "learning_rate": 7.603339092884108e-07, "loss": 1.0638, "step": 82855 }, { "epoch": 2.4431667403803625, "grad_norm": 3.5043633313105373, "learning_rate": 7.59945634820911e-07, "loss": 1.0461, "step": 82860 }, { "epoch": 2.443314167772372, "grad_norm": 3.20527428334297, "learning_rate": 7.595574483391754e-07, "loss": 1.0306, "step": 82865 }, { "epoch": 2.4434615951643814, "grad_norm": 3.493612786233541, "learning_rate": 7.591693498546276e-07, "loss": 1.0291, "step": 82870 }, { "epoch": 2.443609022556391, "grad_norm": 3.431099139619264, "learning_rate": 7.58781339378686e-07, "loss": 1.0339, "step": 82875 }, { "epoch": 2.4437564499484004, "grad_norm": 3.5258882793173165, "learning_rate": 7.583934169227687e-07, "loss": 1.0557, "step": 82880 }, { "epoch": 2.44390387734041, "grad_norm": 3.520757501563877, "learning_rate": 7.580055824982914e-07, "loss": 1.0913, "step": 82885 }, { "epoch": 2.4440513047324193, "grad_norm": 3.4668155113971317, "learning_rate": 7.576178361166667e-07, "loss": 1.0906, "step": 82890 }, { "epoch": 2.4441987321244287, "grad_norm": 3.411545838123237, "learning_rate": 7.572301777893033e-07, "loss": 1.1277, "step": 82895 }, { "epoch": 2.444346159516438, "grad_norm": 3.4846906753547606, "learning_rate": 7.568426075276097e-07, "loss": 1.0298, "step": 82900 }, { "epoch": 2.4444935869084476, "grad_norm": 3.4802860549514323, "learning_rate": 7.564551253429913e-07, "loss": 1.0411, "step": 82905 }, { "epoch": 2.444641014300457, "grad_norm": 3.4468132374578078, "learning_rate": 7.56067731246847e-07, "loss": 1.0378, "step": 82910 }, { "epoch": 2.4447884416924666, "grad_norm": 3.5337653615564966, "learning_rate": 7.556804252505805e-07, "loss": 1.0896, "step": 82915 }, { "epoch": 2.444935869084476, "grad_norm": 3.508610116571799, "learning_rate": 7.552932073655855e-07, "loss": 1.0051, "step": 82920 }, { "epoch": 2.4450832964764855, "grad_norm": 3.505961825371597, "learning_rate": 7.549060776032576e-07, "loss": 1.0375, "step": 82925 }, { "epoch": 2.445230723868495, "grad_norm": 3.592800267369374, "learning_rate": 7.545190359749886e-07, "loss": 1.0836, "step": 82930 }, { "epoch": 2.4453781512605044, "grad_norm": 3.597544581679713, "learning_rate": 7.541320824921671e-07, "loss": 1.0638, "step": 82935 }, { "epoch": 2.4455255786525134, "grad_norm": 3.5057633549066627, "learning_rate": 7.537452171661813e-07, "loss": 1.0137, "step": 82940 }, { "epoch": 2.4456730060445233, "grad_norm": 3.455326542135859, "learning_rate": 7.533584400084114e-07, "loss": 1.0319, "step": 82945 }, { "epoch": 2.4458204334365323, "grad_norm": 3.659329574366336, "learning_rate": 7.529717510302437e-07, "loss": 1.0557, "step": 82950 }, { "epoch": 2.445967860828542, "grad_norm": 3.535412937664108, "learning_rate": 7.525851502430532e-07, "loss": 1.0026, "step": 82955 }, { "epoch": 2.4461152882205512, "grad_norm": 3.623763320550901, "learning_rate": 7.521986376582173e-07, "loss": 1.0716, "step": 82960 }, { "epoch": 2.4462627156125607, "grad_norm": 3.5743788784300645, "learning_rate": 7.518122132871095e-07, "loss": 1.0356, "step": 82965 }, { "epoch": 2.44641014300457, "grad_norm": 3.484201489547588, "learning_rate": 7.514258771411015e-07, "loss": 1.0634, "step": 82970 }, { "epoch": 2.4465575703965796, "grad_norm": 3.4995446960993988, "learning_rate": 7.510396292315602e-07, "loss": 1.0296, "step": 82975 }, { "epoch": 2.446704997788589, "grad_norm": 3.452350884788309, "learning_rate": 7.506534695698539e-07, "loss": 1.0952, "step": 82980 }, { "epoch": 2.4468524251805985, "grad_norm": 3.5146340750973346, "learning_rate": 7.502673981673421e-07, "loss": 1.0445, "step": 82985 }, { "epoch": 2.446999852572608, "grad_norm": 3.635997345109795, "learning_rate": 7.498814150353894e-07, "loss": 1.035, "step": 82990 }, { "epoch": 2.4471472799646175, "grad_norm": 3.454279716874306, "learning_rate": 7.494955201853506e-07, "loss": 1.0463, "step": 82995 }, { "epoch": 2.447294707356627, "grad_norm": 3.486765036454291, "learning_rate": 7.491097136285825e-07, "loss": 1.004, "step": 83000 }, { "epoch": 2.447294707356627, "eval_loss": 1.0811467170715332, "eval_runtime": 4.2165, "eval_samples_per_second": 93.916, "eval_steps_per_second": 3.083, "step": 83000 }, { "epoch": 2.4474421347486364, "grad_norm": 3.4455850337648832, "learning_rate": 7.487239953764388e-07, "loss": 1.0483, "step": 83005 }, { "epoch": 2.447589562140646, "grad_norm": 3.3372221627355265, "learning_rate": 7.483383654402667e-07, "loss": 1.0406, "step": 83010 }, { "epoch": 2.4477369895326553, "grad_norm": 3.483245873921391, "learning_rate": 7.479528238314176e-07, "loss": 1.0178, "step": 83015 }, { "epoch": 2.4478844169246647, "grad_norm": 3.5037965010701657, "learning_rate": 7.475673705612323e-07, "loss": 0.9999, "step": 83020 }, { "epoch": 2.448031844316674, "grad_norm": 3.50869744214062, "learning_rate": 7.471820056410582e-07, "loss": 1.0851, "step": 83025 }, { "epoch": 2.4481792717086837, "grad_norm": 3.554595767898958, "learning_rate": 7.46796729082231e-07, "loss": 1.0551, "step": 83030 }, { "epoch": 2.4483266991006927, "grad_norm": 3.5892259894553753, "learning_rate": 7.464115408960896e-07, "loss": 1.0379, "step": 83035 }, { "epoch": 2.4484741264927026, "grad_norm": 3.590079429506015, "learning_rate": 7.46026441093968e-07, "loss": 1.0379, "step": 83040 }, { "epoch": 2.4486215538847116, "grad_norm": 3.3418447170232413, "learning_rate": 7.456414296871989e-07, "loss": 1.007, "step": 83045 }, { "epoch": 2.448768981276721, "grad_norm": 3.534886655382826, "learning_rate": 7.452565066871112e-07, "loss": 1.0714, "step": 83050 }, { "epoch": 2.4489164086687305, "grad_norm": 3.426917679068714, "learning_rate": 7.448716721050329e-07, "loss": 1.0593, "step": 83055 }, { "epoch": 2.44906383606074, "grad_norm": 3.3766946711388246, "learning_rate": 7.444869259522852e-07, "loss": 0.9889, "step": 83060 }, { "epoch": 2.4492112634527494, "grad_norm": 3.419191047265379, "learning_rate": 7.441022682401935e-07, "loss": 1.068, "step": 83065 }, { "epoch": 2.449358690844759, "grad_norm": 3.410548438205143, "learning_rate": 7.437176989800739e-07, "loss": 1.069, "step": 83070 }, { "epoch": 2.4495061182367683, "grad_norm": 3.570490427487181, "learning_rate": 7.433332181832438e-07, "loss": 1.0612, "step": 83075 }, { "epoch": 2.449653545628778, "grad_norm": 3.5038043224485946, "learning_rate": 7.429488258610171e-07, "loss": 1.0605, "step": 83080 }, { "epoch": 2.4498009730207873, "grad_norm": 3.508119167995741, "learning_rate": 7.42564522024705e-07, "loss": 1.0508, "step": 83085 }, { "epoch": 2.4499484004127967, "grad_norm": 3.3493992407109796, "learning_rate": 7.421803066856169e-07, "loss": 1.0203, "step": 83090 }, { "epoch": 2.450095827804806, "grad_norm": 3.4829263733953537, "learning_rate": 7.417961798550557e-07, "loss": 1.0678, "step": 83095 }, { "epoch": 2.4502432551968156, "grad_norm": 3.528520287184841, "learning_rate": 7.414121415443292e-07, "loss": 1.0779, "step": 83100 }, { "epoch": 2.450390682588825, "grad_norm": 3.5466051187082877, "learning_rate": 7.410281917647348e-07, "loss": 1.0269, "step": 83105 }, { "epoch": 2.4505381099808345, "grad_norm": 3.5285915115663427, "learning_rate": 7.406443305275716e-07, "loss": 1.0602, "step": 83110 }, { "epoch": 2.450685537372844, "grad_norm": 3.5099900317763812, "learning_rate": 7.402605578441352e-07, "loss": 1.0498, "step": 83115 }, { "epoch": 2.4508329647648535, "grad_norm": 3.4380159402763524, "learning_rate": 7.398768737257188e-07, "loss": 1.0382, "step": 83120 }, { "epoch": 2.450980392156863, "grad_norm": 3.4906140980698037, "learning_rate": 7.394932781836125e-07, "loss": 1.0465, "step": 83125 }, { "epoch": 2.451127819548872, "grad_norm": 3.5408542593183436, "learning_rate": 7.391097712291055e-07, "loss": 1.0433, "step": 83130 }, { "epoch": 2.451275246940882, "grad_norm": 3.3888460031212237, "learning_rate": 7.3872635287348e-07, "loss": 1.0209, "step": 83135 }, { "epoch": 2.451422674332891, "grad_norm": 3.5813667530004403, "learning_rate": 7.383430231280204e-07, "loss": 1.0436, "step": 83140 }, { "epoch": 2.4515701017249003, "grad_norm": 3.4657884792118043, "learning_rate": 7.379597820040064e-07, "loss": 1.0555, "step": 83145 }, { "epoch": 2.4517175291169098, "grad_norm": 3.3655429410238735, "learning_rate": 7.375766295127153e-07, "loss": 1.0087, "step": 83150 }, { "epoch": 2.4518649565089192, "grad_norm": 3.5372209819604743, "learning_rate": 7.371935656654212e-07, "loss": 1.0383, "step": 83155 }, { "epoch": 2.4520123839009287, "grad_norm": 3.5334828156335707, "learning_rate": 7.368105904733969e-07, "loss": 1.0118, "step": 83160 }, { "epoch": 2.452159811292938, "grad_norm": 3.4629175126139327, "learning_rate": 7.364277039479126e-07, "loss": 1.0421, "step": 83165 }, { "epoch": 2.4523072386849476, "grad_norm": 3.330862251425894, "learning_rate": 7.360449061002327e-07, "loss": 1.0515, "step": 83170 }, { "epoch": 2.452454666076957, "grad_norm": 3.573857575802608, "learning_rate": 7.356621969416245e-07, "loss": 1.0285, "step": 83175 }, { "epoch": 2.4526020934689665, "grad_norm": 3.452429541316962, "learning_rate": 7.352795764833473e-07, "loss": 1.0343, "step": 83180 }, { "epoch": 2.452749520860976, "grad_norm": 3.59349826029597, "learning_rate": 7.348970447366614e-07, "loss": 1.0965, "step": 83185 }, { "epoch": 2.4528969482529854, "grad_norm": 3.32040168838048, "learning_rate": 7.345146017128225e-07, "loss": 1.0098, "step": 83190 }, { "epoch": 2.453044375644995, "grad_norm": 3.4984710888393287, "learning_rate": 7.341322474230846e-07, "loss": 1.02, "step": 83195 }, { "epoch": 2.4531918030370043, "grad_norm": 3.451383392621863, "learning_rate": 7.337499818787005e-07, "loss": 1.0789, "step": 83200 }, { "epoch": 2.453339230429014, "grad_norm": 3.4511230709459464, "learning_rate": 7.333678050909148e-07, "loss": 1.0323, "step": 83205 }, { "epoch": 2.4534866578210233, "grad_norm": 3.563069709725009, "learning_rate": 7.329857170709785e-07, "loss": 1.02, "step": 83210 }, { "epoch": 2.4536340852130327, "grad_norm": 3.6041467561306475, "learning_rate": 7.326037178301316e-07, "loss": 1.0214, "step": 83215 }, { "epoch": 2.453781512605042, "grad_norm": 3.3922356424289055, "learning_rate": 7.322218073796157e-07, "loss": 1.0256, "step": 83220 }, { "epoch": 2.4539289399970516, "grad_norm": 3.4858179852490094, "learning_rate": 7.318399857306692e-07, "loss": 1.0703, "step": 83225 }, { "epoch": 2.454076367389061, "grad_norm": 3.412416116018082, "learning_rate": 7.314582528945272e-07, "loss": 1.015, "step": 83230 }, { "epoch": 2.45422379478107, "grad_norm": 3.595930450522313, "learning_rate": 7.310766088824231e-07, "loss": 1.0396, "step": 83235 }, { "epoch": 2.4543712221730796, "grad_norm": 3.5933351861603655, "learning_rate": 7.30695053705588e-07, "loss": 1.0457, "step": 83240 }, { "epoch": 2.454518649565089, "grad_norm": 3.4537190298448124, "learning_rate": 7.303135873752465e-07, "loss": 1.0208, "step": 83245 }, { "epoch": 2.4546660769570985, "grad_norm": 3.5488660059167954, "learning_rate": 7.299322099026278e-07, "loss": 1.0978, "step": 83250 }, { "epoch": 2.454813504349108, "grad_norm": 3.6367435286085583, "learning_rate": 7.295509212989516e-07, "loss": 1.1025, "step": 83255 }, { "epoch": 2.4549609317411174, "grad_norm": 3.5525035623679844, "learning_rate": 7.291697215754384e-07, "loss": 1.093, "step": 83260 }, { "epoch": 2.455108359133127, "grad_norm": 3.573562987754777, "learning_rate": 7.287886107433064e-07, "loss": 1.0659, "step": 83265 }, { "epoch": 2.4552557865251363, "grad_norm": 3.581851541193834, "learning_rate": 7.284075888137677e-07, "loss": 1.0577, "step": 83270 }, { "epoch": 2.4554032139171458, "grad_norm": 3.4449061254324946, "learning_rate": 7.280266557980378e-07, "loss": 1.0404, "step": 83275 }, { "epoch": 2.4555506413091552, "grad_norm": 3.4808663883009863, "learning_rate": 7.276458117073227e-07, "loss": 1.0183, "step": 83280 }, { "epoch": 2.4556980687011647, "grad_norm": 3.4485975832670808, "learning_rate": 7.272650565528323e-07, "loss": 1.0586, "step": 83285 }, { "epoch": 2.455845496093174, "grad_norm": 3.518302592592507, "learning_rate": 7.268843903457684e-07, "loss": 1.0309, "step": 83290 }, { "epoch": 2.4559929234851836, "grad_norm": 3.355261334690461, "learning_rate": 7.265038130973332e-07, "loss": 1.0129, "step": 83295 }, { "epoch": 2.456140350877193, "grad_norm": 3.5841973485783574, "learning_rate": 7.261233248187259e-07, "loss": 1.0587, "step": 83300 }, { "epoch": 2.4562877782692025, "grad_norm": 3.3315111764000185, "learning_rate": 7.257429255211427e-07, "loss": 1.0523, "step": 83305 }, { "epoch": 2.456435205661212, "grad_norm": 3.679362197376843, "learning_rate": 7.253626152157769e-07, "loss": 1.063, "step": 83310 }, { "epoch": 2.4565826330532214, "grad_norm": 3.622209284354936, "learning_rate": 7.249823939138215e-07, "loss": 1.027, "step": 83315 }, { "epoch": 2.456730060445231, "grad_norm": 3.5163873536820245, "learning_rate": 7.246022616264611e-07, "loss": 1.068, "step": 83320 }, { "epoch": 2.4568774878372404, "grad_norm": 3.504170813558968, "learning_rate": 7.242222183648857e-07, "loss": 1.0611, "step": 83325 }, { "epoch": 2.4570249152292494, "grad_norm": 3.548639985881991, "learning_rate": 7.238422641402756e-07, "loss": 1.021, "step": 83330 }, { "epoch": 2.4571723426212593, "grad_norm": 3.361898296233721, "learning_rate": 7.234623989638122e-07, "loss": 0.9724, "step": 83335 }, { "epoch": 2.4573197700132683, "grad_norm": 3.279566604109544, "learning_rate": 7.230826228466743e-07, "loss": 1.0067, "step": 83340 }, { "epoch": 2.4574671974052777, "grad_norm": 3.456544799081551, "learning_rate": 7.22702935800036e-07, "loss": 1.0528, "step": 83345 }, { "epoch": 2.457614624797287, "grad_norm": 3.5702716844170945, "learning_rate": 7.223233378350719e-07, "loss": 1.062, "step": 83350 }, { "epoch": 2.4577620521892967, "grad_norm": 3.529908747312505, "learning_rate": 7.219438289629488e-07, "loss": 1.0348, "step": 83355 }, { "epoch": 2.457909479581306, "grad_norm": 3.485330616711759, "learning_rate": 7.215644091948381e-07, "loss": 1.111, "step": 83360 }, { "epoch": 2.4580569069733156, "grad_norm": 3.672344768948906, "learning_rate": 7.211850785419019e-07, "loss": 1.0554, "step": 83365 }, { "epoch": 2.458204334365325, "grad_norm": 3.5776519570256458, "learning_rate": 7.208058370153033e-07, "loss": 1.0559, "step": 83370 }, { "epoch": 2.4583517617573345, "grad_norm": 3.473357748640696, "learning_rate": 7.20426684626202e-07, "loss": 1.0059, "step": 83375 }, { "epoch": 2.458499189149344, "grad_norm": 3.4902512837169577, "learning_rate": 7.200476213857546e-07, "loss": 1.043, "step": 83380 }, { "epoch": 2.4586466165413534, "grad_norm": 3.4632340116798925, "learning_rate": 7.196686473051159e-07, "loss": 1.0241, "step": 83385 }, { "epoch": 2.458794043933363, "grad_norm": 3.4670563418786546, "learning_rate": 7.192897623954387e-07, "loss": 1.0473, "step": 83390 }, { "epoch": 2.4589414713253723, "grad_norm": 3.5672538686301367, "learning_rate": 7.189109666678698e-07, "loss": 1.0221, "step": 83395 }, { "epoch": 2.459088898717382, "grad_norm": 3.616115797773598, "learning_rate": 7.18532260133557e-07, "loss": 1.0318, "step": 83400 }, { "epoch": 2.4592363261093912, "grad_norm": 3.669709583244272, "learning_rate": 7.181536428036439e-07, "loss": 1.016, "step": 83405 }, { "epoch": 2.4593837535014007, "grad_norm": 3.510606730585918, "learning_rate": 7.177751146892717e-07, "loss": 1.0504, "step": 83410 }, { "epoch": 2.45953118089341, "grad_norm": 3.592460902271075, "learning_rate": 7.173966758015796e-07, "loss": 1.0624, "step": 83415 }, { "epoch": 2.4596786082854196, "grad_norm": 3.355993447874716, "learning_rate": 7.17018326151703e-07, "loss": 1.0348, "step": 83420 }, { "epoch": 2.4598260356774286, "grad_norm": 3.675329635077841, "learning_rate": 7.166400657507762e-07, "loss": 1.049, "step": 83425 }, { "epoch": 2.4599734630694385, "grad_norm": 3.56996876727774, "learning_rate": 7.162618946099276e-07, "loss": 1.0426, "step": 83430 }, { "epoch": 2.4601208904614476, "grad_norm": 3.396370367581345, "learning_rate": 7.158838127402886e-07, "loss": 1.0611, "step": 83435 }, { "epoch": 2.460268317853457, "grad_norm": 3.488034919986915, "learning_rate": 7.15505820152982e-07, "loss": 1.0489, "step": 83440 }, { "epoch": 2.4604157452454665, "grad_norm": 3.5872268778098815, "learning_rate": 7.151279168591321e-07, "loss": 1.0311, "step": 83445 }, { "epoch": 2.460563172637476, "grad_norm": 3.5111108504518223, "learning_rate": 7.14750102869858e-07, "loss": 1.0575, "step": 83450 }, { "epoch": 2.4607106000294854, "grad_norm": 3.4528507957077923, "learning_rate": 7.143723781962788e-07, "loss": 1.0713, "step": 83455 }, { "epoch": 2.460858027421495, "grad_norm": 3.629503091945179, "learning_rate": 7.139947428495093e-07, "loss": 1.0755, "step": 83460 }, { "epoch": 2.4610054548135043, "grad_norm": 3.523477928505496, "learning_rate": 7.136171968406594e-07, "loss": 1.0696, "step": 83465 }, { "epoch": 2.4611528822055138, "grad_norm": 3.484263528200034, "learning_rate": 7.132397401808421e-07, "loss": 1.0528, "step": 83470 }, { "epoch": 2.461300309597523, "grad_norm": 3.51794643024008, "learning_rate": 7.128623728811625e-07, "loss": 1.0627, "step": 83475 }, { "epoch": 2.4614477369895327, "grad_norm": 3.6063270209492297, "learning_rate": 7.124850949527259e-07, "loss": 1.0555, "step": 83480 }, { "epoch": 2.461595164381542, "grad_norm": 3.60977561669651, "learning_rate": 7.121079064066332e-07, "loss": 1.0742, "step": 83485 }, { "epoch": 2.4617425917735516, "grad_norm": 3.5450704264743447, "learning_rate": 7.117308072539849e-07, "loss": 1.0582, "step": 83490 }, { "epoch": 2.461890019165561, "grad_norm": 3.699218426553989, "learning_rate": 7.113537975058766e-07, "loss": 1.033, "step": 83495 }, { "epoch": 2.4620374465575705, "grad_norm": 3.5415399650701582, "learning_rate": 7.109768771734037e-07, "loss": 1.0488, "step": 83500 }, { "epoch": 2.4620374465575705, "eval_loss": 1.080314040184021, "eval_runtime": 4.2179, "eval_samples_per_second": 93.886, "eval_steps_per_second": 3.082, "step": 83500 }, { "epoch": 2.46218487394958, "grad_norm": 3.4393340482578196, "learning_rate": 7.106000462676546e-07, "loss": 1.0384, "step": 83505 }, { "epoch": 2.4623323013415894, "grad_norm": 3.497542823934257, "learning_rate": 7.102233047997214e-07, "loss": 1.0409, "step": 83510 }, { "epoch": 2.462479728733599, "grad_norm": 3.424008033702586, "learning_rate": 7.098466527806874e-07, "loss": 1.0836, "step": 83515 }, { "epoch": 2.462627156125608, "grad_norm": 3.550144689731585, "learning_rate": 7.094700902216374e-07, "loss": 1.0696, "step": 83520 }, { "epoch": 2.462774583517618, "grad_norm": 3.6966495306146108, "learning_rate": 7.090936171336528e-07, "loss": 1.0367, "step": 83525 }, { "epoch": 2.462922010909627, "grad_norm": 3.3955795046373227, "learning_rate": 7.087172335278092e-07, "loss": 1.0166, "step": 83530 }, { "epoch": 2.4630694383016363, "grad_norm": 3.610704529048582, "learning_rate": 7.083409394151856e-07, "loss": 1.0579, "step": 83535 }, { "epoch": 2.4632168656936457, "grad_norm": 3.752425860773727, "learning_rate": 7.079647348068509e-07, "loss": 1.05, "step": 83540 }, { "epoch": 2.463364293085655, "grad_norm": 3.6167285732323307, "learning_rate": 7.075886197138796e-07, "loss": 0.9979, "step": 83545 }, { "epoch": 2.4635117204776646, "grad_norm": 3.4938003427360194, "learning_rate": 7.072125941473363e-07, "loss": 1.0314, "step": 83550 }, { "epoch": 2.463659147869674, "grad_norm": 3.567159197799564, "learning_rate": 7.068366581182867e-07, "loss": 1.028, "step": 83555 }, { "epoch": 2.4638065752616836, "grad_norm": 3.395362504592857, "learning_rate": 7.064608116377935e-07, "loss": 1.0224, "step": 83560 }, { "epoch": 2.463954002653693, "grad_norm": 3.658477605526873, "learning_rate": 7.060850547169163e-07, "loss": 1.0474, "step": 83565 }, { "epoch": 2.4641014300457025, "grad_norm": 3.325945469427592, "learning_rate": 7.057093873667122e-07, "loss": 1.0158, "step": 83570 }, { "epoch": 2.464248857437712, "grad_norm": 3.585292102370749, "learning_rate": 7.053338095982366e-07, "loss": 1.0743, "step": 83575 }, { "epoch": 2.4643962848297214, "grad_norm": 3.474815602157061, "learning_rate": 7.049583214225384e-07, "loss": 1.0556, "step": 83580 }, { "epoch": 2.464543712221731, "grad_norm": 3.5141726710291357, "learning_rate": 7.045829228506705e-07, "loss": 1.0548, "step": 83585 }, { "epoch": 2.4646911396137403, "grad_norm": 3.3763304391462636, "learning_rate": 7.042076138936764e-07, "loss": 1.0524, "step": 83590 }, { "epoch": 2.4648385670057498, "grad_norm": 3.4858041433566305, "learning_rate": 7.038323945626014e-07, "loss": 1.0464, "step": 83595 }, { "epoch": 2.4649859943977592, "grad_norm": 3.6022571657933637, "learning_rate": 7.034572648684868e-07, "loss": 1.0456, "step": 83600 }, { "epoch": 2.4651334217897687, "grad_norm": 3.3271076388710155, "learning_rate": 7.030822248223705e-07, "loss": 1.0585, "step": 83605 }, { "epoch": 2.465280849181778, "grad_norm": 3.431185424801966, "learning_rate": 7.027072744352901e-07, "loss": 1.0434, "step": 83610 }, { "epoch": 2.4654282765737876, "grad_norm": 3.46476480489174, "learning_rate": 7.023324137182762e-07, "loss": 1.0025, "step": 83615 }, { "epoch": 2.465575703965797, "grad_norm": 3.3467204232802192, "learning_rate": 7.019576426823626e-07, "loss": 1.0783, "step": 83620 }, { "epoch": 2.465723131357806, "grad_norm": 3.4261866713839306, "learning_rate": 7.01582961338575e-07, "loss": 1.0436, "step": 83625 }, { "epoch": 2.4658705587498155, "grad_norm": 3.3555080000366915, "learning_rate": 7.012083696979396e-07, "loss": 1.0484, "step": 83630 }, { "epoch": 2.466017986141825, "grad_norm": 3.4771273547502917, "learning_rate": 7.008338677714796e-07, "loss": 1.0777, "step": 83635 }, { "epoch": 2.4661654135338344, "grad_norm": 3.480697990346052, "learning_rate": 7.004594555702143e-07, "loss": 1.0453, "step": 83640 }, { "epoch": 2.466312840925844, "grad_norm": 3.550874870837936, "learning_rate": 7.00085133105162e-07, "loss": 1.009, "step": 83645 }, { "epoch": 2.4664602683178534, "grad_norm": 3.6890164104089957, "learning_rate": 6.997109003873386e-07, "loss": 1.0961, "step": 83650 }, { "epoch": 2.466607695709863, "grad_norm": 3.4989525692019834, "learning_rate": 6.993367574277536e-07, "loss": 1.0261, "step": 83655 }, { "epoch": 2.4667551231018723, "grad_norm": 3.2614772050641494, "learning_rate": 6.989627042374181e-07, "loss": 1.0326, "step": 83660 }, { "epoch": 2.4669025504938817, "grad_norm": 3.534924433575173, "learning_rate": 6.985887408273391e-07, "loss": 1.0542, "step": 83665 }, { "epoch": 2.467049977885891, "grad_norm": 3.389650630200552, "learning_rate": 6.982148672085207e-07, "loss": 1.0704, "step": 83670 }, { "epoch": 2.4671974052779007, "grad_norm": 3.3529198462336978, "learning_rate": 6.978410833919648e-07, "loss": 1.0669, "step": 83675 }, { "epoch": 2.46734483266991, "grad_norm": 3.4260624928750185, "learning_rate": 6.974673893886697e-07, "loss": 1.0237, "step": 83680 }, { "epoch": 2.4674922600619196, "grad_norm": 3.5191136876075313, "learning_rate": 6.970937852096336e-07, "loss": 1.055, "step": 83685 }, { "epoch": 2.467639687453929, "grad_norm": 3.4301678491966254, "learning_rate": 6.967202708658471e-07, "loss": 1.0349, "step": 83690 }, { "epoch": 2.4677871148459385, "grad_norm": 3.3976110994313413, "learning_rate": 6.963468463683049e-07, "loss": 1.0437, "step": 83695 }, { "epoch": 2.467934542237948, "grad_norm": 3.5486000496550694, "learning_rate": 6.959735117279925e-07, "loss": 1.0517, "step": 83700 }, { "epoch": 2.4680819696299574, "grad_norm": 3.545815506585934, "learning_rate": 6.956002669558969e-07, "loss": 1.0622, "step": 83705 }, { "epoch": 2.468229397021967, "grad_norm": 3.487355229815096, "learning_rate": 6.952271120630015e-07, "loss": 0.9977, "step": 83710 }, { "epoch": 2.4683768244139763, "grad_norm": 3.4307394628528636, "learning_rate": 6.948540470602864e-07, "loss": 1.0019, "step": 83715 }, { "epoch": 2.4685242518059853, "grad_norm": 3.3468906978929156, "learning_rate": 6.944810719587304e-07, "loss": 1.0474, "step": 83720 }, { "epoch": 2.468671679197995, "grad_norm": 3.6039046898491023, "learning_rate": 6.941081867693058e-07, "loss": 1.0472, "step": 83725 }, { "epoch": 2.4688191065900043, "grad_norm": 3.5181598597466186, "learning_rate": 6.937353915029894e-07, "loss": 1.037, "step": 83730 }, { "epoch": 2.4689665339820137, "grad_norm": 3.5622021981648073, "learning_rate": 6.933626861707481e-07, "loss": 1.0299, "step": 83735 }, { "epoch": 2.469113961374023, "grad_norm": 3.6244463242698837, "learning_rate": 6.9299007078355e-07, "loss": 1.0574, "step": 83740 }, { "epoch": 2.4692613887660326, "grad_norm": 3.5590955903831385, "learning_rate": 6.926175453523599e-07, "loss": 1.0358, "step": 83745 }, { "epoch": 2.469408816158042, "grad_norm": 3.4958414645076026, "learning_rate": 6.922451098881399e-07, "loss": 1.0659, "step": 83750 }, { "epoch": 2.4695562435500515, "grad_norm": 3.6431798808908304, "learning_rate": 6.918727644018488e-07, "loss": 1.0611, "step": 83755 }, { "epoch": 2.469703670942061, "grad_norm": 3.478451463553328, "learning_rate": 6.915005089044444e-07, "loss": 1.0547, "step": 83760 }, { "epoch": 2.4698510983340705, "grad_norm": 3.44069391739423, "learning_rate": 6.911283434068786e-07, "loss": 1.0212, "step": 83765 }, { "epoch": 2.46999852572608, "grad_norm": 3.5087556504688138, "learning_rate": 6.907562679201056e-07, "loss": 1.0316, "step": 83770 }, { "epoch": 2.4701459531180894, "grad_norm": 3.37012323348483, "learning_rate": 6.903842824550719e-07, "loss": 1.0264, "step": 83775 }, { "epoch": 2.470293380510099, "grad_norm": 3.5025613641764997, "learning_rate": 6.900123870227244e-07, "loss": 1.062, "step": 83780 }, { "epoch": 2.4704408079021083, "grad_norm": 3.52222577910594, "learning_rate": 6.896405816340075e-07, "loss": 1.0357, "step": 83785 }, { "epoch": 2.4705882352941178, "grad_norm": 3.5596852052722907, "learning_rate": 6.892688662998593e-07, "loss": 1.0639, "step": 83790 }, { "epoch": 2.470735662686127, "grad_norm": 3.507495886526426, "learning_rate": 6.888972410312212e-07, "loss": 1.0921, "step": 83795 }, { "epoch": 2.4708830900781367, "grad_norm": 3.4388180653914913, "learning_rate": 6.885257058390256e-07, "loss": 1.0342, "step": 83800 }, { "epoch": 2.471030517470146, "grad_norm": 3.3321903593532896, "learning_rate": 6.881542607342089e-07, "loss": 1.0622, "step": 83805 }, { "epoch": 2.4711779448621556, "grad_norm": 3.4803334591446773, "learning_rate": 6.877829057276985e-07, "loss": 1.0613, "step": 83810 }, { "epoch": 2.4713253722541646, "grad_norm": 3.517268573241983, "learning_rate": 6.874116408304224e-07, "loss": 1.0552, "step": 83815 }, { "epoch": 2.4714727996461745, "grad_norm": 3.5058395572656695, "learning_rate": 6.870404660533062e-07, "loss": 1.0558, "step": 83820 }, { "epoch": 2.4716202270381835, "grad_norm": 3.539387443012528, "learning_rate": 6.866693814072715e-07, "loss": 1.0367, "step": 83825 }, { "epoch": 2.471767654430193, "grad_norm": 3.361874612429477, "learning_rate": 6.862983869032385e-07, "loss": 1.0378, "step": 83830 }, { "epoch": 2.4719150818222024, "grad_norm": 3.6328662823478406, "learning_rate": 6.859274825521249e-07, "loss": 1.0259, "step": 83835 }, { "epoch": 2.472062509214212, "grad_norm": 3.621384065997485, "learning_rate": 6.855566683648417e-07, "loss": 1.0825, "step": 83840 }, { "epoch": 2.4722099366062213, "grad_norm": 3.465563568764755, "learning_rate": 6.851859443523047e-07, "loss": 1.0174, "step": 83845 }, { "epoch": 2.472357363998231, "grad_norm": 3.545926825238225, "learning_rate": 6.848153105254199e-07, "loss": 1.0627, "step": 83850 }, { "epoch": 2.4725047913902403, "grad_norm": 3.3464497047477457, "learning_rate": 6.844447668950948e-07, "loss": 1.0656, "step": 83855 }, { "epoch": 2.4726522187822497, "grad_norm": 3.5692636809156015, "learning_rate": 6.84074313472233e-07, "loss": 1.0432, "step": 83860 }, { "epoch": 2.472799646174259, "grad_norm": 3.499083025448263, "learning_rate": 6.837039502677356e-07, "loss": 1.0619, "step": 83865 }, { "epoch": 2.4729470735662686, "grad_norm": 3.3565543134970035, "learning_rate": 6.833336772925014e-07, "loss": 1.0215, "step": 83870 }, { "epoch": 2.473094500958278, "grad_norm": 3.3852899670002556, "learning_rate": 6.829634945574238e-07, "loss": 1.0287, "step": 83875 }, { "epoch": 2.4732419283502876, "grad_norm": 3.466954110571324, "learning_rate": 6.825934020733996e-07, "loss": 1.0445, "step": 83880 }, { "epoch": 2.473389355742297, "grad_norm": 3.6767561429042237, "learning_rate": 6.822233998513157e-07, "loss": 1.1088, "step": 83885 }, { "epoch": 2.4735367831343065, "grad_norm": 3.4551764863215593, "learning_rate": 6.818534879020618e-07, "loss": 1.0642, "step": 83890 }, { "epoch": 2.473684210526316, "grad_norm": 3.609467723692794, "learning_rate": 6.814836662365223e-07, "loss": 1.0704, "step": 83895 }, { "epoch": 2.4738316379183254, "grad_norm": 3.6071323225030305, "learning_rate": 6.8111393486558e-07, "loss": 1.0233, "step": 83900 }, { "epoch": 2.473979065310335, "grad_norm": 3.394819849477998, "learning_rate": 6.80744293800114e-07, "loss": 1.0502, "step": 83905 }, { "epoch": 2.474126492702344, "grad_norm": 3.7339905204465076, "learning_rate": 6.80374743051003e-07, "loss": 1.0654, "step": 83910 }, { "epoch": 2.4742739200943538, "grad_norm": 3.604333221898556, "learning_rate": 6.800052826291196e-07, "loss": 1.0682, "step": 83915 }, { "epoch": 2.4744213474863628, "grad_norm": 3.3662439886407065, "learning_rate": 6.796359125453362e-07, "loss": 1.0075, "step": 83920 }, { "epoch": 2.4745687748783722, "grad_norm": 3.4946999074400362, "learning_rate": 6.79266632810522e-07, "loss": 1.0186, "step": 83925 }, { "epoch": 2.4747162022703817, "grad_norm": 3.412399666962098, "learning_rate": 6.788974434355435e-07, "loss": 1.0367, "step": 83930 }, { "epoch": 2.474863629662391, "grad_norm": 3.60713534533506, "learning_rate": 6.785283444312649e-07, "loss": 1.0532, "step": 83935 }, { "epoch": 2.4750110570544006, "grad_norm": 3.481624885839477, "learning_rate": 6.781593358085466e-07, "loss": 1.0458, "step": 83940 }, { "epoch": 2.47515848444641, "grad_norm": 3.4355621160956447, "learning_rate": 6.777904175782487e-07, "loss": 1.025, "step": 83945 }, { "epoch": 2.4753059118384195, "grad_norm": 3.550567263551179, "learning_rate": 6.77421589751224e-07, "loss": 1.0192, "step": 83950 }, { "epoch": 2.475453339230429, "grad_norm": 3.3957641951603703, "learning_rate": 6.770528523383292e-07, "loss": 1.0578, "step": 83955 }, { "epoch": 2.4756007666224384, "grad_norm": 3.534856643681607, "learning_rate": 6.766842053504122e-07, "loss": 1.0053, "step": 83960 }, { "epoch": 2.475748194014448, "grad_norm": 3.6264964624946994, "learning_rate": 6.763156487983221e-07, "loss": 1.0522, "step": 83965 }, { "epoch": 2.4758956214064574, "grad_norm": 3.5695778065662456, "learning_rate": 6.759471826929038e-07, "loss": 1.0689, "step": 83970 }, { "epoch": 2.476043048798467, "grad_norm": 3.6130997591447613, "learning_rate": 6.755788070449999e-07, "loss": 1.0697, "step": 83975 }, { "epoch": 2.4761904761904763, "grad_norm": 3.3629999478241466, "learning_rate": 6.752105218654509e-07, "loss": 1.0636, "step": 83980 }, { "epoch": 2.4763379035824857, "grad_norm": 3.6307771430444262, "learning_rate": 6.748423271650914e-07, "loss": 1.0225, "step": 83985 }, { "epoch": 2.476485330974495, "grad_norm": 3.4922389201877735, "learning_rate": 6.744742229547598e-07, "loss": 1.0508, "step": 83990 }, { "epoch": 2.4766327583665046, "grad_norm": 3.336734160704587, "learning_rate": 6.741062092452856e-07, "loss": 1.0499, "step": 83995 }, { "epoch": 2.476780185758514, "grad_norm": 3.5093524515080365, "learning_rate": 6.737382860474978e-07, "loss": 1.0406, "step": 84000 }, { "epoch": 2.476780185758514, "eval_loss": 1.0801947116851807, "eval_runtime": 4.179, "eval_samples_per_second": 94.759, "eval_steps_per_second": 3.111, "step": 84000 }, { "epoch": 2.476927613150523, "grad_norm": 3.5023594675838785, "learning_rate": 6.73370453372224e-07, "loss": 1.0379, "step": 84005 }, { "epoch": 2.477075040542533, "grad_norm": 3.5042765021696143, "learning_rate": 6.730027112302878e-07, "loss": 1.0421, "step": 84010 }, { "epoch": 2.477222467934542, "grad_norm": 3.4645623072399827, "learning_rate": 6.726350596325104e-07, "loss": 1.0128, "step": 84015 }, { "epoch": 2.4773698953265515, "grad_norm": 3.401110357627945, "learning_rate": 6.722674985897111e-07, "loss": 1.0332, "step": 84020 }, { "epoch": 2.477517322718561, "grad_norm": 3.5449301134875872, "learning_rate": 6.719000281127033e-07, "loss": 1.0454, "step": 84025 }, { "epoch": 2.4776647501105704, "grad_norm": 3.655897231867738, "learning_rate": 6.715326482123038e-07, "loss": 1.0267, "step": 84030 }, { "epoch": 2.47781217750258, "grad_norm": 3.6424488585625387, "learning_rate": 6.711653588993207e-07, "loss": 1.0775, "step": 84035 }, { "epoch": 2.4779596048945893, "grad_norm": 3.574789912274844, "learning_rate": 6.707981601845622e-07, "loss": 1.0454, "step": 84040 }, { "epoch": 2.478107032286599, "grad_norm": 3.4839731728016177, "learning_rate": 6.70431052078835e-07, "loss": 1.0468, "step": 84045 }, { "epoch": 2.4782544596786082, "grad_norm": 3.539696167014635, "learning_rate": 6.700640345929389e-07, "loss": 1.0492, "step": 84050 }, { "epoch": 2.4784018870706177, "grad_norm": 3.3453235415588374, "learning_rate": 6.696971077376771e-07, "loss": 1.0121, "step": 84055 }, { "epoch": 2.478549314462627, "grad_norm": 3.465127921461967, "learning_rate": 6.693302715238437e-07, "loss": 1.0555, "step": 84060 }, { "epoch": 2.4786967418546366, "grad_norm": 3.3714932788481984, "learning_rate": 6.689635259622362e-07, "loss": 1.031, "step": 84065 }, { "epoch": 2.478844169246646, "grad_norm": 3.40160254404084, "learning_rate": 6.685968710636445e-07, "loss": 1.0435, "step": 84070 }, { "epoch": 2.4789915966386555, "grad_norm": 3.6631309033709485, "learning_rate": 6.682303068388587e-07, "loss": 1.0662, "step": 84075 }, { "epoch": 2.479139024030665, "grad_norm": 3.6424473759808365, "learning_rate": 6.678638332986649e-07, "loss": 1.0612, "step": 84080 }, { "epoch": 2.4792864514226745, "grad_norm": 3.374224377074081, "learning_rate": 6.674974504538468e-07, "loss": 1.0783, "step": 84085 }, { "epoch": 2.479433878814684, "grad_norm": 3.362164299955037, "learning_rate": 6.671311583151868e-07, "loss": 1.0361, "step": 84090 }, { "epoch": 2.4795813062066934, "grad_norm": 3.5793395773562957, "learning_rate": 6.667649568934632e-07, "loss": 1.0554, "step": 84095 }, { "epoch": 2.479728733598703, "grad_norm": 3.519293809083899, "learning_rate": 6.663988461994495e-07, "loss": 1.0615, "step": 84100 }, { "epoch": 2.4798761609907123, "grad_norm": 3.528283492095994, "learning_rate": 6.660328262439231e-07, "loss": 1.0796, "step": 84105 }, { "epoch": 2.4800235883827213, "grad_norm": 3.51816623805356, "learning_rate": 6.6566689703765e-07, "loss": 1.0655, "step": 84110 }, { "epoch": 2.4801710157747308, "grad_norm": 3.5141430890780447, "learning_rate": 6.653010585914028e-07, "loss": 1.0375, "step": 84115 }, { "epoch": 2.48031844316674, "grad_norm": 3.4825487323173836, "learning_rate": 6.649353109159432e-07, "loss": 1.0671, "step": 84120 }, { "epoch": 2.4804658705587497, "grad_norm": 3.3534503871034396, "learning_rate": 6.645696540220345e-07, "loss": 1.0132, "step": 84125 }, { "epoch": 2.480613297950759, "grad_norm": 3.511168308184639, "learning_rate": 6.642040879204382e-07, "loss": 1.0326, "step": 84130 }, { "epoch": 2.4807607253427686, "grad_norm": 3.577838380464415, "learning_rate": 6.638386126219083e-07, "loss": 1.0858, "step": 84135 }, { "epoch": 2.480908152734778, "grad_norm": 3.4638026601863405, "learning_rate": 6.634732281372032e-07, "loss": 1.0064, "step": 84140 }, { "epoch": 2.4810555801267875, "grad_norm": 3.610697113409057, "learning_rate": 6.631079344770718e-07, "loss": 1.0639, "step": 84145 }, { "epoch": 2.481203007518797, "grad_norm": 3.5473294464832983, "learning_rate": 6.627427316522641e-07, "loss": 1.0726, "step": 84150 }, { "epoch": 2.4813504349108064, "grad_norm": 3.4847223701060805, "learning_rate": 6.623776196735267e-07, "loss": 1.0183, "step": 84155 }, { "epoch": 2.481497862302816, "grad_norm": 3.449213268848834, "learning_rate": 6.62012598551604e-07, "loss": 1.0151, "step": 84160 }, { "epoch": 2.4816452896948253, "grad_norm": 3.5190441990127383, "learning_rate": 6.616476682972364e-07, "loss": 1.0417, "step": 84165 }, { "epoch": 2.481792717086835, "grad_norm": 3.5332284698502283, "learning_rate": 6.612828289211637e-07, "loss": 1.0243, "step": 84170 }, { "epoch": 2.4819401444788443, "grad_norm": 3.429556642516559, "learning_rate": 6.609180804341189e-07, "loss": 1.0578, "step": 84175 }, { "epoch": 2.4820875718708537, "grad_norm": 3.1475088050154496, "learning_rate": 6.605534228468385e-07, "loss": 1.051, "step": 84180 }, { "epoch": 2.482234999262863, "grad_norm": 3.440068408636864, "learning_rate": 6.601888561700506e-07, "loss": 1.0377, "step": 84185 }, { "epoch": 2.4823824266548726, "grad_norm": 3.5484530488074646, "learning_rate": 6.598243804144841e-07, "loss": 1.0771, "step": 84190 }, { "epoch": 2.482529854046882, "grad_norm": 3.4234482100013017, "learning_rate": 6.594599955908636e-07, "loss": 1.0547, "step": 84195 }, { "epoch": 2.4826772814388915, "grad_norm": 3.4321982676856306, "learning_rate": 6.590957017099117e-07, "loss": 1.0247, "step": 84200 }, { "epoch": 2.4828247088309006, "grad_norm": 3.5820776999598634, "learning_rate": 6.587314987823495e-07, "loss": 1.0978, "step": 84205 }, { "epoch": 2.4829721362229105, "grad_norm": 3.6484857271033366, "learning_rate": 6.583673868188907e-07, "loss": 1.0749, "step": 84210 }, { "epoch": 2.4831195636149195, "grad_norm": 3.5447673404128, "learning_rate": 6.580033658302537e-07, "loss": 1.0527, "step": 84215 }, { "epoch": 2.483266991006929, "grad_norm": 3.289855547964171, "learning_rate": 6.57639435827147e-07, "loss": 1.066, "step": 84220 }, { "epoch": 2.4834144183989384, "grad_norm": 3.4370994829202433, "learning_rate": 6.572755968202813e-07, "loss": 1.0158, "step": 84225 }, { "epoch": 2.483561845790948, "grad_norm": 3.3842105493098376, "learning_rate": 6.569118488203631e-07, "loss": 1.015, "step": 84230 }, { "epoch": 2.4837092731829573, "grad_norm": 3.4752424014542624, "learning_rate": 6.565481918380953e-07, "loss": 1.054, "step": 84235 }, { "epoch": 2.4838567005749668, "grad_norm": 3.6265426146344106, "learning_rate": 6.561846258841795e-07, "loss": 1.0675, "step": 84240 }, { "epoch": 2.4840041279669762, "grad_norm": 3.4499822038832013, "learning_rate": 6.558211509693137e-07, "loss": 1.0416, "step": 84245 }, { "epoch": 2.4841515553589857, "grad_norm": 3.644309489573056, "learning_rate": 6.554577671041943e-07, "loss": 1.0325, "step": 84250 }, { "epoch": 2.484298982750995, "grad_norm": 3.572953894069006, "learning_rate": 6.550944742995133e-07, "loss": 1.0434, "step": 84255 }, { "epoch": 2.4844464101430046, "grad_norm": 3.596441486884492, "learning_rate": 6.547312725659614e-07, "loss": 1.0325, "step": 84260 }, { "epoch": 2.484593837535014, "grad_norm": 3.543968912650875, "learning_rate": 6.543681619142257e-07, "loss": 1.0322, "step": 84265 }, { "epoch": 2.4847412649270235, "grad_norm": 3.3589811107478873, "learning_rate": 6.540051423549923e-07, "loss": 1.0334, "step": 84270 }, { "epoch": 2.484888692319033, "grad_norm": 3.4531720337420357, "learning_rate": 6.536422138989426e-07, "loss": 1.0445, "step": 84275 }, { "epoch": 2.4850361197110424, "grad_norm": 3.4977221451590848, "learning_rate": 6.532793765567573e-07, "loss": 1.039, "step": 84280 }, { "epoch": 2.485183547103052, "grad_norm": 3.5919344700954077, "learning_rate": 6.529166303391107e-07, "loss": 1.0639, "step": 84285 }, { "epoch": 2.4853309744950614, "grad_norm": 3.543614343711088, "learning_rate": 6.525539752566801e-07, "loss": 1.0449, "step": 84290 }, { "epoch": 2.485478401887071, "grad_norm": 3.4320624050257087, "learning_rate": 6.521914113201355e-07, "loss": 1.0456, "step": 84295 }, { "epoch": 2.48562582927908, "grad_norm": 3.5524641615695796, "learning_rate": 6.518289385401456e-07, "loss": 1.0497, "step": 84300 }, { "epoch": 2.4857732566710897, "grad_norm": 3.596295403290564, "learning_rate": 6.51466556927377e-07, "loss": 1.0673, "step": 84305 }, { "epoch": 2.4859206840630987, "grad_norm": 3.5525402729653845, "learning_rate": 6.511042664924928e-07, "loss": 1.0687, "step": 84310 }, { "epoch": 2.486068111455108, "grad_norm": 3.5359509875952067, "learning_rate": 6.507420672461553e-07, "loss": 1.014, "step": 84315 }, { "epoch": 2.4862155388471177, "grad_norm": 3.534133470301934, "learning_rate": 6.503799591990198e-07, "loss": 1.0462, "step": 84320 }, { "epoch": 2.486362966239127, "grad_norm": 3.434540464494837, "learning_rate": 6.500179423617451e-07, "loss": 1.0254, "step": 84325 }, { "epoch": 2.4865103936311366, "grad_norm": 3.40615882235201, "learning_rate": 6.496560167449813e-07, "loss": 1.0192, "step": 84330 }, { "epoch": 2.486657821023146, "grad_norm": 3.3959723165428266, "learning_rate": 6.492941823593795e-07, "loss": 1.1062, "step": 84335 }, { "epoch": 2.4868052484151555, "grad_norm": 3.511255399405299, "learning_rate": 6.489324392155869e-07, "loss": 1.0209, "step": 84340 }, { "epoch": 2.486952675807165, "grad_norm": 3.529671658853202, "learning_rate": 6.485707873242483e-07, "loss": 1.0544, "step": 84345 }, { "epoch": 2.4871001031991744, "grad_norm": 3.4758427712142304, "learning_rate": 6.482092266960059e-07, "loss": 1.0519, "step": 84350 }, { "epoch": 2.487247530591184, "grad_norm": 3.3843523783091256, "learning_rate": 6.478477573414995e-07, "loss": 0.9991, "step": 84355 }, { "epoch": 2.4873949579831933, "grad_norm": 3.539347899992157, "learning_rate": 6.474863792713637e-07, "loss": 1.0886, "step": 84360 }, { "epoch": 2.487542385375203, "grad_norm": 3.5207459164597137, "learning_rate": 6.471250924962352e-07, "loss": 1.0753, "step": 84365 }, { "epoch": 2.4876898127672122, "grad_norm": 3.6086143630036056, "learning_rate": 6.467638970267426e-07, "loss": 1.0963, "step": 84370 }, { "epoch": 2.4878372401592217, "grad_norm": 3.4946260339862696, "learning_rate": 6.464027928735177e-07, "loss": 1.0568, "step": 84375 }, { "epoch": 2.487984667551231, "grad_norm": 3.488065191542895, "learning_rate": 6.460417800471832e-07, "loss": 1.0726, "step": 84380 }, { "epoch": 2.4881320949432406, "grad_norm": 3.437928516648694, "learning_rate": 6.456808585583643e-07, "loss": 1.001, "step": 84385 }, { "epoch": 2.48827952233525, "grad_norm": 3.4785165813619865, "learning_rate": 6.453200284176811e-07, "loss": 1.0605, "step": 84390 }, { "epoch": 2.488426949727259, "grad_norm": 3.533906149154275, "learning_rate": 6.449592896357501e-07, "loss": 1.05, "step": 84395 }, { "epoch": 2.488574377119269, "grad_norm": 3.4406422284677642, "learning_rate": 6.44598642223189e-07, "loss": 1.0246, "step": 84400 }, { "epoch": 2.488721804511278, "grad_norm": 3.5783076261050466, "learning_rate": 6.442380861906079e-07, "loss": 1.0126, "step": 84405 }, { "epoch": 2.4888692319032875, "grad_norm": 3.3261854963239754, "learning_rate": 6.438776215486176e-07, "loss": 1.018, "step": 84410 }, { "epoch": 2.489016659295297, "grad_norm": 3.541505332414699, "learning_rate": 6.435172483078253e-07, "loss": 1.0195, "step": 84415 }, { "epoch": 2.4891640866873064, "grad_norm": 3.356081518188633, "learning_rate": 6.431569664788347e-07, "loss": 1.0416, "step": 84420 }, { "epoch": 2.489311514079316, "grad_norm": 3.507427942050183, "learning_rate": 6.427967760722485e-07, "loss": 1.0525, "step": 84425 }, { "epoch": 2.4894589414713253, "grad_norm": 3.535500044178563, "learning_rate": 6.424366770986658e-07, "loss": 1.0248, "step": 84430 }, { "epoch": 2.4896063688633348, "grad_norm": 3.5780861918602365, "learning_rate": 6.42076669568681e-07, "loss": 1.0725, "step": 84435 }, { "epoch": 2.489753796255344, "grad_norm": 3.4249735628391536, "learning_rate": 6.417167534928907e-07, "loss": 1.0486, "step": 84440 }, { "epoch": 2.4899012236473537, "grad_norm": 3.5540077492691786, "learning_rate": 6.413569288818828e-07, "loss": 1.0778, "step": 84445 }, { "epoch": 2.490048651039363, "grad_norm": 3.4712226024292683, "learning_rate": 6.409971957462476e-07, "loss": 1.0127, "step": 84450 }, { "epoch": 2.4901960784313726, "grad_norm": 3.5302130586200975, "learning_rate": 6.406375540965696e-07, "loss": 1.0606, "step": 84455 }, { "epoch": 2.490343505823382, "grad_norm": 3.838127031987449, "learning_rate": 6.402780039434324e-07, "loss": 1.0594, "step": 84460 }, { "epoch": 2.4904909332153915, "grad_norm": 3.4502966321909576, "learning_rate": 6.399185452974166e-07, "loss": 1.0574, "step": 84465 }, { "epoch": 2.490638360607401, "grad_norm": 3.4218176813074876, "learning_rate": 6.395591781690971e-07, "loss": 1.0374, "step": 84470 }, { "epoch": 2.4907857879994104, "grad_norm": 3.568991777996855, "learning_rate": 6.391999025690526e-07, "loss": 1.0381, "step": 84475 }, { "epoch": 2.49093321539142, "grad_norm": 3.522068803280215, "learning_rate": 6.388407185078521e-07, "loss": 1.0438, "step": 84480 }, { "epoch": 2.4910806427834293, "grad_norm": 3.487746100063868, "learning_rate": 6.384816259960659e-07, "loss": 1.0497, "step": 84485 }, { "epoch": 2.4912280701754383, "grad_norm": 3.4700468697400173, "learning_rate": 6.38122625044261e-07, "loss": 1.0376, "step": 84490 }, { "epoch": 2.4913754975674482, "grad_norm": 3.526191306166052, "learning_rate": 6.377637156630009e-07, "loss": 1.0798, "step": 84495 }, { "epoch": 2.4915229249594573, "grad_norm": 3.5243665559591837, "learning_rate": 6.374048978628477e-07, "loss": 1.0586, "step": 84500 }, { "epoch": 2.4915229249594573, "eval_loss": 1.0799024105072021, "eval_runtime": 4.3046, "eval_samples_per_second": 91.996, "eval_steps_per_second": 3.02, "step": 84500 }, { "epoch": 2.4916703523514667, "grad_norm": 3.4518260206698232, "learning_rate": 6.370461716543596e-07, "loss": 1.0684, "step": 84505 }, { "epoch": 2.491817779743476, "grad_norm": 3.3556820695005944, "learning_rate": 6.366875370480931e-07, "loss": 1.059, "step": 84510 }, { "epoch": 2.4919652071354856, "grad_norm": 3.495811800724824, "learning_rate": 6.363289940545999e-07, "loss": 1.0193, "step": 84515 }, { "epoch": 2.492112634527495, "grad_norm": 3.3397093415592964, "learning_rate": 6.359705426844318e-07, "loss": 1.0228, "step": 84520 }, { "epoch": 2.4922600619195046, "grad_norm": 3.464432965044346, "learning_rate": 6.356121829481363e-07, "loss": 1.0584, "step": 84525 }, { "epoch": 2.492407489311514, "grad_norm": 3.639434995098176, "learning_rate": 6.352539148562583e-07, "loss": 1.0599, "step": 84530 }, { "epoch": 2.4925549167035235, "grad_norm": 3.3857381662653068, "learning_rate": 6.34895738419341e-07, "loss": 0.9991, "step": 84535 }, { "epoch": 2.492702344095533, "grad_norm": 3.5555878114346786, "learning_rate": 6.345376536479243e-07, "loss": 1.045, "step": 84540 }, { "epoch": 2.4928497714875424, "grad_norm": 3.494424796775349, "learning_rate": 6.341796605525427e-07, "loss": 1.044, "step": 84545 }, { "epoch": 2.492997198879552, "grad_norm": 3.5328825021620007, "learning_rate": 6.338217591437344e-07, "loss": 1.0387, "step": 84550 }, { "epoch": 2.4931446262715613, "grad_norm": 3.5112094679967165, "learning_rate": 6.334639494320281e-07, "loss": 1.036, "step": 84555 }, { "epoch": 2.4932920536635708, "grad_norm": 3.6357978262141883, "learning_rate": 6.331062314279542e-07, "loss": 1.0243, "step": 84560 }, { "epoch": 2.49343948105558, "grad_norm": 3.425191773360887, "learning_rate": 6.327486051420382e-07, "loss": 1.0171, "step": 84565 }, { "epoch": 2.4935869084475897, "grad_norm": 3.4317261083992117, "learning_rate": 6.32391070584804e-07, "loss": 1.0399, "step": 84570 }, { "epoch": 2.493734335839599, "grad_norm": 3.634123069340075, "learning_rate": 6.320336277667737e-07, "loss": 1.0634, "step": 84575 }, { "epoch": 2.4938817632316086, "grad_norm": 3.495112180313718, "learning_rate": 6.316762766984618e-07, "loss": 1.0572, "step": 84580 }, { "epoch": 2.494029190623618, "grad_norm": 3.2292624844835838, "learning_rate": 6.313190173903886e-07, "loss": 1.0333, "step": 84585 }, { "epoch": 2.4941766180156275, "grad_norm": 3.5443045654897953, "learning_rate": 6.309618498530635e-07, "loss": 1.0378, "step": 84590 }, { "epoch": 2.4943240454076365, "grad_norm": 3.585375622374775, "learning_rate": 6.306047740969977e-07, "loss": 1.1166, "step": 84595 }, { "epoch": 2.494471472799646, "grad_norm": 3.3777321337821857, "learning_rate": 6.30247790132698e-07, "loss": 0.9969, "step": 84600 }, { "epoch": 2.4946189001916554, "grad_norm": 3.5293669835683903, "learning_rate": 6.298908979706697e-07, "loss": 1.0638, "step": 84605 }, { "epoch": 2.494766327583665, "grad_norm": 3.4914958370501106, "learning_rate": 6.295340976214141e-07, "loss": 1.0312, "step": 84610 }, { "epoch": 2.4949137549756744, "grad_norm": 3.5560740395782, "learning_rate": 6.291773890954325e-07, "loss": 1.024, "step": 84615 }, { "epoch": 2.495061182367684, "grad_norm": 3.5704651374959946, "learning_rate": 6.288207724032173e-07, "loss": 1.0143, "step": 84620 }, { "epoch": 2.4952086097596933, "grad_norm": 3.5735659846037837, "learning_rate": 6.284642475552673e-07, "loss": 1.0546, "step": 84625 }, { "epoch": 2.4953560371517027, "grad_norm": 3.5600234412967624, "learning_rate": 6.281078145620689e-07, "loss": 1.0853, "step": 84630 }, { "epoch": 2.495503464543712, "grad_norm": 3.5657042692024046, "learning_rate": 6.277514734341152e-07, "loss": 1.0519, "step": 84635 }, { "epoch": 2.4956508919357216, "grad_norm": 3.49121560842717, "learning_rate": 6.273952241818884e-07, "loss": 1.0879, "step": 84640 }, { "epoch": 2.495798319327731, "grad_norm": 3.4645711418620158, "learning_rate": 6.270390668158728e-07, "loss": 1.0057, "step": 84645 }, { "epoch": 2.4959457467197406, "grad_norm": 3.502666246086459, "learning_rate": 6.266830013465495e-07, "loss": 1.0494, "step": 84650 }, { "epoch": 2.49609317411175, "grad_norm": 3.583843216421606, "learning_rate": 6.263270277843939e-07, "loss": 1.0532, "step": 84655 }, { "epoch": 2.4962406015037595, "grad_norm": 3.363547750410268, "learning_rate": 6.25971146139884e-07, "loss": 1.0662, "step": 84660 }, { "epoch": 2.496388028895769, "grad_norm": 3.3080745387177783, "learning_rate": 6.256153564234892e-07, "loss": 1.0065, "step": 84665 }, { "epoch": 2.4965354562877784, "grad_norm": 3.6141427768461263, "learning_rate": 6.252596586456806e-07, "loss": 1.0993, "step": 84670 }, { "epoch": 2.496682883679788, "grad_norm": 3.5476466066824397, "learning_rate": 6.249040528169247e-07, "loss": 1.0126, "step": 84675 }, { "epoch": 2.4968303110717973, "grad_norm": 3.456545236603055, "learning_rate": 6.245485389476849e-07, "loss": 1.0583, "step": 84680 }, { "epoch": 2.4969777384638068, "grad_norm": 3.472173057649236, "learning_rate": 6.241931170484238e-07, "loss": 1.0629, "step": 84685 }, { "epoch": 2.497125165855816, "grad_norm": 3.2976714748779017, "learning_rate": 6.238377871295995e-07, "loss": 1.0309, "step": 84690 }, { "epoch": 2.4972725932478257, "grad_norm": 3.3732993687922055, "learning_rate": 6.234825492016678e-07, "loss": 1.0053, "step": 84695 }, { "epoch": 2.4974200206398347, "grad_norm": 3.3094975090198475, "learning_rate": 6.231274032750832e-07, "loss": 1.0117, "step": 84700 }, { "epoch": 2.497567448031844, "grad_norm": 3.5686365928158454, "learning_rate": 6.227723493602942e-07, "loss": 1.0551, "step": 84705 }, { "epoch": 2.4977148754238536, "grad_norm": 3.4237389819858395, "learning_rate": 6.2241738746775e-07, "loss": 1.0266, "step": 84710 }, { "epoch": 2.497862302815863, "grad_norm": 3.4893425968066922, "learning_rate": 6.220625176078953e-07, "loss": 1.0505, "step": 84715 }, { "epoch": 2.4980097302078725, "grad_norm": 3.5565699923319682, "learning_rate": 6.217077397911728e-07, "loss": 1.0202, "step": 84720 }, { "epoch": 2.498157157599882, "grad_norm": 3.412229207475826, "learning_rate": 6.213530540280235e-07, "loss": 1.0088, "step": 84725 }, { "epoch": 2.4983045849918915, "grad_norm": 3.565638296758471, "learning_rate": 6.20998460328881e-07, "loss": 1.07, "step": 84730 }, { "epoch": 2.498452012383901, "grad_norm": 3.4919575217077923, "learning_rate": 6.206439587041832e-07, "loss": 1.0381, "step": 84735 }, { "epoch": 2.4985994397759104, "grad_norm": 3.5414161085335265, "learning_rate": 6.202895491643597e-07, "loss": 1.0408, "step": 84740 }, { "epoch": 2.49874686716792, "grad_norm": 3.4868904519920894, "learning_rate": 6.1993523171984e-07, "loss": 1.0488, "step": 84745 }, { "epoch": 2.4988942945599293, "grad_norm": 3.410651359764628, "learning_rate": 6.195810063810501e-07, "loss": 1.0215, "step": 84750 }, { "epoch": 2.4990417219519387, "grad_norm": 3.307120530182907, "learning_rate": 6.192268731584137e-07, "loss": 1.0486, "step": 84755 }, { "epoch": 2.499189149343948, "grad_norm": 3.635761789345687, "learning_rate": 6.188728320623519e-07, "loss": 1.0329, "step": 84760 }, { "epoch": 2.4993365767359577, "grad_norm": 3.5457057311424722, "learning_rate": 6.18518883103282e-07, "loss": 1.0589, "step": 84765 }, { "epoch": 2.499484004127967, "grad_norm": 3.5456669787736175, "learning_rate": 6.181650262916206e-07, "loss": 1.087, "step": 84770 }, { "epoch": 2.4996314315199766, "grad_norm": 3.5046290517879704, "learning_rate": 6.178112616377788e-07, "loss": 1.059, "step": 84775 }, { "epoch": 2.499778858911986, "grad_norm": 3.479299191426195, "learning_rate": 6.17457589152167e-07, "loss": 1.0389, "step": 84780 }, { "epoch": 2.499926286303995, "grad_norm": 3.4838446732979014, "learning_rate": 6.171040088451927e-07, "loss": 1.0795, "step": 84785 }, { "epoch": 2.500073713696005, "grad_norm": 3.504932119025694, "learning_rate": 6.167505207272604e-07, "loss": 1.0605, "step": 84790 }, { "epoch": 2.500221141088014, "grad_norm": 3.3827360455999735, "learning_rate": 6.163971248087717e-07, "loss": 1.0322, "step": 84795 }, { "epoch": 2.5003685684800234, "grad_norm": 3.4060149041259598, "learning_rate": 6.16043821100127e-07, "loss": 1.0077, "step": 84800 }, { "epoch": 2.500515995872033, "grad_norm": 3.483718123718198, "learning_rate": 6.156906096117195e-07, "loss": 1.0527, "step": 84805 }, { "epoch": 2.5006634232640423, "grad_norm": 3.50246597337192, "learning_rate": 6.153374903539467e-07, "loss": 1.0106, "step": 84810 }, { "epoch": 2.500810850656052, "grad_norm": 3.378809087211107, "learning_rate": 6.149844633371963e-07, "loss": 1.0438, "step": 84815 }, { "epoch": 2.5009582780480613, "grad_norm": 3.649633442434857, "learning_rate": 6.146315285718583e-07, "loss": 1.0457, "step": 84820 }, { "epoch": 2.5011057054400707, "grad_norm": 3.506764459187342, "learning_rate": 6.142786860683176e-07, "loss": 1.011, "step": 84825 }, { "epoch": 2.50125313283208, "grad_norm": 3.5154632021974694, "learning_rate": 6.139259358369573e-07, "loss": 1.0299, "step": 84830 }, { "epoch": 2.5014005602240896, "grad_norm": 3.5007055419192183, "learning_rate": 6.135732778881585e-07, "loss": 1.0165, "step": 84835 }, { "epoch": 2.501547987616099, "grad_norm": 3.5283546023460786, "learning_rate": 6.132207122322952e-07, "loss": 1.0253, "step": 84840 }, { "epoch": 2.5016954150081085, "grad_norm": 3.454819911030829, "learning_rate": 6.128682388797463e-07, "loss": 1.0447, "step": 84845 }, { "epoch": 2.501842842400118, "grad_norm": 3.5575322144500494, "learning_rate": 6.125158578408808e-07, "loss": 1.0736, "step": 84850 }, { "epoch": 2.5019902697921275, "grad_norm": 3.3748331165404744, "learning_rate": 6.121635691260688e-07, "loss": 1.0153, "step": 84855 }, { "epoch": 2.502137697184137, "grad_norm": 3.3348302610893694, "learning_rate": 6.118113727456769e-07, "loss": 1.0364, "step": 84860 }, { "epoch": 2.5022851245761464, "grad_norm": 3.5735126370579637, "learning_rate": 6.114592687100684e-07, "loss": 1.0752, "step": 84865 }, { "epoch": 2.502432551968156, "grad_norm": 3.354091443023296, "learning_rate": 6.111072570296052e-07, "loss": 1.059, "step": 84870 }, { "epoch": 2.5025799793601653, "grad_norm": 3.3204864688650213, "learning_rate": 6.107553377146459e-07, "loss": 1.0366, "step": 84875 }, { "epoch": 2.5027274067521743, "grad_norm": 3.577058192037174, "learning_rate": 6.104035107755436e-07, "loss": 1.0537, "step": 84880 }, { "epoch": 2.502874834144184, "grad_norm": 3.5288072731322795, "learning_rate": 6.100517762226545e-07, "loss": 1.0661, "step": 84885 }, { "epoch": 2.5030222615361932, "grad_norm": 3.4470988617798097, "learning_rate": 6.09700134066326e-07, "loss": 1.0318, "step": 84890 }, { "epoch": 2.503169688928203, "grad_norm": 3.6289158524333485, "learning_rate": 6.093485843169086e-07, "loss": 1.0261, "step": 84895 }, { "epoch": 2.503317116320212, "grad_norm": 3.5847539947469333, "learning_rate": 6.089971269847447e-07, "loss": 1.0342, "step": 84900 }, { "epoch": 2.5034645437122216, "grad_norm": 3.5485128625240967, "learning_rate": 6.086457620801768e-07, "loss": 1.0829, "step": 84905 }, { "epoch": 2.503611971104231, "grad_norm": 3.547652237846626, "learning_rate": 6.082944896135449e-07, "loss": 1.0543, "step": 84910 }, { "epoch": 2.5037593984962405, "grad_norm": 3.7054439636118013, "learning_rate": 6.079433095951838e-07, "loss": 1.0401, "step": 84915 }, { "epoch": 2.50390682588825, "grad_norm": 3.5004823228422226, "learning_rate": 6.075922220354305e-07, "loss": 1.0189, "step": 84920 }, { "epoch": 2.5040542532802594, "grad_norm": 3.3020934824258217, "learning_rate": 6.07241226944613e-07, "loss": 1.0326, "step": 84925 }, { "epoch": 2.504201680672269, "grad_norm": 3.4273115254903095, "learning_rate": 6.068903243330614e-07, "loss": 1.0303, "step": 84930 }, { "epoch": 2.5043491080642784, "grad_norm": 3.395718715746502, "learning_rate": 6.06539514211101e-07, "loss": 1.0109, "step": 84935 }, { "epoch": 2.504496535456288, "grad_norm": 3.6119727105835198, "learning_rate": 6.061887965890547e-07, "loss": 1.0855, "step": 84940 }, { "epoch": 2.5046439628482973, "grad_norm": 3.535534757348425, "learning_rate": 6.058381714772431e-07, "loss": 1.0268, "step": 84945 }, { "epoch": 2.5047913902403067, "grad_norm": 3.444686686653069, "learning_rate": 6.054876388859835e-07, "loss": 1.0354, "step": 84950 }, { "epoch": 2.504938817632316, "grad_norm": 3.5728625472395774, "learning_rate": 6.051371988255905e-07, "loss": 1.0213, "step": 84955 }, { "epoch": 2.5050862450243256, "grad_norm": 3.3009381909325133, "learning_rate": 6.047868513063777e-07, "loss": 1.0353, "step": 84960 }, { "epoch": 2.505233672416335, "grad_norm": 3.5425005630082325, "learning_rate": 6.044365963386522e-07, "loss": 1.0294, "step": 84965 }, { "epoch": 2.5053810998083446, "grad_norm": 3.4933720695245536, "learning_rate": 6.040864339327221e-07, "loss": 1.029, "step": 84970 }, { "epoch": 2.5055285272003536, "grad_norm": 3.5800044236914133, "learning_rate": 6.037363640988906e-07, "loss": 1.0586, "step": 84975 }, { "epoch": 2.5056759545923635, "grad_norm": 3.517109168716256, "learning_rate": 6.033863868474593e-07, "loss": 0.9721, "step": 84980 }, { "epoch": 2.5058233819843725, "grad_norm": 3.639447613991352, "learning_rate": 6.030365021887272e-07, "loss": 1.0699, "step": 84985 }, { "epoch": 2.5059708093763824, "grad_norm": 3.587646197254114, "learning_rate": 6.02686710132988e-07, "loss": 1.0362, "step": 84990 }, { "epoch": 2.5061182367683914, "grad_norm": 3.4235387107922217, "learning_rate": 6.023370106905384e-07, "loss": 1.035, "step": 84995 }, { "epoch": 2.506265664160401, "grad_norm": 3.37944687452635, "learning_rate": 6.019874038716649e-07, "loss": 1.0113, "step": 85000 }, { "epoch": 2.506265664160401, "eval_loss": 1.079696536064148, "eval_runtime": 4.1726, "eval_samples_per_second": 94.906, "eval_steps_per_second": 3.116, "step": 85000 }, { "epoch": 2.5064130915524103, "grad_norm": 3.502031189850283, "learning_rate": 6.016378896866571e-07, "loss": 0.9984, "step": 85005 }, { "epoch": 2.50656051894442, "grad_norm": 3.4613835526405023, "learning_rate": 6.012884681457992e-07, "loss": 1.0755, "step": 85010 }, { "epoch": 2.5067079463364292, "grad_norm": 3.4317137334001337, "learning_rate": 6.009391392593737e-07, "loss": 1.0526, "step": 85015 }, { "epoch": 2.5068553737284387, "grad_norm": 3.434631042684691, "learning_rate": 6.0058990303766e-07, "loss": 1.0549, "step": 85020 }, { "epoch": 2.507002801120448, "grad_norm": 3.5460230394256325, "learning_rate": 6.002407594909347e-07, "loss": 1.0702, "step": 85025 }, { "epoch": 2.5071502285124576, "grad_norm": 3.599660634167801, "learning_rate": 5.998917086294723e-07, "loss": 1.0417, "step": 85030 }, { "epoch": 2.507297655904467, "grad_norm": 3.5660984099372004, "learning_rate": 5.995427504635427e-07, "loss": 1.0481, "step": 85035 }, { "epoch": 2.5074450832964765, "grad_norm": 3.487884248850394, "learning_rate": 5.99193885003415e-07, "loss": 1.0744, "step": 85040 }, { "epoch": 2.507592510688486, "grad_norm": 3.5959877408760264, "learning_rate": 5.988451122593554e-07, "loss": 1.0327, "step": 85045 }, { "epoch": 2.5077399380804954, "grad_norm": 3.5377334167353935, "learning_rate": 5.984964322416261e-07, "loss": 1.0665, "step": 85050 }, { "epoch": 2.507887365472505, "grad_norm": 3.5965852554063904, "learning_rate": 5.981478449604882e-07, "loss": 1.0401, "step": 85055 }, { "epoch": 2.5080347928645144, "grad_norm": 3.4232741994453484, "learning_rate": 5.977993504262001e-07, "loss": 1.0182, "step": 85060 }, { "epoch": 2.508182220256524, "grad_norm": 3.4745251923228473, "learning_rate": 5.974509486490136e-07, "loss": 1.0404, "step": 85065 }, { "epoch": 2.508329647648533, "grad_norm": 3.582303444791773, "learning_rate": 5.971026396391842e-07, "loss": 1.0164, "step": 85070 }, { "epoch": 2.5084770750405427, "grad_norm": 3.5023174575753533, "learning_rate": 5.967544234069587e-07, "loss": 1.0138, "step": 85075 }, { "epoch": 2.5086245024325518, "grad_norm": 3.461520984366261, "learning_rate": 5.96406299962586e-07, "loss": 1.0259, "step": 85080 }, { "epoch": 2.5087719298245617, "grad_norm": 3.303196629097974, "learning_rate": 5.960582693163088e-07, "loss": 0.9731, "step": 85085 }, { "epoch": 2.5089193572165707, "grad_norm": 3.615500763362914, "learning_rate": 5.957103314783681e-07, "loss": 1.0209, "step": 85090 }, { "epoch": 2.50906678460858, "grad_norm": 3.46925835795051, "learning_rate": 5.953624864590035e-07, "loss": 1.0594, "step": 85095 }, { "epoch": 2.5092142120005896, "grad_norm": 3.524511874810808, "learning_rate": 5.950147342684481e-07, "loss": 1.0626, "step": 85100 }, { "epoch": 2.509361639392599, "grad_norm": 3.701472189015405, "learning_rate": 5.946670749169388e-07, "loss": 1.0473, "step": 85105 }, { "epoch": 2.5095090667846085, "grad_norm": 3.332834740693614, "learning_rate": 5.943195084147031e-07, "loss": 0.9543, "step": 85110 }, { "epoch": 2.509656494176618, "grad_norm": 3.47461707114619, "learning_rate": 5.939720347719687e-07, "loss": 1.0356, "step": 85115 }, { "epoch": 2.5098039215686274, "grad_norm": 3.504430093070204, "learning_rate": 5.936246539989613e-07, "loss": 1.0346, "step": 85120 }, { "epoch": 2.509951348960637, "grad_norm": 3.6303916614977574, "learning_rate": 5.932773661059027e-07, "loss": 1.0345, "step": 85125 }, { "epoch": 2.5100987763526463, "grad_norm": 3.4969810487855213, "learning_rate": 5.929301711030125e-07, "loss": 1.0825, "step": 85130 }, { "epoch": 2.510246203744656, "grad_norm": 3.4843407101046493, "learning_rate": 5.925830690005075e-07, "loss": 1.0575, "step": 85135 }, { "epoch": 2.5103936311366652, "grad_norm": 3.4488823723917736, "learning_rate": 5.922360598085993e-07, "loss": 1.0341, "step": 85140 }, { "epoch": 2.5105410585286747, "grad_norm": 3.603223072948646, "learning_rate": 5.918891435375027e-07, "loss": 1.0389, "step": 85145 }, { "epoch": 2.510688485920684, "grad_norm": 3.480106144296697, "learning_rate": 5.915423201974225e-07, "loss": 1.0381, "step": 85150 }, { "epoch": 2.5108359133126936, "grad_norm": 3.3709424905726206, "learning_rate": 5.911955897985676e-07, "loss": 1.0077, "step": 85155 }, { "epoch": 2.510983340704703, "grad_norm": 3.4343439792572092, "learning_rate": 5.908489523511387e-07, "loss": 1.0038, "step": 85160 }, { "epoch": 2.511130768096712, "grad_norm": 3.5515287277835297, "learning_rate": 5.905024078653371e-07, "loss": 1.0107, "step": 85165 }, { "epoch": 2.511278195488722, "grad_norm": 3.627278120492954, "learning_rate": 5.901559563513606e-07, "loss": 1.0884, "step": 85170 }, { "epoch": 2.511425622880731, "grad_norm": 3.5434416504869426, "learning_rate": 5.898095978194013e-07, "loss": 1.0256, "step": 85175 }, { "epoch": 2.511573050272741, "grad_norm": 3.565949647989709, "learning_rate": 5.89463332279655e-07, "loss": 1.0114, "step": 85180 }, { "epoch": 2.51172047766475, "grad_norm": 3.341234518154736, "learning_rate": 5.891171597423082e-07, "loss": 1.0072, "step": 85185 }, { "epoch": 2.5118679050567594, "grad_norm": 3.7883213068053396, "learning_rate": 5.887710802175483e-07, "loss": 1.0477, "step": 85190 }, { "epoch": 2.512015332448769, "grad_norm": 3.613363434141926, "learning_rate": 5.884250937155596e-07, "loss": 1.0418, "step": 85195 }, { "epoch": 2.5121627598407783, "grad_norm": 3.8604492447134744, "learning_rate": 5.880792002465221e-07, "loss": 1.0844, "step": 85200 }, { "epoch": 2.5123101872327878, "grad_norm": 3.578112639023426, "learning_rate": 5.877333998206145e-07, "loss": 1.0027, "step": 85205 }, { "epoch": 2.512457614624797, "grad_norm": 3.605176215348932, "learning_rate": 5.873876924480129e-07, "loss": 1.0564, "step": 85210 }, { "epoch": 2.5126050420168067, "grad_norm": 3.4027108287243624, "learning_rate": 5.870420781388898e-07, "loss": 1.0657, "step": 85215 }, { "epoch": 2.512752469408816, "grad_norm": 3.2691366673551543, "learning_rate": 5.866965569034164e-07, "loss": 0.9727, "step": 85220 }, { "epoch": 2.5128998968008256, "grad_norm": 3.548246522844364, "learning_rate": 5.863511287517573e-07, "loss": 1.0497, "step": 85225 }, { "epoch": 2.513047324192835, "grad_norm": 3.505819808379055, "learning_rate": 5.860057936940795e-07, "loss": 1.0519, "step": 85230 }, { "epoch": 2.5131947515848445, "grad_norm": 3.478563392655416, "learning_rate": 5.856605517405437e-07, "loss": 1.041, "step": 85235 }, { "epoch": 2.513342178976854, "grad_norm": 3.557667062152615, "learning_rate": 5.853154029013094e-07, "loss": 1.0713, "step": 85240 }, { "epoch": 2.5134896063688634, "grad_norm": 3.5391448389185767, "learning_rate": 5.849703471865341e-07, "loss": 1.0474, "step": 85245 }, { "epoch": 2.513637033760873, "grad_norm": 3.4780354880459066, "learning_rate": 5.846253846063686e-07, "loss": 1.0692, "step": 85250 }, { "epoch": 2.5137844611528823, "grad_norm": 3.4731811807469852, "learning_rate": 5.842805151709674e-07, "loss": 1.0086, "step": 85255 }, { "epoch": 2.513931888544892, "grad_norm": 3.4599938229103593, "learning_rate": 5.839357388904762e-07, "loss": 1.0371, "step": 85260 }, { "epoch": 2.5140793159369013, "grad_norm": 3.584284551150531, "learning_rate": 5.835910557750411e-07, "loss": 1.0369, "step": 85265 }, { "epoch": 2.5142267433289103, "grad_norm": 3.5305817814848366, "learning_rate": 5.832464658348048e-07, "loss": 1.0475, "step": 85270 }, { "epoch": 2.51437417072092, "grad_norm": 3.602789224729736, "learning_rate": 5.829019690799072e-07, "loss": 1.0428, "step": 85275 }, { "epoch": 2.514521598112929, "grad_norm": 3.5048870885637036, "learning_rate": 5.825575655204858e-07, "loss": 1.0521, "step": 85280 }, { "epoch": 2.514669025504939, "grad_norm": 3.460963906670781, "learning_rate": 5.822132551666748e-07, "loss": 0.9985, "step": 85285 }, { "epoch": 2.514816452896948, "grad_norm": 3.466026036322175, "learning_rate": 5.818690380286068e-07, "loss": 1.0874, "step": 85290 }, { "epoch": 2.5149638802889576, "grad_norm": 3.5575496610972, "learning_rate": 5.815249141164091e-07, "loss": 1.0072, "step": 85295 }, { "epoch": 2.515111307680967, "grad_norm": 3.411733923327443, "learning_rate": 5.811808834402088e-07, "loss": 1.0746, "step": 85300 }, { "epoch": 2.5152587350729765, "grad_norm": 3.6107939487505663, "learning_rate": 5.808369460101292e-07, "loss": 1.0792, "step": 85305 }, { "epoch": 2.515406162464986, "grad_norm": 3.6779873556725966, "learning_rate": 5.804931018362916e-07, "loss": 1.0815, "step": 85310 }, { "epoch": 2.5155535898569954, "grad_norm": 3.3875594289186264, "learning_rate": 5.801493509288136e-07, "loss": 1.0263, "step": 85315 }, { "epoch": 2.515701017249005, "grad_norm": 3.4402406917992363, "learning_rate": 5.798056932978118e-07, "loss": 1.0606, "step": 85320 }, { "epoch": 2.5158484446410143, "grad_norm": 3.5909352050394903, "learning_rate": 5.794621289533949e-07, "loss": 1.0271, "step": 85325 }, { "epoch": 2.5159958720330238, "grad_norm": 3.3865113631644808, "learning_rate": 5.791186579056777e-07, "loss": 1.0391, "step": 85330 }, { "epoch": 2.5161432994250332, "grad_norm": 3.4504892106388096, "learning_rate": 5.787752801647628e-07, "loss": 1.012, "step": 85335 }, { "epoch": 2.5162907268170427, "grad_norm": 3.6353388800599067, "learning_rate": 5.784319957407579e-07, "loss": 1.0242, "step": 85340 }, { "epoch": 2.516438154209052, "grad_norm": 3.604080734365171, "learning_rate": 5.780888046437624e-07, "loss": 1.0848, "step": 85345 }, { "epoch": 2.5165855816010616, "grad_norm": 3.634768950046281, "learning_rate": 5.777457068838758e-07, "loss": 1.0851, "step": 85350 }, { "epoch": 2.516733008993071, "grad_norm": 3.4526697430381836, "learning_rate": 5.77402702471195e-07, "loss": 1.042, "step": 85355 }, { "epoch": 2.5168804363850805, "grad_norm": 3.3085121699806246, "learning_rate": 5.770597914158107e-07, "loss": 0.9881, "step": 85360 }, { "epoch": 2.5170278637770895, "grad_norm": 3.5339070477266534, "learning_rate": 5.76716973727817e-07, "loss": 1.057, "step": 85365 }, { "epoch": 2.5171752911690994, "grad_norm": 3.7639849093522098, "learning_rate": 5.763742494172984e-07, "loss": 1.0589, "step": 85370 }, { "epoch": 2.5173227185611085, "grad_norm": 3.306066131017582, "learning_rate": 5.760316184943418e-07, "loss": 1.0216, "step": 85375 }, { "epoch": 2.5174701459531184, "grad_norm": 3.5297385548851636, "learning_rate": 5.756890809690293e-07, "loss": 1.0415, "step": 85380 }, { "epoch": 2.5176175733451274, "grad_norm": 3.3910826397279585, "learning_rate": 5.753466368514402e-07, "loss": 1.0557, "step": 85385 }, { "epoch": 2.517765000737137, "grad_norm": 3.6560655257833434, "learning_rate": 5.750042861516511e-07, "loss": 1.0653, "step": 85390 }, { "epoch": 2.5179124281291463, "grad_norm": 3.6125916661450916, "learning_rate": 5.746620288797375e-07, "loss": 1.0448, "step": 85395 }, { "epoch": 2.5180598555211557, "grad_norm": 3.55809278900382, "learning_rate": 5.743198650457678e-07, "loss": 1.0499, "step": 85400 }, { "epoch": 2.518207282913165, "grad_norm": 3.328320068370907, "learning_rate": 5.739777946598139e-07, "loss": 1.0221, "step": 85405 }, { "epoch": 2.5183547103051747, "grad_norm": 3.5875292740103695, "learning_rate": 5.736358177319382e-07, "loss": 1.0254, "step": 85410 }, { "epoch": 2.518502137697184, "grad_norm": 3.477470236457183, "learning_rate": 5.732939342722075e-07, "loss": 1.0216, "step": 85415 }, { "epoch": 2.5186495650891936, "grad_norm": 3.301471856658838, "learning_rate": 5.729521442906795e-07, "loss": 1.0328, "step": 85420 }, { "epoch": 2.518796992481203, "grad_norm": 3.5555601436599384, "learning_rate": 5.726104477974123e-07, "loss": 1.0311, "step": 85425 }, { "epoch": 2.5189444198732125, "grad_norm": 3.4371149231003493, "learning_rate": 5.722688448024617e-07, "loss": 1.0298, "step": 85430 }, { "epoch": 2.519091847265222, "grad_norm": 3.516207539152612, "learning_rate": 5.719273353158771e-07, "loss": 1.0404, "step": 85435 }, { "epoch": 2.5192392746572314, "grad_norm": 3.6687697090834623, "learning_rate": 5.715859193477115e-07, "loss": 1.0357, "step": 85440 }, { "epoch": 2.519386702049241, "grad_norm": 3.6210676838605846, "learning_rate": 5.712445969080091e-07, "loss": 1.078, "step": 85445 }, { "epoch": 2.5195341294412503, "grad_norm": 3.2620331756764065, "learning_rate": 5.709033680068136e-07, "loss": 1.0342, "step": 85450 }, { "epoch": 2.51968155683326, "grad_norm": 3.4188497062741643, "learning_rate": 5.705622326541666e-07, "loss": 1.0303, "step": 85455 }, { "epoch": 2.519828984225269, "grad_norm": 3.5804748960597474, "learning_rate": 5.702211908601069e-07, "loss": 1.0039, "step": 85460 }, { "epoch": 2.5199764116172787, "grad_norm": 3.54792561118048, "learning_rate": 5.698802426346694e-07, "loss": 1.0554, "step": 85465 }, { "epoch": 2.5201238390092877, "grad_norm": 3.395669376327295, "learning_rate": 5.695393879878873e-07, "loss": 1.0482, "step": 85470 }, { "epoch": 2.5202712664012976, "grad_norm": 3.502508612152613, "learning_rate": 5.691986269297898e-07, "loss": 1.1076, "step": 85475 }, { "epoch": 2.5204186937933066, "grad_norm": 3.447725044161935, "learning_rate": 5.688579594704061e-07, "loss": 1.0278, "step": 85480 }, { "epoch": 2.520566121185316, "grad_norm": 3.624924235455645, "learning_rate": 5.685173856197587e-07, "loss": 1.0901, "step": 85485 }, { "epoch": 2.5207135485773255, "grad_norm": 3.486651400770858, "learning_rate": 5.681769053878698e-07, "loss": 1.028, "step": 85490 }, { "epoch": 2.520860975969335, "grad_norm": 3.3948386887597333, "learning_rate": 5.67836518784759e-07, "loss": 1.044, "step": 85495 }, { "epoch": 2.5210084033613445, "grad_norm": 3.396566368760908, "learning_rate": 5.674962258204419e-07, "loss": 1.0478, "step": 85500 }, { "epoch": 2.5210084033613445, "eval_loss": 1.079576849937439, "eval_runtime": 4.2382, "eval_samples_per_second": 93.436, "eval_steps_per_second": 3.067, "step": 85500 }, { "epoch": 2.521155830753354, "grad_norm": 3.6594868135775305, "learning_rate": 5.671560265049336e-07, "loss": 1.0897, "step": 85505 }, { "epoch": 2.5213032581453634, "grad_norm": 3.4947967918557943, "learning_rate": 5.668159208482418e-07, "loss": 1.0291, "step": 85510 }, { "epoch": 2.521450685537373, "grad_norm": 3.516050356650656, "learning_rate": 5.664759088603781e-07, "loss": 1.0514, "step": 85515 }, { "epoch": 2.5215981129293823, "grad_norm": 3.502467147328952, "learning_rate": 5.661359905513449e-07, "loss": 1.0057, "step": 85520 }, { "epoch": 2.5217455403213918, "grad_norm": 3.4488127998513245, "learning_rate": 5.65796165931146e-07, "loss": 1.0318, "step": 85525 }, { "epoch": 2.521892967713401, "grad_norm": 3.4940941814266817, "learning_rate": 5.654564350097808e-07, "loss": 1.0378, "step": 85530 }, { "epoch": 2.5220403951054107, "grad_norm": 3.5142235520892293, "learning_rate": 5.651167977972465e-07, "loss": 1.0618, "step": 85535 }, { "epoch": 2.52218782249742, "grad_norm": 3.280073050548367, "learning_rate": 5.647772543035372e-07, "loss": 0.9772, "step": 85540 }, { "epoch": 2.5223352498894296, "grad_norm": 3.3726915331526293, "learning_rate": 5.644378045386442e-07, "loss": 1.0451, "step": 85545 }, { "epoch": 2.522482677281439, "grad_norm": 3.4457761389358486, "learning_rate": 5.640984485125571e-07, "loss": 1.0736, "step": 85550 }, { "epoch": 2.522630104673448, "grad_norm": 3.5638910091576936, "learning_rate": 5.637591862352603e-07, "loss": 1.0411, "step": 85555 }, { "epoch": 2.522777532065458, "grad_norm": 3.652686474245468, "learning_rate": 5.634200177167377e-07, "loss": 1.0335, "step": 85560 }, { "epoch": 2.522924959457467, "grad_norm": 3.7610499516278737, "learning_rate": 5.630809429669699e-07, "loss": 1.0949, "step": 85565 }, { "epoch": 2.523072386849477, "grad_norm": 3.4344516368950386, "learning_rate": 5.627419619959343e-07, "loss": 1.004, "step": 85570 }, { "epoch": 2.523219814241486, "grad_norm": 3.6820471173028255, "learning_rate": 5.624030748136061e-07, "loss": 1.0486, "step": 85575 }, { "epoch": 2.5233672416334954, "grad_norm": 3.7346722559475443, "learning_rate": 5.620642814299583e-07, "loss": 1.0073, "step": 85580 }, { "epoch": 2.523514669025505, "grad_norm": 3.571478493146123, "learning_rate": 5.617255818549572e-07, "loss": 1.0374, "step": 85585 }, { "epoch": 2.5236620964175143, "grad_norm": 3.451448665779482, "learning_rate": 5.61386976098573e-07, "loss": 1.0438, "step": 85590 }, { "epoch": 2.5238095238095237, "grad_norm": 3.688143448783985, "learning_rate": 5.610484641707668e-07, "loss": 1.0333, "step": 85595 }, { "epoch": 2.523956951201533, "grad_norm": 3.3672134544502246, "learning_rate": 5.607100460815025e-07, "loss": 1.0218, "step": 85600 }, { "epoch": 2.5241043785935426, "grad_norm": 3.600516446489121, "learning_rate": 5.603717218407359e-07, "loss": 1.0568, "step": 85605 }, { "epoch": 2.524251805985552, "grad_norm": 3.691906713379976, "learning_rate": 5.600334914584236e-07, "loss": 1.0462, "step": 85610 }, { "epoch": 2.5243992333775616, "grad_norm": 3.3995917192362795, "learning_rate": 5.596953549445194e-07, "loss": 1.0717, "step": 85615 }, { "epoch": 2.524546660769571, "grad_norm": 3.664317554959761, "learning_rate": 5.593573123089707e-07, "loss": 1.0201, "step": 85620 }, { "epoch": 2.5246940881615805, "grad_norm": 3.5201851376572724, "learning_rate": 5.590193635617279e-07, "loss": 1.0478, "step": 85625 }, { "epoch": 2.52484151555359, "grad_norm": 3.4652488243088317, "learning_rate": 5.58681508712733e-07, "loss": 1.028, "step": 85630 }, { "epoch": 2.5249889429455994, "grad_norm": 3.616026739422011, "learning_rate": 5.583437477719293e-07, "loss": 1.05, "step": 85635 }, { "epoch": 2.525136370337609, "grad_norm": 3.4657479216246476, "learning_rate": 5.580060807492547e-07, "loss": 1.0304, "step": 85640 }, { "epoch": 2.5252837977296183, "grad_norm": 3.545770839287988, "learning_rate": 5.576685076546468e-07, "loss": 1.0786, "step": 85645 }, { "epoch": 2.5254312251216273, "grad_norm": 3.3113793176456894, "learning_rate": 5.573310284980377e-07, "loss": 1.0457, "step": 85650 }, { "epoch": 2.5255786525136372, "grad_norm": 3.539244645871333, "learning_rate": 5.5699364328936e-07, "loss": 1.0484, "step": 85655 }, { "epoch": 2.5257260799056462, "grad_norm": 3.3710211196769784, "learning_rate": 5.566563520385382e-07, "loss": 1.032, "step": 85660 }, { "epoch": 2.525873507297656, "grad_norm": 3.353967394330959, "learning_rate": 5.563191547555016e-07, "loss": 1.002, "step": 85665 }, { "epoch": 2.526020934689665, "grad_norm": 3.348599979844187, "learning_rate": 5.55982051450169e-07, "loss": 1.065, "step": 85670 }, { "epoch": 2.5261683620816746, "grad_norm": 3.491064959714522, "learning_rate": 5.556450421324637e-07, "loss": 1.0621, "step": 85675 }, { "epoch": 2.526315789473684, "grad_norm": 3.4432841903812568, "learning_rate": 5.553081268122993e-07, "loss": 1.0848, "step": 85680 }, { "epoch": 2.5264632168656935, "grad_norm": 3.7321168349494087, "learning_rate": 5.549713054995914e-07, "loss": 1.0506, "step": 85685 }, { "epoch": 2.526610644257703, "grad_norm": 3.456269778216062, "learning_rate": 5.54634578204252e-07, "loss": 1.0707, "step": 85690 }, { "epoch": 2.5267580716497124, "grad_norm": 3.4036984354888493, "learning_rate": 5.542979449361872e-07, "loss": 1.0493, "step": 85695 }, { "epoch": 2.526905499041722, "grad_norm": 3.50211164477698, "learning_rate": 5.539614057053063e-07, "loss": 1.0597, "step": 85700 }, { "epoch": 2.5270529264337314, "grad_norm": 3.5167636875454473, "learning_rate": 5.536249605215094e-07, "loss": 1.0544, "step": 85705 }, { "epoch": 2.527200353825741, "grad_norm": 3.2568557626810124, "learning_rate": 5.532886093946979e-07, "loss": 1.0241, "step": 85710 }, { "epoch": 2.5273477812177503, "grad_norm": 3.3280279624558156, "learning_rate": 5.529523523347694e-07, "loss": 1.031, "step": 85715 }, { "epoch": 2.5274952086097597, "grad_norm": 3.5469362273174863, "learning_rate": 5.52616189351619e-07, "loss": 1.0291, "step": 85720 }, { "epoch": 2.527642636001769, "grad_norm": 3.593194048175315, "learning_rate": 5.52280120455138e-07, "loss": 1.048, "step": 85725 }, { "epoch": 2.5277900633937787, "grad_norm": 3.468819857038107, "learning_rate": 5.519441456552158e-07, "loss": 1.0325, "step": 85730 }, { "epoch": 2.527937490785788, "grad_norm": 3.442506934168874, "learning_rate": 5.516082649617386e-07, "loss": 1.0462, "step": 85735 }, { "epoch": 2.5280849181777976, "grad_norm": 3.576718873461176, "learning_rate": 5.512724783845917e-07, "loss": 1.0431, "step": 85740 }, { "epoch": 2.528232345569807, "grad_norm": 3.470577828970243, "learning_rate": 5.509367859336536e-07, "loss": 1.0307, "step": 85745 }, { "epoch": 2.5283797729618165, "grad_norm": 3.440710443464632, "learning_rate": 5.506011876188037e-07, "loss": 1.035, "step": 85750 }, { "epoch": 2.5285272003538255, "grad_norm": 3.4891130911161268, "learning_rate": 5.502656834499169e-07, "loss": 1.0491, "step": 85755 }, { "epoch": 2.5286746277458354, "grad_norm": 3.5400039948090947, "learning_rate": 5.499302734368665e-07, "loss": 1.0102, "step": 85760 }, { "epoch": 2.5288220551378444, "grad_norm": 3.49273643251743, "learning_rate": 5.495949575895226e-07, "loss": 1.0435, "step": 85765 }, { "epoch": 2.5289694825298543, "grad_norm": 3.3850612187276106, "learning_rate": 5.492597359177498e-07, "loss": 1.0334, "step": 85770 }, { "epoch": 2.5291169099218633, "grad_norm": 3.4705220848804266, "learning_rate": 5.489246084314156e-07, "loss": 1.0598, "step": 85775 }, { "epoch": 2.529264337313873, "grad_norm": 3.523325907957534, "learning_rate": 5.485895751403792e-07, "loss": 1.0293, "step": 85780 }, { "epoch": 2.5294117647058822, "grad_norm": 3.5160398941149444, "learning_rate": 5.482546360545007e-07, "loss": 1.0302, "step": 85785 }, { "epoch": 2.5295591920978917, "grad_norm": 3.2388114548688605, "learning_rate": 5.47919791183635e-07, "loss": 1.0004, "step": 85790 }, { "epoch": 2.529706619489901, "grad_norm": 3.485679414801115, "learning_rate": 5.475850405376361e-07, "loss": 1.0216, "step": 85795 }, { "epoch": 2.5298540468819106, "grad_norm": 3.536747401598972, "learning_rate": 5.472503841263542e-07, "loss": 1.0493, "step": 85800 }, { "epoch": 2.53000147427392, "grad_norm": 3.523043937881475, "learning_rate": 5.469158219596369e-07, "loss": 1.0162, "step": 85805 }, { "epoch": 2.5301489016659295, "grad_norm": 3.575247783167066, "learning_rate": 5.465813540473298e-07, "loss": 1.0514, "step": 85810 }, { "epoch": 2.530296329057939, "grad_norm": 3.519950137708436, "learning_rate": 5.462469803992732e-07, "loss": 1.0354, "step": 85815 }, { "epoch": 2.5304437564499485, "grad_norm": 3.63010376150707, "learning_rate": 5.459127010253077e-07, "loss": 1.0252, "step": 85820 }, { "epoch": 2.530591183841958, "grad_norm": 3.738370897515252, "learning_rate": 5.4557851593527e-07, "loss": 1.0546, "step": 85825 }, { "epoch": 2.5307386112339674, "grad_norm": 3.528325495073986, "learning_rate": 5.452444251389933e-07, "loss": 1.0424, "step": 85830 }, { "epoch": 2.530886038625977, "grad_norm": 3.5200472319157647, "learning_rate": 5.449104286463087e-07, "loss": 1.0343, "step": 85835 }, { "epoch": 2.5310334660179863, "grad_norm": 3.382691076846805, "learning_rate": 5.445765264670453e-07, "loss": 0.9757, "step": 85840 }, { "epoch": 2.5311808934099957, "grad_norm": 3.4708576734766003, "learning_rate": 5.442427186110266e-07, "loss": 1.0594, "step": 85845 }, { "epoch": 2.5313283208020048, "grad_norm": 3.541185556456471, "learning_rate": 5.439090050880779e-07, "loss": 1.0229, "step": 85850 }, { "epoch": 2.5314757481940147, "grad_norm": 3.42284047305536, "learning_rate": 5.435753859080162e-07, "loss": 1.0437, "step": 85855 }, { "epoch": 2.5316231755860237, "grad_norm": 3.42198573179269, "learning_rate": 5.432418610806612e-07, "loss": 1.0434, "step": 85860 }, { "epoch": 2.5317706029780336, "grad_norm": 3.64199355175192, "learning_rate": 5.429084306158258e-07, "loss": 1.066, "step": 85865 }, { "epoch": 2.5319180303700426, "grad_norm": 3.3857135821061934, "learning_rate": 5.425750945233219e-07, "loss": 0.9999, "step": 85870 }, { "epoch": 2.532065457762052, "grad_norm": 3.31869945904962, "learning_rate": 5.422418528129594e-07, "loss": 1.0135, "step": 85875 }, { "epoch": 2.5322128851540615, "grad_norm": 3.6150256283948075, "learning_rate": 5.419087054945415e-07, "loss": 1.0884, "step": 85880 }, { "epoch": 2.532360312546071, "grad_norm": 3.434796460921432, "learning_rate": 5.415756525778747e-07, "loss": 1.0413, "step": 85885 }, { "epoch": 2.5325077399380804, "grad_norm": 3.4458734947489478, "learning_rate": 5.412426940727571e-07, "loss": 1.0609, "step": 85890 }, { "epoch": 2.53265516733009, "grad_norm": 3.36251604033281, "learning_rate": 5.409098299889872e-07, "loss": 1.0379, "step": 85895 }, { "epoch": 2.5328025947220993, "grad_norm": 3.4511524635378654, "learning_rate": 5.405770603363599e-07, "loss": 1.0571, "step": 85900 }, { "epoch": 2.532950022114109, "grad_norm": 3.373879289823649, "learning_rate": 5.402443851246679e-07, "loss": 1.0307, "step": 85905 }, { "epoch": 2.5330974495061183, "grad_norm": 3.5717297966619634, "learning_rate": 5.399118043636997e-07, "loss": 1.052, "step": 85910 }, { "epoch": 2.5332448768981277, "grad_norm": 3.478110055598561, "learning_rate": 5.39579318063243e-07, "loss": 1.072, "step": 85915 }, { "epoch": 2.533392304290137, "grad_norm": 3.412255898895146, "learning_rate": 5.392469262330787e-07, "loss": 1.0518, "step": 85920 }, { "epoch": 2.5335397316821466, "grad_norm": 3.4767170753507637, "learning_rate": 5.38914628882992e-07, "loss": 1.0463, "step": 85925 }, { "epoch": 2.533687159074156, "grad_norm": 3.5302133854957094, "learning_rate": 5.385824260227575e-07, "loss": 1.0679, "step": 85930 }, { "epoch": 2.5338345864661656, "grad_norm": 3.6403989792421676, "learning_rate": 5.382503176621535e-07, "loss": 1.0884, "step": 85935 }, { "epoch": 2.533982013858175, "grad_norm": 3.4511168080670225, "learning_rate": 5.379183038109507e-07, "loss": 1.0256, "step": 85940 }, { "epoch": 2.534129441250184, "grad_norm": 3.5398316882767986, "learning_rate": 5.375863844789196e-07, "loss": 1.0633, "step": 85945 }, { "epoch": 2.534276868642194, "grad_norm": 3.39156071575571, "learning_rate": 5.372545596758282e-07, "loss": 1.047, "step": 85950 }, { "epoch": 2.534424296034203, "grad_norm": 3.585438039929473, "learning_rate": 5.369228294114381e-07, "loss": 1.0287, "step": 85955 }, { "epoch": 2.534571723426213, "grad_norm": 3.6319575980525856, "learning_rate": 5.365911936955146e-07, "loss": 1.0144, "step": 85960 }, { "epoch": 2.534719150818222, "grad_norm": 3.604877501963449, "learning_rate": 5.362596525378137e-07, "loss": 1.0706, "step": 85965 }, { "epoch": 2.5348665782102313, "grad_norm": 3.4284633091262533, "learning_rate": 5.35928205948092e-07, "loss": 1.0565, "step": 85970 }, { "epoch": 2.5350140056022408, "grad_norm": 3.675372075478482, "learning_rate": 5.35596853936103e-07, "loss": 1.0431, "step": 85975 }, { "epoch": 2.5351614329942502, "grad_norm": 3.596207812513834, "learning_rate": 5.35265596511597e-07, "loss": 1.0619, "step": 85980 }, { "epoch": 2.5353088603862597, "grad_norm": 3.4148786998614415, "learning_rate": 5.349344336843218e-07, "loss": 1.0448, "step": 85985 }, { "epoch": 2.535456287778269, "grad_norm": 3.57413251789187, "learning_rate": 5.346033654640222e-07, "loss": 1.0921, "step": 85990 }, { "epoch": 2.5356037151702786, "grad_norm": 3.3410762202801343, "learning_rate": 5.342723918604399e-07, "loss": 1.021, "step": 85995 }, { "epoch": 2.535751142562288, "grad_norm": 3.5440144181759643, "learning_rate": 5.339415128833155e-07, "loss": 1.0241, "step": 86000 }, { "epoch": 2.535751142562288, "eval_loss": 1.0794392824172974, "eval_runtime": 4.1797, "eval_samples_per_second": 94.744, "eval_steps_per_second": 3.11, "step": 86000 }, { "epoch": 2.5358985699542975, "grad_norm": 3.528221524926141, "learning_rate": 5.336107285423835e-07, "loss": 1.0593, "step": 86005 }, { "epoch": 2.536045997346307, "grad_norm": 3.7366874883349275, "learning_rate": 5.332800388473787e-07, "loss": 1.0176, "step": 86010 }, { "epoch": 2.5361934247383164, "grad_norm": 3.5027136095415914, "learning_rate": 5.329494438080313e-07, "loss": 1.0428, "step": 86015 }, { "epoch": 2.536340852130326, "grad_norm": 3.4633774976027487, "learning_rate": 5.326189434340708e-07, "loss": 1.0602, "step": 86020 }, { "epoch": 2.5364882795223354, "grad_norm": 3.464583353679619, "learning_rate": 5.322885377352223e-07, "loss": 1.0948, "step": 86025 }, { "epoch": 2.536635706914345, "grad_norm": 3.512387058340479, "learning_rate": 5.319582267212057e-07, "loss": 1.0421, "step": 86030 }, { "epoch": 2.5367831343063543, "grad_norm": 3.6463729311461357, "learning_rate": 5.316280104017452e-07, "loss": 1.0591, "step": 86035 }, { "epoch": 2.5369305616983633, "grad_norm": 3.616959947098775, "learning_rate": 5.312978887865544e-07, "loss": 1.0456, "step": 86040 }, { "epoch": 2.537077989090373, "grad_norm": 3.432142674325094, "learning_rate": 5.309678618853487e-07, "loss": 1.011, "step": 86045 }, { "epoch": 2.537225416482382, "grad_norm": 3.5091764633496556, "learning_rate": 5.306379297078389e-07, "loss": 1.0214, "step": 86050 }, { "epoch": 2.537372843874392, "grad_norm": 3.4788799220934443, "learning_rate": 5.303080922637348e-07, "loss": 1.0776, "step": 86055 }, { "epoch": 2.537520271266401, "grad_norm": 3.690037953295291, "learning_rate": 5.299783495627411e-07, "loss": 1.0627, "step": 86060 }, { "epoch": 2.5376676986584106, "grad_norm": 3.5088651952493253, "learning_rate": 5.296487016145618e-07, "loss": 1.08, "step": 86065 }, { "epoch": 2.53781512605042, "grad_norm": 3.368862712271885, "learning_rate": 5.29319148428897e-07, "loss": 1.0427, "step": 86070 }, { "epoch": 2.5379625534424295, "grad_norm": 3.4232672588253403, "learning_rate": 5.289896900154435e-07, "loss": 0.9937, "step": 86075 }, { "epoch": 2.538109980834439, "grad_norm": 3.5631494636120147, "learning_rate": 5.286603263838962e-07, "loss": 1.0679, "step": 86080 }, { "epoch": 2.5382574082264484, "grad_norm": 3.629144611100882, "learning_rate": 5.283310575439471e-07, "loss": 1.0152, "step": 86085 }, { "epoch": 2.538404835618458, "grad_norm": 3.650021292598068, "learning_rate": 5.280018835052857e-07, "loss": 1.09, "step": 86090 }, { "epoch": 2.5385522630104673, "grad_norm": 3.409543844554992, "learning_rate": 5.276728042775979e-07, "loss": 1.035, "step": 86095 }, { "epoch": 2.538699690402477, "grad_norm": 3.4997362597993815, "learning_rate": 5.273438198705681e-07, "loss": 1.026, "step": 86100 }, { "epoch": 2.5388471177944862, "grad_norm": 3.4229901269146628, "learning_rate": 5.270149302938744e-07, "loss": 1.0525, "step": 86105 }, { "epoch": 2.5389945451864957, "grad_norm": 3.5817187556845624, "learning_rate": 5.266861355571987e-07, "loss": 1.1139, "step": 86110 }, { "epoch": 2.539141972578505, "grad_norm": 3.273249065611448, "learning_rate": 5.263574356702122e-07, "loss": 1.0243, "step": 86115 }, { "epoch": 2.5392893999705146, "grad_norm": 3.6272040004176196, "learning_rate": 5.260288306425907e-07, "loss": 1.0425, "step": 86120 }, { "epoch": 2.539436827362524, "grad_norm": 3.5130427292361355, "learning_rate": 5.257003204840019e-07, "loss": 1.0671, "step": 86125 }, { "epoch": 2.5395842547545335, "grad_norm": 3.388906199132548, "learning_rate": 5.253719052041125e-07, "loss": 1.0258, "step": 86130 }, { "epoch": 2.539731682146543, "grad_norm": 3.2992361896538855, "learning_rate": 5.250435848125879e-07, "loss": 1.0164, "step": 86135 }, { "epoch": 2.5398791095385524, "grad_norm": 3.586350276960409, "learning_rate": 5.247153593190868e-07, "loss": 1.0506, "step": 86140 }, { "epoch": 2.5400265369305615, "grad_norm": 3.645625775891437, "learning_rate": 5.243872287332708e-07, "loss": 1.06, "step": 86145 }, { "epoch": 2.5401739643225714, "grad_norm": 3.6910543640624875, "learning_rate": 5.240591930647928e-07, "loss": 1.0725, "step": 86150 }, { "epoch": 2.5403213917145804, "grad_norm": 3.4304834148484344, "learning_rate": 5.237312523233066e-07, "loss": 1.0602, "step": 86155 }, { "epoch": 2.5404688191065903, "grad_norm": 3.5547460140094183, "learning_rate": 5.234034065184627e-07, "loss": 1.0566, "step": 86160 }, { "epoch": 2.5406162464985993, "grad_norm": 3.687588823532875, "learning_rate": 5.230756556599076e-07, "loss": 1.0673, "step": 86165 }, { "epoch": 2.5407636738906088, "grad_norm": 3.629272391012173, "learning_rate": 5.227479997572864e-07, "loss": 1.0393, "step": 86170 }, { "epoch": 2.540911101282618, "grad_norm": 3.408383305746463, "learning_rate": 5.22420438820241e-07, "loss": 1.0492, "step": 86175 }, { "epoch": 2.5410585286746277, "grad_norm": 3.605676495912027, "learning_rate": 5.220929728584082e-07, "loss": 1.0858, "step": 86180 }, { "epoch": 2.541205956066637, "grad_norm": 3.5047583088874785, "learning_rate": 5.217656018814272e-07, "loss": 1.0203, "step": 86185 }, { "epoch": 2.5413533834586466, "grad_norm": 3.718988537250564, "learning_rate": 5.21438325898928e-07, "loss": 1.0286, "step": 86190 }, { "epoch": 2.541500810850656, "grad_norm": 3.3483415243961856, "learning_rate": 5.211111449205442e-07, "loss": 1.052, "step": 86195 }, { "epoch": 2.5416482382426655, "grad_norm": 3.5838951068446097, "learning_rate": 5.207840589559011e-07, "loss": 1.0604, "step": 86200 }, { "epoch": 2.541795665634675, "grad_norm": 3.5853752657943185, "learning_rate": 5.204570680146246e-07, "loss": 1.0803, "step": 86205 }, { "epoch": 2.5419430930266844, "grad_norm": 3.3633994551625324, "learning_rate": 5.201301721063375e-07, "loss": 1.0347, "step": 86210 }, { "epoch": 2.542090520418694, "grad_norm": 3.6305239529382654, "learning_rate": 5.198033712406564e-07, "loss": 1.0568, "step": 86215 }, { "epoch": 2.5422379478107033, "grad_norm": 3.421411980317089, "learning_rate": 5.194766654272016e-07, "loss": 1.0424, "step": 86220 }, { "epoch": 2.542385375202713, "grad_norm": 3.60895461603479, "learning_rate": 5.191500546755837e-07, "loss": 1.0723, "step": 86225 }, { "epoch": 2.5425328025947223, "grad_norm": 3.5147073332351013, "learning_rate": 5.188235389954143e-07, "loss": 1.0435, "step": 86230 }, { "epoch": 2.5426802299867317, "grad_norm": 3.5837518824064425, "learning_rate": 5.184971183963026e-07, "loss": 1.0719, "step": 86235 }, { "epoch": 2.5428276573787407, "grad_norm": 3.5000451936245085, "learning_rate": 5.18170792887853e-07, "loss": 1.0275, "step": 86240 }, { "epoch": 2.5429750847707506, "grad_norm": 3.48076030460302, "learning_rate": 5.178445624796679e-07, "loss": 1.0765, "step": 86245 }, { "epoch": 2.5431225121627596, "grad_norm": 3.4272879780023127, "learning_rate": 5.175184271813472e-07, "loss": 1.0348, "step": 86250 }, { "epoch": 2.5432699395547695, "grad_norm": 3.378653630317116, "learning_rate": 5.171923870024882e-07, "loss": 1.0269, "step": 86255 }, { "epoch": 2.5434173669467786, "grad_norm": 3.496820075748384, "learning_rate": 5.168664419526852e-07, "loss": 1.0361, "step": 86260 }, { "epoch": 2.543564794338788, "grad_norm": 3.491009707542587, "learning_rate": 5.165405920415278e-07, "loss": 1.0611, "step": 86265 }, { "epoch": 2.5437122217307975, "grad_norm": 3.6752017218777193, "learning_rate": 5.162148372786069e-07, "loss": 1.0451, "step": 86270 }, { "epoch": 2.543859649122807, "grad_norm": 3.5453436349853806, "learning_rate": 5.158891776735063e-07, "loss": 1.0339, "step": 86275 }, { "epoch": 2.5440070765148164, "grad_norm": 3.60137716131166, "learning_rate": 5.155636132358101e-07, "loss": 1.0339, "step": 86280 }, { "epoch": 2.544154503906826, "grad_norm": 3.2980664790634053, "learning_rate": 5.15238143975098e-07, "loss": 1.033, "step": 86285 }, { "epoch": 2.5443019312988353, "grad_norm": 3.4350890817855566, "learning_rate": 5.149127699009459e-07, "loss": 1.042, "step": 86290 }, { "epoch": 2.5444493586908448, "grad_norm": 3.620477169450353, "learning_rate": 5.145874910229315e-07, "loss": 1.0491, "step": 86295 }, { "epoch": 2.5445967860828542, "grad_norm": 3.5630206161906686, "learning_rate": 5.142623073506236e-07, "loss": 1.0215, "step": 86300 }, { "epoch": 2.5447442134748637, "grad_norm": 3.4058240006647624, "learning_rate": 5.139372188935919e-07, "loss": 1.0478, "step": 86305 }, { "epoch": 2.544891640866873, "grad_norm": 3.4206043559689046, "learning_rate": 5.136122256614032e-07, "loss": 1.028, "step": 86310 }, { "epoch": 2.5450390682588826, "grad_norm": 3.5854197352657615, "learning_rate": 5.132873276636201e-07, "loss": 1.0887, "step": 86315 }, { "epoch": 2.545186495650892, "grad_norm": 3.4920107552705093, "learning_rate": 5.129625249098033e-07, "loss": 1.0405, "step": 86320 }, { "epoch": 2.5453339230429015, "grad_norm": 3.5430984561666703, "learning_rate": 5.126378174095106e-07, "loss": 1.0704, "step": 86325 }, { "epoch": 2.545481350434911, "grad_norm": 3.730546912770886, "learning_rate": 5.123132051722966e-07, "loss": 1.0702, "step": 86330 }, { "epoch": 2.54562877782692, "grad_norm": 3.57734850362421, "learning_rate": 5.119886882077145e-07, "loss": 1.088, "step": 86335 }, { "epoch": 2.54577620521893, "grad_norm": 3.508429780123325, "learning_rate": 5.11664266525312e-07, "loss": 0.9968, "step": 86340 }, { "epoch": 2.545923632610939, "grad_norm": 3.5981304398602787, "learning_rate": 5.113399401346361e-07, "loss": 1.0051, "step": 86345 }, { "epoch": 2.546071060002949, "grad_norm": 3.436451410548143, "learning_rate": 5.110157090452304e-07, "loss": 1.0199, "step": 86350 }, { "epoch": 2.546218487394958, "grad_norm": 3.5971441396370545, "learning_rate": 5.106915732666364e-07, "loss": 1.1108, "step": 86355 }, { "epoch": 2.5463659147869673, "grad_norm": 3.5261220850823216, "learning_rate": 5.103675328083924e-07, "loss": 1.0349, "step": 86360 }, { "epoch": 2.5465133421789767, "grad_norm": 3.4807572561379083, "learning_rate": 5.100435876800316e-07, "loss": 1.014, "step": 86365 }, { "epoch": 2.546660769570986, "grad_norm": 3.533657928019437, "learning_rate": 5.097197378910892e-07, "loss": 1.036, "step": 86370 }, { "epoch": 2.5468081969629957, "grad_norm": 3.471076034951721, "learning_rate": 5.093959834510922e-07, "loss": 0.9826, "step": 86375 }, { "epoch": 2.546955624355005, "grad_norm": 3.5289526775154476, "learning_rate": 5.090723243695701e-07, "loss": 1.0316, "step": 86380 }, { "epoch": 2.5471030517470146, "grad_norm": 3.3857914860681, "learning_rate": 5.087487606560449e-07, "loss": 1.0205, "step": 86385 }, { "epoch": 2.547250479139024, "grad_norm": 3.6884010467365953, "learning_rate": 5.084252923200383e-07, "loss": 1.0355, "step": 86390 }, { "epoch": 2.5473979065310335, "grad_norm": 3.45506061955045, "learning_rate": 5.081019193710693e-07, "loss": 1.0309, "step": 86395 }, { "epoch": 2.547545333923043, "grad_norm": 3.5894659520836907, "learning_rate": 5.077786418186534e-07, "loss": 1.0554, "step": 86400 }, { "epoch": 2.5476927613150524, "grad_norm": 3.5059004435909955, "learning_rate": 5.074554596723034e-07, "loss": 0.9949, "step": 86405 }, { "epoch": 2.547840188707062, "grad_norm": 3.3651285405679987, "learning_rate": 5.071323729415285e-07, "loss": 1.0116, "step": 86410 }, { "epoch": 2.5479876160990713, "grad_norm": 3.6154669221319296, "learning_rate": 5.068093816358368e-07, "loss": 0.9921, "step": 86415 }, { "epoch": 2.5481350434910808, "grad_norm": 3.5774058690176878, "learning_rate": 5.064864857647321e-07, "loss": 0.9894, "step": 86420 }, { "epoch": 2.5482824708830902, "grad_norm": 3.5339354006202313, "learning_rate": 5.061636853377162e-07, "loss": 1.0492, "step": 86425 }, { "epoch": 2.5484298982750992, "grad_norm": 3.5991882696232733, "learning_rate": 5.058409803642883e-07, "loss": 1.0691, "step": 86430 }, { "epoch": 2.548577325667109, "grad_norm": 3.5565058766831394, "learning_rate": 5.055183708539444e-07, "loss": 1.0425, "step": 86435 }, { "epoch": 2.548724753059118, "grad_norm": 3.3574475116920275, "learning_rate": 5.051958568161759e-07, "loss": 1.0708, "step": 86440 }, { "epoch": 2.548872180451128, "grad_norm": 3.6906565649786325, "learning_rate": 5.048734382604758e-07, "loss": 1.0572, "step": 86445 }, { "epoch": 2.549019607843137, "grad_norm": 3.3584119922730093, "learning_rate": 5.045511151963288e-07, "loss": 1.0736, "step": 86450 }, { "epoch": 2.5491670352351465, "grad_norm": 3.3727641132323214, "learning_rate": 5.042288876332229e-07, "loss": 1.0028, "step": 86455 }, { "epoch": 2.549314462627156, "grad_norm": 3.365574718283854, "learning_rate": 5.039067555806375e-07, "loss": 1.0082, "step": 86460 }, { "epoch": 2.5494618900191655, "grad_norm": 3.626319232771514, "learning_rate": 5.035847190480526e-07, "loss": 1.0491, "step": 86465 }, { "epoch": 2.549609317411175, "grad_norm": 3.274852940089258, "learning_rate": 5.032627780449446e-07, "loss": 1.0332, "step": 86470 }, { "epoch": 2.5497567448031844, "grad_norm": 3.5144029396793406, "learning_rate": 5.029409325807854e-07, "loss": 1.0178, "step": 86475 }, { "epoch": 2.549904172195194, "grad_norm": 3.4461510263913895, "learning_rate": 5.026191826650487e-07, "loss": 1.0255, "step": 86480 }, { "epoch": 2.5500515995872033, "grad_norm": 3.5723985322295446, "learning_rate": 5.022975283072e-07, "loss": 1.0801, "step": 86485 }, { "epoch": 2.5501990269792127, "grad_norm": 3.4560563976126852, "learning_rate": 5.019759695167052e-07, "loss": 1.0361, "step": 86490 }, { "epoch": 2.550346454371222, "grad_norm": 3.5597253615001843, "learning_rate": 5.016545063030256e-07, "loss": 1.0117, "step": 86495 }, { "epoch": 2.5504938817632317, "grad_norm": 3.4329259058990016, "learning_rate": 5.013331386756221e-07, "loss": 1.0523, "step": 86500 }, { "epoch": 2.5504938817632317, "eval_loss": 1.0789533853530884, "eval_runtime": 5.1596, "eval_samples_per_second": 76.751, "eval_steps_per_second": 2.52, "step": 86500 }, { "epoch": 2.550641309155241, "grad_norm": 3.4844721475725695, "learning_rate": 5.010118666439501e-07, "loss": 1.0498, "step": 86505 }, { "epoch": 2.5507887365472506, "grad_norm": 3.7767669653424374, "learning_rate": 5.006906902174642e-07, "loss": 1.0483, "step": 86510 }, { "epoch": 2.55093616393926, "grad_norm": 3.525882750914406, "learning_rate": 5.003696094056148e-07, "loss": 1.017, "step": 86515 }, { "epoch": 2.5510835913312695, "grad_norm": 3.5733722960976335, "learning_rate": 5.000486242178512e-07, "loss": 1.062, "step": 86520 }, { "epoch": 2.5512310187232785, "grad_norm": 3.71284507578001, "learning_rate": 4.997277346636166e-07, "loss": 1.0372, "step": 86525 }, { "epoch": 2.5513784461152884, "grad_norm": 3.435752813831255, "learning_rate": 4.994069407523559e-07, "loss": 1.0185, "step": 86530 }, { "epoch": 2.5515258735072974, "grad_norm": 3.4636039530336156, "learning_rate": 4.990862424935075e-07, "loss": 1.0357, "step": 86535 }, { "epoch": 2.5516733008993073, "grad_norm": 3.6888306792029852, "learning_rate": 4.987656398965083e-07, "loss": 1.0228, "step": 86540 }, { "epoch": 2.5518207282913163, "grad_norm": 3.722733454490338, "learning_rate": 4.984451329707935e-07, "loss": 1.0581, "step": 86545 }, { "epoch": 2.551968155683326, "grad_norm": 3.2561281233769757, "learning_rate": 4.981247217257919e-07, "loss": 0.9853, "step": 86550 }, { "epoch": 2.5521155830753353, "grad_norm": 3.6537007934326122, "learning_rate": 4.978044061709353e-07, "loss": 1.0096, "step": 86555 }, { "epoch": 2.5522630104673447, "grad_norm": 3.574278457248992, "learning_rate": 4.974841863156465e-07, "loss": 1.019, "step": 86560 }, { "epoch": 2.552410437859354, "grad_norm": 3.645064754687603, "learning_rate": 4.971640621693498e-07, "loss": 1.0492, "step": 86565 }, { "epoch": 2.5525578652513636, "grad_norm": 3.52402085502964, "learning_rate": 4.968440337414649e-07, "loss": 0.9926, "step": 86570 }, { "epoch": 2.552705292643373, "grad_norm": 3.3316509830560257, "learning_rate": 4.965241010414091e-07, "loss": 1.0286, "step": 86575 }, { "epoch": 2.5528527200353825, "grad_norm": 3.4760665502967703, "learning_rate": 4.962042640785968e-07, "loss": 1.0467, "step": 86580 }, { "epoch": 2.553000147427392, "grad_norm": 3.709775029501691, "learning_rate": 4.958845228624394e-07, "loss": 1.0574, "step": 86585 }, { "epoch": 2.5531475748194015, "grad_norm": 3.565493191652274, "learning_rate": 4.955648774023453e-07, "loss": 1.0794, "step": 86590 }, { "epoch": 2.553295002211411, "grad_norm": 3.6146612025620017, "learning_rate": 4.952453277077223e-07, "loss": 1.0457, "step": 86595 }, { "epoch": 2.5534424296034204, "grad_norm": 3.476516053220586, "learning_rate": 4.949258737879711e-07, "loss": 1.0427, "step": 86600 }, { "epoch": 2.55358985699543, "grad_norm": 3.3986547339383515, "learning_rate": 4.946065156524925e-07, "loss": 0.9896, "step": 86605 }, { "epoch": 2.5537372843874393, "grad_norm": 3.5407545122152784, "learning_rate": 4.942872533106851e-07, "loss": 1.0198, "step": 86610 }, { "epoch": 2.5538847117794488, "grad_norm": 3.496254331614542, "learning_rate": 4.939680867719424e-07, "loss": 1.0283, "step": 86615 }, { "epoch": 2.554032139171458, "grad_norm": 3.710588978463993, "learning_rate": 4.936490160456578e-07, "loss": 1.0487, "step": 86620 }, { "epoch": 2.5541795665634677, "grad_norm": 3.325901451806514, "learning_rate": 4.933300411412177e-07, "loss": 1.04, "step": 86625 }, { "epoch": 2.5543269939554767, "grad_norm": 3.3295007219264168, "learning_rate": 4.930111620680114e-07, "loss": 1.0144, "step": 86630 }, { "epoch": 2.5544744213474866, "grad_norm": 3.5823987336504737, "learning_rate": 4.926923788354189e-07, "loss": 1.0247, "step": 86635 }, { "epoch": 2.5546218487394956, "grad_norm": 3.615427914577332, "learning_rate": 4.923736914528247e-07, "loss": 0.9987, "step": 86640 }, { "epoch": 2.5547692761315055, "grad_norm": 3.5372075577000714, "learning_rate": 4.920550999296031e-07, "loss": 1.0716, "step": 86645 }, { "epoch": 2.5549167035235145, "grad_norm": 3.623193051164696, "learning_rate": 4.917366042751306e-07, "loss": 1.0393, "step": 86650 }, { "epoch": 2.555064130915524, "grad_norm": 3.45426469572445, "learning_rate": 4.914182044987794e-07, "loss": 1.0501, "step": 86655 }, { "epoch": 2.5552115583075334, "grad_norm": 3.4296883271983183, "learning_rate": 4.910999006099179e-07, "loss": 1.0147, "step": 86660 }, { "epoch": 2.555358985699543, "grad_norm": 3.516749382872998, "learning_rate": 4.907816926179142e-07, "loss": 1.0755, "step": 86665 }, { "epoch": 2.5555064130915524, "grad_norm": 3.458306672386563, "learning_rate": 4.904635805321298e-07, "loss": 1.0014, "step": 86670 }, { "epoch": 2.555653840483562, "grad_norm": 3.665159547889911, "learning_rate": 4.901455643619268e-07, "loss": 1.0477, "step": 86675 }, { "epoch": 2.5558012678755713, "grad_norm": 3.6536214773553097, "learning_rate": 4.89827644116663e-07, "loss": 1.0533, "step": 86680 }, { "epoch": 2.5559486952675807, "grad_norm": 3.4902013727296697, "learning_rate": 4.895098198056934e-07, "loss": 1.0126, "step": 86685 }, { "epoch": 2.55609612265959, "grad_norm": 3.4330151776412947, "learning_rate": 4.891920914383702e-07, "loss": 1.0261, "step": 86690 }, { "epoch": 2.5562435500515996, "grad_norm": 3.387169311009479, "learning_rate": 4.888744590240442e-07, "loss": 1.0163, "step": 86695 }, { "epoch": 2.556390977443609, "grad_norm": 3.530075418115542, "learning_rate": 4.885569225720596e-07, "loss": 1.0537, "step": 86700 }, { "epoch": 2.5565384048356186, "grad_norm": 3.419921264228327, "learning_rate": 4.882394820917631e-07, "loss": 1.0252, "step": 86705 }, { "epoch": 2.556685832227628, "grad_norm": 3.4354020445658255, "learning_rate": 4.879221375924926e-07, "loss": 1.0257, "step": 86710 }, { "epoch": 2.5568332596196375, "grad_norm": 3.3847534144421427, "learning_rate": 4.876048890835903e-07, "loss": 1.0521, "step": 86715 }, { "epoch": 2.556980687011647, "grad_norm": 3.5554168314353136, "learning_rate": 4.872877365743882e-07, "loss": 1.0637, "step": 86720 }, { "epoch": 2.557128114403656, "grad_norm": 3.5426569696188057, "learning_rate": 4.869706800742199e-07, "loss": 1.0273, "step": 86725 }, { "epoch": 2.557275541795666, "grad_norm": 3.5549916345016412, "learning_rate": 4.866537195924164e-07, "loss": 1.0409, "step": 86730 }, { "epoch": 2.557422969187675, "grad_norm": 3.454988872021018, "learning_rate": 4.863368551383021e-07, "loss": 1.0345, "step": 86735 }, { "epoch": 2.5575703965796848, "grad_norm": 3.3782699782151058, "learning_rate": 4.860200867212037e-07, "loss": 1.0222, "step": 86740 }, { "epoch": 2.557717823971694, "grad_norm": 3.5328796340554667, "learning_rate": 4.85703414350441e-07, "loss": 1.0801, "step": 86745 }, { "epoch": 2.5578652513637032, "grad_norm": 3.4204174410663963, "learning_rate": 4.853868380353322e-07, "loss": 1.0492, "step": 86750 }, { "epoch": 2.5580126787557127, "grad_norm": 3.7986610689956866, "learning_rate": 4.850703577851941e-07, "loss": 1.0108, "step": 86755 }, { "epoch": 2.558160106147722, "grad_norm": 3.569863451734334, "learning_rate": 4.847539736093386e-07, "loss": 1.0685, "step": 86760 }, { "epoch": 2.5583075335397316, "grad_norm": 3.7375332649112365, "learning_rate": 4.844376855170758e-07, "loss": 1.0891, "step": 86765 }, { "epoch": 2.558454960931741, "grad_norm": 3.6018476982347054, "learning_rate": 4.841214935177134e-07, "loss": 1.0924, "step": 86770 }, { "epoch": 2.5586023883237505, "grad_norm": 3.457347431708098, "learning_rate": 4.838053976205546e-07, "loss": 1.0322, "step": 86775 }, { "epoch": 2.55874981571576, "grad_norm": 3.5573954366237475, "learning_rate": 4.83489397834903e-07, "loss": 1.0527, "step": 86780 }, { "epoch": 2.5588972431077694, "grad_norm": 3.4434752474761297, "learning_rate": 4.831734941700538e-07, "loss": 0.9872, "step": 86785 }, { "epoch": 2.559044670499779, "grad_norm": 3.580025680027044, "learning_rate": 4.82857686635307e-07, "loss": 1.0469, "step": 86790 }, { "epoch": 2.5591920978917884, "grad_norm": 3.447954846230434, "learning_rate": 4.825419752399524e-07, "loss": 1.0355, "step": 86795 }, { "epoch": 2.559339525283798, "grad_norm": 3.557981056082076, "learning_rate": 4.822263599932809e-07, "loss": 1.0324, "step": 86800 }, { "epoch": 2.5594869526758073, "grad_norm": 3.535682700153375, "learning_rate": 4.819108409045815e-07, "loss": 1.0422, "step": 86805 }, { "epoch": 2.5596343800678167, "grad_norm": 3.5243680182353163, "learning_rate": 4.815954179831354e-07, "loss": 1.0671, "step": 86810 }, { "epoch": 2.559781807459826, "grad_norm": 3.53635231071037, "learning_rate": 4.812800912382282e-07, "loss": 1.0393, "step": 86815 }, { "epoch": 2.559929234851835, "grad_norm": 3.4946338597526188, "learning_rate": 4.809648606791356e-07, "loss": 0.9985, "step": 86820 }, { "epoch": 2.560076662243845, "grad_norm": 3.62411620032189, "learning_rate": 4.80649726315135e-07, "loss": 1.0501, "step": 86825 }, { "epoch": 2.560224089635854, "grad_norm": 3.607833415423869, "learning_rate": 4.803346881554992e-07, "loss": 1.0328, "step": 86830 }, { "epoch": 2.560371517027864, "grad_norm": 3.5941308722829293, "learning_rate": 4.800197462094992e-07, "loss": 1.0274, "step": 86835 }, { "epoch": 2.560518944419873, "grad_norm": 3.569357599607061, "learning_rate": 4.797049004864018e-07, "loss": 1.0673, "step": 86840 }, { "epoch": 2.5606663718118825, "grad_norm": 3.647339223705369, "learning_rate": 4.79390150995472e-07, "loss": 1.0577, "step": 86845 }, { "epoch": 2.560813799203892, "grad_norm": 3.5832139630176267, "learning_rate": 4.790754977459717e-07, "loss": 1.0731, "step": 86850 }, { "epoch": 2.5609612265959014, "grad_norm": 3.4820440026886676, "learning_rate": 4.787609407471605e-07, "loss": 1.0402, "step": 86855 }, { "epoch": 2.561108653987911, "grad_norm": 3.5598524581811515, "learning_rate": 4.784464800082934e-07, "loss": 1.0615, "step": 86860 }, { "epoch": 2.5612560813799203, "grad_norm": 3.409612955349007, "learning_rate": 4.781321155386244e-07, "loss": 1.0714, "step": 86865 }, { "epoch": 2.56140350877193, "grad_norm": 3.5435945933878763, "learning_rate": 4.778178473474038e-07, "loss": 1.0631, "step": 86870 }, { "epoch": 2.5615509361639393, "grad_norm": 3.51450009013932, "learning_rate": 4.775036754438795e-07, "loss": 1.003, "step": 86875 }, { "epoch": 2.5616983635559487, "grad_norm": 3.6416038192206006, "learning_rate": 4.771895998372973e-07, "loss": 1.05, "step": 86880 }, { "epoch": 2.561845790947958, "grad_norm": 3.4736840088338976, "learning_rate": 4.768756205368968e-07, "loss": 1.0367, "step": 86885 }, { "epoch": 2.5619932183399676, "grad_norm": 3.592837259135093, "learning_rate": 4.765617375519199e-07, "loss": 1.0614, "step": 86890 }, { "epoch": 2.562140645731977, "grad_norm": 3.6516695367501324, "learning_rate": 4.762479508916004e-07, "loss": 1.0141, "step": 86895 }, { "epoch": 2.5622880731239865, "grad_norm": 3.656016564342524, "learning_rate": 4.759342605651747e-07, "loss": 1.0164, "step": 86900 }, { "epoch": 2.562435500515996, "grad_norm": 3.5840085512835183, "learning_rate": 4.756206665818712e-07, "loss": 1.0492, "step": 86905 }, { "epoch": 2.5625829279080055, "grad_norm": 3.475039860067992, "learning_rate": 4.7530716895091834e-07, "loss": 1.0477, "step": 86910 }, { "epoch": 2.5627303553000145, "grad_norm": 3.5536358487930775, "learning_rate": 4.7499376768154177e-07, "loss": 1.0544, "step": 86915 }, { "epoch": 2.5628777826920244, "grad_norm": 3.494053008088567, "learning_rate": 4.7468046278296273e-07, "loss": 1.0666, "step": 86920 }, { "epoch": 2.5630252100840334, "grad_norm": 3.6390098603786107, "learning_rate": 4.743672542644023e-07, "loss": 1.0303, "step": 86925 }, { "epoch": 2.5631726374760433, "grad_norm": 3.5179353708551675, "learning_rate": 4.740541421350751e-07, "loss": 1.0524, "step": 86930 }, { "epoch": 2.5633200648680523, "grad_norm": 3.4744044055128045, "learning_rate": 4.737411264041951e-07, "loss": 1.0164, "step": 86935 }, { "epoch": 2.5634674922600618, "grad_norm": 3.5867214952541553, "learning_rate": 4.734282070809735e-07, "loss": 1.0704, "step": 86940 }, { "epoch": 2.5636149196520712, "grad_norm": 3.5820233361543887, "learning_rate": 4.73115384174619e-07, "loss": 1.0321, "step": 86945 }, { "epoch": 2.5637623470440807, "grad_norm": 3.7327085716646073, "learning_rate": 4.7280265769433535e-07, "loss": 1.0464, "step": 86950 }, { "epoch": 2.56390977443609, "grad_norm": 3.504303922108807, "learning_rate": 4.7249002764932684e-07, "loss": 1.0447, "step": 86955 }, { "epoch": 2.5640572018280996, "grad_norm": 3.358431661425256, "learning_rate": 4.7217749404879037e-07, "loss": 1.0443, "step": 86960 }, { "epoch": 2.564204629220109, "grad_norm": 3.4254825764103796, "learning_rate": 4.718650569019252e-07, "loss": 1.0133, "step": 86965 }, { "epoch": 2.5643520566121185, "grad_norm": 3.4989257460304413, "learning_rate": 4.715527162179227e-07, "loss": 1.0222, "step": 86970 }, { "epoch": 2.564499484004128, "grad_norm": 3.582766463836484, "learning_rate": 4.712404720059768e-07, "loss": 1.047, "step": 86975 }, { "epoch": 2.5646469113961374, "grad_norm": 3.5094622763378585, "learning_rate": 4.7092832427527264e-07, "loss": 1.0694, "step": 86980 }, { "epoch": 2.564794338788147, "grad_norm": 3.597742448735013, "learning_rate": 4.706162730349975e-07, "loss": 1.0155, "step": 86985 }, { "epoch": 2.5649417661801563, "grad_norm": 3.615292369897736, "learning_rate": 4.7030431829433354e-07, "loss": 1.0579, "step": 86990 }, { "epoch": 2.565089193572166, "grad_norm": 3.5911059113777344, "learning_rate": 4.699924600624585e-07, "loss": 1.0579, "step": 86995 }, { "epoch": 2.5652366209641753, "grad_norm": 3.613085723485871, "learning_rate": 4.696806983485524e-07, "loss": 1.0275, "step": 87000 }, { "epoch": 2.5652366209641753, "eval_loss": 1.0786571502685547, "eval_runtime": 4.2859, "eval_samples_per_second": 92.396, "eval_steps_per_second": 3.033, "step": 87000 }, { "epoch": 2.5653840483561847, "grad_norm": 3.623757644801044, "learning_rate": 4.6936903316178643e-07, "loss": 1.0429, "step": 87005 }, { "epoch": 2.565531475748194, "grad_norm": 3.5966186906971025, "learning_rate": 4.690574645113331e-07, "loss": 1.0379, "step": 87010 }, { "epoch": 2.5656789031402036, "grad_norm": 3.525719731550494, "learning_rate": 4.6874599240636017e-07, "loss": 1.0293, "step": 87015 }, { "epoch": 2.5658263305322127, "grad_norm": 3.652085289477475, "learning_rate": 4.6843461685603313e-07, "loss": 1.0361, "step": 87020 }, { "epoch": 2.5659737579242226, "grad_norm": 3.522443179403578, "learning_rate": 4.6812333786951476e-07, "loss": 1.0254, "step": 87025 }, { "epoch": 2.5661211853162316, "grad_norm": 3.4631278171287736, "learning_rate": 4.678121554559647e-07, "loss": 1.0526, "step": 87030 }, { "epoch": 2.566268612708241, "grad_norm": 3.637320191865806, "learning_rate": 4.6750106962453993e-07, "loss": 1.0403, "step": 87035 }, { "epoch": 2.5664160401002505, "grad_norm": 3.610261438442012, "learning_rate": 4.671900803843951e-07, "loss": 1.0067, "step": 87040 }, { "epoch": 2.56656346749226, "grad_norm": 3.8838039864828926, "learning_rate": 4.6687918774467873e-07, "loss": 1.0588, "step": 87045 }, { "epoch": 2.5667108948842694, "grad_norm": 3.3922565722880167, "learning_rate": 4.6656839171454314e-07, "loss": 1.0598, "step": 87050 }, { "epoch": 2.566858322276279, "grad_norm": 3.4434665879773663, "learning_rate": 4.662576923031314e-07, "loss": 1.0029, "step": 87055 }, { "epoch": 2.5670057496682883, "grad_norm": 3.3153115488420055, "learning_rate": 4.6594708951958625e-07, "loss": 1.023, "step": 87060 }, { "epoch": 2.5671531770602978, "grad_norm": 3.6591627961761746, "learning_rate": 4.65636583373049e-07, "loss": 1.0032, "step": 87065 }, { "epoch": 2.5673006044523072, "grad_norm": 3.4492837307119832, "learning_rate": 4.6532617387265414e-07, "loss": 1.0603, "step": 87070 }, { "epoch": 2.5674480318443167, "grad_norm": 3.5390677182276447, "learning_rate": 4.650158610275389e-07, "loss": 1.0265, "step": 87075 }, { "epoch": 2.567595459236326, "grad_norm": 3.6334038956793484, "learning_rate": 4.6470564484683174e-07, "loss": 1.0352, "step": 87080 }, { "epoch": 2.5677428866283356, "grad_norm": 3.4768919181555358, "learning_rate": 4.64395525339663e-07, "loss": 1.0075, "step": 87085 }, { "epoch": 2.567890314020345, "grad_norm": 3.3905515506083415, "learning_rate": 4.6408550251515735e-07, "loss": 1.0391, "step": 87090 }, { "epoch": 2.5680377414123545, "grad_norm": 3.5311411183145363, "learning_rate": 4.637755763824379e-07, "loss": 1.0178, "step": 87095 }, { "epoch": 2.568185168804364, "grad_norm": 3.5281312044002897, "learning_rate": 4.6346574695062453e-07, "loss": 1.0607, "step": 87100 }, { "epoch": 2.5683325961963734, "grad_norm": 3.489339984630757, "learning_rate": 4.631560142288349e-07, "loss": 1.0423, "step": 87105 }, { "epoch": 2.568480023588383, "grad_norm": 3.3697486082302417, "learning_rate": 4.6284637822618213e-07, "loss": 1.0451, "step": 87110 }, { "epoch": 2.568627450980392, "grad_norm": 3.4344667821574695, "learning_rate": 4.6253683895177935e-07, "loss": 1.0436, "step": 87115 }, { "epoch": 2.568774878372402, "grad_norm": 3.334405640324128, "learning_rate": 4.6222739641473307e-07, "loss": 1.0161, "step": 87120 }, { "epoch": 2.568922305764411, "grad_norm": 3.425304071027205, "learning_rate": 4.6191805062414964e-07, "loss": 1.014, "step": 87125 }, { "epoch": 2.5690697331564207, "grad_norm": 3.4968946331436315, "learning_rate": 4.6160880158913276e-07, "loss": 1.0299, "step": 87130 }, { "epoch": 2.5692171605484297, "grad_norm": 3.6319902686383054, "learning_rate": 4.6129964931878125e-07, "loss": 1.0641, "step": 87135 }, { "epoch": 2.569364587940439, "grad_norm": 3.5066947387130405, "learning_rate": 4.609905938221942e-07, "loss": 0.9619, "step": 87140 }, { "epoch": 2.5695120153324487, "grad_norm": 3.6634221980471895, "learning_rate": 4.6068163510846264e-07, "loss": 1.0608, "step": 87145 }, { "epoch": 2.569659442724458, "grad_norm": 3.64721179850181, "learning_rate": 4.6037277318668134e-07, "loss": 1.029, "step": 87150 }, { "epoch": 2.5698068701164676, "grad_norm": 3.5168627699369384, "learning_rate": 4.600640080659364e-07, "loss": 1.0475, "step": 87155 }, { "epoch": 2.569954297508477, "grad_norm": 3.2951285695657857, "learning_rate": 4.59755339755316e-07, "loss": 1.0453, "step": 87160 }, { "epoch": 2.5701017249004865, "grad_norm": 3.584389940579887, "learning_rate": 4.5944676826390156e-07, "loss": 1.0226, "step": 87165 }, { "epoch": 2.570249152292496, "grad_norm": 3.5897856074680443, "learning_rate": 4.591382936007729e-07, "loss": 1.1052, "step": 87170 }, { "epoch": 2.5703965796845054, "grad_norm": 3.716691520045716, "learning_rate": 4.588299157750078e-07, "loss": 1.0498, "step": 87175 }, { "epoch": 2.570544007076515, "grad_norm": 3.4900568739768696, "learning_rate": 4.5852163479568026e-07, "loss": 1.0585, "step": 87180 }, { "epoch": 2.5706914344685243, "grad_norm": 3.366940390434681, "learning_rate": 4.582134506718634e-07, "loss": 1.0662, "step": 87185 }, { "epoch": 2.570838861860534, "grad_norm": 3.49009868261965, "learning_rate": 4.5790536341262375e-07, "loss": 1.0154, "step": 87190 }, { "epoch": 2.5709862892525432, "grad_norm": 3.3157299321115272, "learning_rate": 4.5759737302702773e-07, "loss": 1.0594, "step": 87195 }, { "epoch": 2.5711337166445527, "grad_norm": 3.4911821828560354, "learning_rate": 4.5728947952413864e-07, "loss": 1.0246, "step": 87200 }, { "epoch": 2.571281144036562, "grad_norm": 3.4395519463840585, "learning_rate": 4.569816829130162e-07, "loss": 1.0889, "step": 87205 }, { "epoch": 2.571428571428571, "grad_norm": 3.5672430671786417, "learning_rate": 4.5667398320271825e-07, "loss": 1.0402, "step": 87210 }, { "epoch": 2.571575998820581, "grad_norm": 3.443106473346931, "learning_rate": 4.563663804022995e-07, "loss": 1.048, "step": 87215 }, { "epoch": 2.57172342621259, "grad_norm": 3.4573249742108825, "learning_rate": 4.5605887452081e-07, "loss": 1.0236, "step": 87220 }, { "epoch": 2.5718708536046, "grad_norm": 3.503403785691166, "learning_rate": 4.557514655673002e-07, "loss": 1.016, "step": 87225 }, { "epoch": 2.572018280996609, "grad_norm": 3.4044887732041955, "learning_rate": 4.5544415355081386e-07, "loss": 1.0421, "step": 87230 }, { "epoch": 2.5721657083886185, "grad_norm": 3.554737833488537, "learning_rate": 4.55136938480397e-07, "loss": 1.0601, "step": 87235 }, { "epoch": 2.572313135780628, "grad_norm": 3.6193954503332724, "learning_rate": 4.548298203650875e-07, "loss": 1.0587, "step": 87240 }, { "epoch": 2.5724605631726374, "grad_norm": 3.458214380294275, "learning_rate": 4.545227992139225e-07, "loss": 1.012, "step": 87245 }, { "epoch": 2.572607990564647, "grad_norm": 3.53016296785236, "learning_rate": 4.542158750359388e-07, "loss": 1.0372, "step": 87250 }, { "epoch": 2.5727554179566563, "grad_norm": 3.579234123160901, "learning_rate": 4.539090478401643e-07, "loss": 1.0508, "step": 87255 }, { "epoch": 2.5729028453486658, "grad_norm": 3.6315679123985616, "learning_rate": 4.536023176356312e-07, "loss": 1.0353, "step": 87260 }, { "epoch": 2.573050272740675, "grad_norm": 3.5328846388254083, "learning_rate": 4.532956844313638e-07, "loss": 1.0338, "step": 87265 }, { "epoch": 2.5731977001326847, "grad_norm": 3.3587342274624357, "learning_rate": 4.529891482363849e-07, "loss": 1.054, "step": 87270 }, { "epoch": 2.573345127524694, "grad_norm": 3.5186602578795365, "learning_rate": 4.526827090597152e-07, "loss": 1.0613, "step": 87275 }, { "epoch": 2.5734925549167036, "grad_norm": 3.644470612157029, "learning_rate": 4.523763669103717e-07, "loss": 1.0572, "step": 87280 }, { "epoch": 2.573639982308713, "grad_norm": 3.6254207678415953, "learning_rate": 4.5207012179736925e-07, "loss": 1.0431, "step": 87285 }, { "epoch": 2.5737874097007225, "grad_norm": 3.4690079185995306, "learning_rate": 4.517639737297194e-07, "loss": 1.0844, "step": 87290 }, { "epoch": 2.573934837092732, "grad_norm": 3.4139107643244757, "learning_rate": 4.5145792271643113e-07, "loss": 1.033, "step": 87295 }, { "epoch": 2.5740822644847414, "grad_norm": 3.582700942258113, "learning_rate": 4.511519687665103e-07, "loss": 1.0486, "step": 87300 }, { "epoch": 2.5742296918767504, "grad_norm": 3.6689867902091744, "learning_rate": 4.508461118889583e-07, "loss": 1.0384, "step": 87305 }, { "epoch": 2.5743771192687603, "grad_norm": 3.5044130190603475, "learning_rate": 4.505403520927784e-07, "loss": 1.0903, "step": 87310 }, { "epoch": 2.5745245466607694, "grad_norm": 3.6105771676969103, "learning_rate": 4.502346893869651e-07, "loss": 1.0821, "step": 87315 }, { "epoch": 2.5746719740527793, "grad_norm": 3.6400921080304602, "learning_rate": 4.499291237805141e-07, "loss": 1.079, "step": 87320 }, { "epoch": 2.5748194014447883, "grad_norm": 3.518461853604065, "learning_rate": 4.496236552824178e-07, "loss": 1.0623, "step": 87325 }, { "epoch": 2.5749668288367977, "grad_norm": 3.3415355743057815, "learning_rate": 4.493182839016627e-07, "loss": 1.0093, "step": 87330 }, { "epoch": 2.575114256228807, "grad_norm": 3.3988110227541735, "learning_rate": 4.490130096472375e-07, "loss": 1.0294, "step": 87335 }, { "epoch": 2.5752616836208166, "grad_norm": 3.701962837317241, "learning_rate": 4.48707832528123e-07, "loss": 1.0785, "step": 87340 }, { "epoch": 2.575409111012826, "grad_norm": 3.4830241490285982, "learning_rate": 4.4840275255330023e-07, "loss": 1.0444, "step": 87345 }, { "epoch": 2.5755565384048356, "grad_norm": 3.577008650860558, "learning_rate": 4.480977697317462e-07, "loss": 1.0157, "step": 87350 }, { "epoch": 2.575703965796845, "grad_norm": 3.351938070435779, "learning_rate": 4.4779288407243587e-07, "loss": 1.0654, "step": 87355 }, { "epoch": 2.5758513931888545, "grad_norm": 3.4325957643880596, "learning_rate": 4.474880955843408e-07, "loss": 1.0495, "step": 87360 }, { "epoch": 2.575998820580864, "grad_norm": 3.360136540539206, "learning_rate": 4.471834042764296e-07, "loss": 1.0383, "step": 87365 }, { "epoch": 2.5761462479728734, "grad_norm": 3.5868367520080637, "learning_rate": 4.4687881015766805e-07, "loss": 1.0266, "step": 87370 }, { "epoch": 2.576293675364883, "grad_norm": 3.7231101839939345, "learning_rate": 4.4657431323702024e-07, "loss": 1.0746, "step": 87375 }, { "epoch": 2.5764411027568923, "grad_norm": 3.554690288497751, "learning_rate": 4.462699135234444e-07, "loss": 1.0681, "step": 87380 }, { "epoch": 2.5765885301489018, "grad_norm": 3.3319417230682298, "learning_rate": 4.4596561102589875e-07, "loss": 1.0092, "step": 87385 }, { "epoch": 2.5767359575409112, "grad_norm": 3.619977393223444, "learning_rate": 4.4566140575333786e-07, "loss": 1.0613, "step": 87390 }, { "epoch": 2.5768833849329207, "grad_norm": 3.6943201919932873, "learning_rate": 4.4535729771471327e-07, "loss": 1.0685, "step": 87395 }, { "epoch": 2.5770308123249297, "grad_norm": 3.610098182783415, "learning_rate": 4.450532869189745e-07, "loss": 1.0752, "step": 87400 }, { "epoch": 2.5771782397169396, "grad_norm": 3.2753244567649187, "learning_rate": 4.447493733750649e-07, "loss": 1.0156, "step": 87405 }, { "epoch": 2.5773256671089486, "grad_norm": 3.4670823628607415, "learning_rate": 4.444455570919309e-07, "loss": 1.0651, "step": 87410 }, { "epoch": 2.5774730945009585, "grad_norm": 3.4563327345930612, "learning_rate": 4.4414183807850963e-07, "loss": 1.0238, "step": 87415 }, { "epoch": 2.5776205218929675, "grad_norm": 3.717420808628155, "learning_rate": 4.43838216343741e-07, "loss": 1.102, "step": 87420 }, { "epoch": 2.577767949284977, "grad_norm": 3.4849037288744533, "learning_rate": 4.4353469189655745e-07, "loss": 1.056, "step": 87425 }, { "epoch": 2.5779153766769864, "grad_norm": 3.3236551954222717, "learning_rate": 4.43231264745891e-07, "loss": 0.9829, "step": 87430 }, { "epoch": 2.578062804068996, "grad_norm": 3.453046071897478, "learning_rate": 4.429279349006703e-07, "loss": 1.0522, "step": 87435 }, { "epoch": 2.5782102314610054, "grad_norm": 3.5174515423910004, "learning_rate": 4.4262470236982156e-07, "loss": 1.066, "step": 87440 }, { "epoch": 2.578357658853015, "grad_norm": 3.5055230639275323, "learning_rate": 4.423215671622685e-07, "loss": 1.0448, "step": 87445 }, { "epoch": 2.5785050862450243, "grad_norm": 3.6144322523620342, "learning_rate": 4.4201852928692944e-07, "loss": 1.0652, "step": 87450 }, { "epoch": 2.5786525136370337, "grad_norm": 3.4229142244605475, "learning_rate": 4.417155887527226e-07, "loss": 1.056, "step": 87455 }, { "epoch": 2.578799941029043, "grad_norm": 3.5114778600499834, "learning_rate": 4.414127455685621e-07, "loss": 0.9721, "step": 87460 }, { "epoch": 2.5789473684210527, "grad_norm": 3.5931648062454613, "learning_rate": 4.4110999974335956e-07, "loss": 1.0532, "step": 87465 }, { "epoch": 2.579094795813062, "grad_norm": 3.4767984818603117, "learning_rate": 4.4080735128602377e-07, "loss": 1.0169, "step": 87470 }, { "epoch": 2.5792422232050716, "grad_norm": 3.5741813087663425, "learning_rate": 4.4050480020546124e-07, "loss": 1.0503, "step": 87475 }, { "epoch": 2.579389650597081, "grad_norm": 3.48288359152026, "learning_rate": 4.40202346510572e-07, "loss": 1.0469, "step": 87480 }, { "epoch": 2.5795370779890905, "grad_norm": 3.570932073781398, "learning_rate": 4.398999902102602e-07, "loss": 1.0415, "step": 87485 }, { "epoch": 2.5796845053811, "grad_norm": 3.544832501384082, "learning_rate": 4.3959773131341905e-07, "loss": 1.0527, "step": 87490 }, { "epoch": 2.5798319327731094, "grad_norm": 3.38616337348364, "learning_rate": 4.392955698289461e-07, "loss": 1.0159, "step": 87495 }, { "epoch": 2.579979360165119, "grad_norm": 3.560009886025214, "learning_rate": 4.3899350576573086e-07, "loss": 1.0601, "step": 87500 }, { "epoch": 2.579979360165119, "eval_loss": 1.0787049531936646, "eval_runtime": 4.1801, "eval_samples_per_second": 94.735, "eval_steps_per_second": 3.11, "step": 87500 }, { "epoch": 2.580126787557128, "grad_norm": 3.451201852046495, "learning_rate": 4.386915391326625e-07, "loss": 1.0086, "step": 87505 }, { "epoch": 2.580274214949138, "grad_norm": 3.4581757774127677, "learning_rate": 4.3838966993862766e-07, "loss": 1.0229, "step": 87510 }, { "epoch": 2.580421642341147, "grad_norm": 3.525487536017718, "learning_rate": 4.380878981925067e-07, "loss": 1.0515, "step": 87515 }, { "epoch": 2.5805690697331567, "grad_norm": 3.498143639870865, "learning_rate": 4.3778622390318256e-07, "loss": 1.0053, "step": 87520 }, { "epoch": 2.5807164971251657, "grad_norm": 3.4958828556085897, "learning_rate": 4.374846470795302e-07, "loss": 1.0362, "step": 87525 }, { "epoch": 2.580863924517175, "grad_norm": 3.1914889517727327, "learning_rate": 4.3718316773042415e-07, "loss": 1.0308, "step": 87530 }, { "epoch": 2.5810113519091846, "grad_norm": 3.6618016451893802, "learning_rate": 4.36881785864737e-07, "loss": 1.0149, "step": 87535 }, { "epoch": 2.581158779301194, "grad_norm": 3.4993561030840814, "learning_rate": 4.3658050149133573e-07, "loss": 1.0381, "step": 87540 }, { "epoch": 2.5813062066932035, "grad_norm": 3.6082697206032215, "learning_rate": 4.362793146190874e-07, "loss": 1.0338, "step": 87545 }, { "epoch": 2.581453634085213, "grad_norm": 3.4962768921701746, "learning_rate": 4.359782252568538e-07, "loss": 1.0604, "step": 87550 }, { "epoch": 2.5816010614772225, "grad_norm": 3.571104467320205, "learning_rate": 4.3567723341349524e-07, "loss": 1.0065, "step": 87555 }, { "epoch": 2.581748488869232, "grad_norm": 3.437349583390923, "learning_rate": 4.353763390978692e-07, "loss": 1.0388, "step": 87560 }, { "epoch": 2.5818959162612414, "grad_norm": 3.5304763768098533, "learning_rate": 4.3507554231882784e-07, "loss": 0.9987, "step": 87565 }, { "epoch": 2.582043343653251, "grad_norm": 3.6102927486889005, "learning_rate": 4.3477484308522524e-07, "loss": 1.0698, "step": 87570 }, { "epoch": 2.5821907710452603, "grad_norm": 3.5850334344793966, "learning_rate": 4.3447424140590817e-07, "loss": 1.0849, "step": 87575 }, { "epoch": 2.5823381984372697, "grad_norm": 3.4258973697774886, "learning_rate": 4.34173737289722e-07, "loss": 1.0491, "step": 87580 }, { "epoch": 2.582485625829279, "grad_norm": 3.519635528957607, "learning_rate": 4.338733307455109e-07, "loss": 1.0271, "step": 87585 }, { "epoch": 2.5826330532212887, "grad_norm": 3.5693046364353784, "learning_rate": 4.335730217821124e-07, "loss": 1.0126, "step": 87590 }, { "epoch": 2.582780480613298, "grad_norm": 3.4559011514951745, "learning_rate": 4.332728104083657e-07, "loss": 1.0454, "step": 87595 }, { "epoch": 2.582927908005307, "grad_norm": 3.5436792960449193, "learning_rate": 4.3297269663310364e-07, "loss": 1.0191, "step": 87600 }, { "epoch": 2.583075335397317, "grad_norm": 3.4030372687994785, "learning_rate": 4.326726804651572e-07, "loss": 1.0387, "step": 87605 }, { "epoch": 2.583222762789326, "grad_norm": 3.573012396347735, "learning_rate": 4.323727619133551e-07, "loss": 1.0533, "step": 87610 }, { "epoch": 2.583370190181336, "grad_norm": 3.6314902217736673, "learning_rate": 4.320729409865232e-07, "loss": 1.0789, "step": 87615 }, { "epoch": 2.583517617573345, "grad_norm": 3.484216998605248, "learning_rate": 4.3177321769348353e-07, "loss": 1.06, "step": 87620 }, { "epoch": 2.5836650449653544, "grad_norm": 3.471393068403313, "learning_rate": 4.3147359204305584e-07, "loss": 1.0266, "step": 87625 }, { "epoch": 2.583812472357364, "grad_norm": 3.4100735086407, "learning_rate": 4.3117406404405714e-07, "loss": 1.0326, "step": 87630 }, { "epoch": 2.5839598997493733, "grad_norm": 3.4922278984319735, "learning_rate": 4.308746337053017e-07, "loss": 1.0697, "step": 87635 }, { "epoch": 2.584107327141383, "grad_norm": 3.3926814897696755, "learning_rate": 4.3057530103559993e-07, "loss": 1.0203, "step": 87640 }, { "epoch": 2.5842547545333923, "grad_norm": 3.471404814980085, "learning_rate": 4.302760660437602e-07, "loss": 1.0539, "step": 87645 }, { "epoch": 2.5844021819254017, "grad_norm": 3.539980429440205, "learning_rate": 4.2997692873858797e-07, "loss": 1.0248, "step": 87650 }, { "epoch": 2.584549609317411, "grad_norm": 3.7334051001328645, "learning_rate": 4.2967788912888537e-07, "loss": 1.0716, "step": 87655 }, { "epoch": 2.5846970367094206, "grad_norm": 3.5724211835811888, "learning_rate": 4.293789472234537e-07, "loss": 1.0286, "step": 87660 }, { "epoch": 2.58484446410143, "grad_norm": 3.4205891918145714, "learning_rate": 4.2908010303108635e-07, "loss": 1.0519, "step": 87665 }, { "epoch": 2.5849918914934396, "grad_norm": 3.6587758870053726, "learning_rate": 4.287813565605804e-07, "loss": 1.0727, "step": 87670 }, { "epoch": 2.585139318885449, "grad_norm": 3.5411513353770383, "learning_rate": 4.284827078207239e-07, "loss": 1.0718, "step": 87675 }, { "epoch": 2.5852867462774585, "grad_norm": 3.499272430487073, "learning_rate": 4.281841568203085e-07, "loss": 0.979, "step": 87680 }, { "epoch": 2.585434173669468, "grad_norm": 3.4733149007622726, "learning_rate": 4.2788570356811636e-07, "loss": 1.0603, "step": 87685 }, { "epoch": 2.5855816010614774, "grad_norm": 3.405172053509302, "learning_rate": 4.2758734807293085e-07, "loss": 1.03, "step": 87690 }, { "epoch": 2.5857290284534864, "grad_norm": 3.5562717099285543, "learning_rate": 4.272890903435313e-07, "loss": 1.0234, "step": 87695 }, { "epoch": 2.5858764558454963, "grad_norm": 3.518256131137983, "learning_rate": 4.269909303886947e-07, "loss": 1.0137, "step": 87700 }, { "epoch": 2.5860238832375053, "grad_norm": 3.596329162110086, "learning_rate": 4.26692868217195e-07, "loss": 1.0603, "step": 87705 }, { "epoch": 2.586171310629515, "grad_norm": 3.6751274174789788, "learning_rate": 4.2639490383780176e-07, "loss": 1.065, "step": 87710 }, { "epoch": 2.5863187380215242, "grad_norm": 3.4872310897672145, "learning_rate": 4.2609703725928386e-07, "loss": 1.077, "step": 87715 }, { "epoch": 2.5864661654135337, "grad_norm": 3.4789898612237926, "learning_rate": 4.2579926849040593e-07, "loss": 1.049, "step": 87720 }, { "epoch": 2.586613592805543, "grad_norm": 3.5557157773779324, "learning_rate": 4.255015975399301e-07, "loss": 1.0318, "step": 87725 }, { "epoch": 2.5867610201975526, "grad_norm": 3.6311188702031485, "learning_rate": 4.252040244166161e-07, "loss": 1.032, "step": 87730 }, { "epoch": 2.586908447589562, "grad_norm": 3.449168349676006, "learning_rate": 4.24906549129221e-07, "loss": 1.012, "step": 87735 }, { "epoch": 2.5870558749815715, "grad_norm": 3.350172598577179, "learning_rate": 4.246091716864962e-07, "loss": 1.0394, "step": 87740 }, { "epoch": 2.587203302373581, "grad_norm": 3.5996774551368773, "learning_rate": 4.243118920971951e-07, "loss": 1.0249, "step": 87745 }, { "epoch": 2.5873507297655904, "grad_norm": 3.5763132204698587, "learning_rate": 4.2401471037006285e-07, "loss": 1.0204, "step": 87750 }, { "epoch": 2.5874981571576, "grad_norm": 3.576110748933726, "learning_rate": 4.2371762651384693e-07, "loss": 1.0423, "step": 87755 }, { "epoch": 2.5876455845496094, "grad_norm": 3.734282687827198, "learning_rate": 4.2342064053728704e-07, "loss": 1.051, "step": 87760 }, { "epoch": 2.587793011941619, "grad_norm": 3.6148896830821773, "learning_rate": 4.231237524491237e-07, "loss": 1.0189, "step": 87765 }, { "epoch": 2.5879404393336283, "grad_norm": 3.4698559554333395, "learning_rate": 4.2282696225809366e-07, "loss": 1.0344, "step": 87770 }, { "epoch": 2.5880878667256377, "grad_norm": 3.607508333573876, "learning_rate": 4.225302699729275e-07, "loss": 1.0194, "step": 87775 }, { "epoch": 2.588235294117647, "grad_norm": 3.399852493752988, "learning_rate": 4.2223367560235984e-07, "loss": 1.0649, "step": 87780 }, { "epoch": 2.5883827215096566, "grad_norm": 3.5288841345640893, "learning_rate": 4.2193717915511494e-07, "loss": 1.0823, "step": 87785 }, { "epoch": 2.5885301489016657, "grad_norm": 3.58965355742341, "learning_rate": 4.216407806399192e-07, "loss": 1.0378, "step": 87790 }, { "epoch": 2.5886775762936756, "grad_norm": 3.3262823777596124, "learning_rate": 4.213444800654939e-07, "loss": 0.999, "step": 87795 }, { "epoch": 2.5888250036856846, "grad_norm": 3.582844954816232, "learning_rate": 4.21048277440558e-07, "loss": 1.0636, "step": 87800 }, { "epoch": 2.5889724310776945, "grad_norm": 3.533101179588865, "learning_rate": 4.207521727738278e-07, "loss": 1.0686, "step": 87805 }, { "epoch": 2.5891198584697035, "grad_norm": 3.424221088843315, "learning_rate": 4.204561660740168e-07, "loss": 1.088, "step": 87810 }, { "epoch": 2.589267285861713, "grad_norm": 3.563601087075401, "learning_rate": 4.2016025734983504e-07, "loss": 1.0363, "step": 87815 }, { "epoch": 2.5894147132537224, "grad_norm": 3.575935299862439, "learning_rate": 4.198644466099906e-07, "loss": 1.022, "step": 87820 }, { "epoch": 2.589562140645732, "grad_norm": 3.571303258937903, "learning_rate": 4.195687338631861e-07, "loss": 1.0721, "step": 87825 }, { "epoch": 2.5897095680377413, "grad_norm": 3.4294230211567793, "learning_rate": 4.192731191181258e-07, "loss": 1.0612, "step": 87830 }, { "epoch": 2.589856995429751, "grad_norm": 3.5965828211571633, "learning_rate": 4.189776023835065e-07, "loss": 1.0675, "step": 87835 }, { "epoch": 2.5900044228217602, "grad_norm": 3.648752278494563, "learning_rate": 4.1868218366802505e-07, "loss": 1.0169, "step": 87840 }, { "epoch": 2.5901518502137697, "grad_norm": 3.4510234504548607, "learning_rate": 4.183868629803753e-07, "loss": 1.0419, "step": 87845 }, { "epoch": 2.590299277605779, "grad_norm": 3.552478108258941, "learning_rate": 4.180916403292444e-07, "loss": 1.0222, "step": 87850 }, { "epoch": 2.5904467049977886, "grad_norm": 3.4680136743138203, "learning_rate": 4.1779651572332337e-07, "loss": 1.0623, "step": 87855 }, { "epoch": 2.590594132389798, "grad_norm": 3.610948443962492, "learning_rate": 4.17501489171294e-07, "loss": 1.0381, "step": 87860 }, { "epoch": 2.5907415597818075, "grad_norm": 3.482338316051232, "learning_rate": 4.17206560681839e-07, "loss": 1.0198, "step": 87865 }, { "epoch": 2.590888987173817, "grad_norm": 3.3506597825951356, "learning_rate": 4.169117302636363e-07, "loss": 1.0167, "step": 87870 }, { "epoch": 2.5910364145658265, "grad_norm": 3.6138461465385694, "learning_rate": 4.16616997925362e-07, "loss": 1.0284, "step": 87875 }, { "epoch": 2.591183841957836, "grad_norm": 3.5548171298520392, "learning_rate": 4.1632236367568867e-07, "loss": 1.03, "step": 87880 }, { "epoch": 2.5913312693498454, "grad_norm": 3.483336452625438, "learning_rate": 4.160278275232865e-07, "loss": 1.0037, "step": 87885 }, { "epoch": 2.591478696741855, "grad_norm": 3.4261884571066235, "learning_rate": 4.157333894768227e-07, "loss": 1.0715, "step": 87890 }, { "epoch": 2.591626124133864, "grad_norm": 3.4344484025288105, "learning_rate": 4.15439049544962e-07, "loss": 1.0617, "step": 87895 }, { "epoch": 2.5917735515258737, "grad_norm": 3.595584716790426, "learning_rate": 4.151448077363637e-07, "loss": 1.0559, "step": 87900 }, { "epoch": 2.5919209789178828, "grad_norm": 3.486916982015307, "learning_rate": 4.1485066405968804e-07, "loss": 1.0601, "step": 87905 }, { "epoch": 2.592068406309892, "grad_norm": 3.5733182952013234, "learning_rate": 4.1455661852358965e-07, "loss": 1.0571, "step": 87910 }, { "epoch": 2.5922158337019017, "grad_norm": 3.347703605809089, "learning_rate": 4.142626711367213e-07, "loss": 1.0241, "step": 87915 }, { "epoch": 2.592363261093911, "grad_norm": 3.5664090938115507, "learning_rate": 4.1396882190773393e-07, "loss": 1.0584, "step": 87920 }, { "epoch": 2.5925106884859206, "grad_norm": 3.5275014955980395, "learning_rate": 4.136750708452719e-07, "loss": 1.0238, "step": 87925 }, { "epoch": 2.59265811587793, "grad_norm": 3.4736443372901173, "learning_rate": 4.133814179579816e-07, "loss": 1.0245, "step": 87930 }, { "epoch": 2.5928055432699395, "grad_norm": 3.5143251662019095, "learning_rate": 4.1308786325450193e-07, "loss": 1.0196, "step": 87935 }, { "epoch": 2.592952970661949, "grad_norm": 3.6218054709673764, "learning_rate": 4.1279440674347356e-07, "loss": 1.0342, "step": 87940 }, { "epoch": 2.5931003980539584, "grad_norm": 3.6165276426768083, "learning_rate": 4.1250104843352995e-07, "loss": 1.0385, "step": 87945 }, { "epoch": 2.593247825445968, "grad_norm": 3.461845947271031, "learning_rate": 4.122077883333038e-07, "loss": 1.0675, "step": 87950 }, { "epoch": 2.5933952528379773, "grad_norm": 3.541235281283802, "learning_rate": 4.1191462645142524e-07, "loss": 1.0264, "step": 87955 }, { "epoch": 2.593542680229987, "grad_norm": 3.5185393172164963, "learning_rate": 4.116215627965199e-07, "loss": 1.0266, "step": 87960 }, { "epoch": 2.5936901076219963, "grad_norm": 3.428151467322301, "learning_rate": 4.113285973772134e-07, "loss": 1.0276, "step": 87965 }, { "epoch": 2.5938375350140057, "grad_norm": 3.4106882621220547, "learning_rate": 4.110357302021242e-07, "loss": 1.0404, "step": 87970 }, { "epoch": 2.593984962406015, "grad_norm": 3.553677389110854, "learning_rate": 4.107429612798713e-07, "loss": 0.9987, "step": 87975 }, { "epoch": 2.5941323897980246, "grad_norm": 3.428958855228566, "learning_rate": 4.1045029061906993e-07, "loss": 1.0698, "step": 87980 }, { "epoch": 2.594279817190034, "grad_norm": 3.4966579221562695, "learning_rate": 4.1015771822833223e-07, "loss": 1.0388, "step": 87985 }, { "epoch": 2.594427244582043, "grad_norm": 3.490996985071145, "learning_rate": 4.098652441162673e-07, "loss": 1.01, "step": 87990 }, { "epoch": 2.594574671974053, "grad_norm": 3.4130531917957625, "learning_rate": 4.095728682914823e-07, "loss": 1.0212, "step": 87995 }, { "epoch": 2.594722099366062, "grad_norm": 3.535237265844233, "learning_rate": 4.0928059076257835e-07, "loss": 1.0519, "step": 88000 }, { "epoch": 2.594722099366062, "eval_loss": 1.0785444974899292, "eval_runtime": 4.3261, "eval_samples_per_second": 91.537, "eval_steps_per_second": 3.005, "step": 88000 }, { "epoch": 2.594869526758072, "grad_norm": 3.6172200940403973, "learning_rate": 4.089884115381594e-07, "loss": 1.0572, "step": 88005 }, { "epoch": 2.595016954150081, "grad_norm": 3.3652098268001414, "learning_rate": 4.0869633062681976e-07, "loss": 1.0552, "step": 88010 }, { "epoch": 2.5951643815420904, "grad_norm": 3.3401860835418296, "learning_rate": 4.08404348037158e-07, "loss": 1.0411, "step": 88015 }, { "epoch": 2.5953118089341, "grad_norm": 3.67566774489654, "learning_rate": 4.0811246377776265e-07, "loss": 1.0345, "step": 88020 }, { "epoch": 2.5954592363261093, "grad_norm": 3.535046895387251, "learning_rate": 4.0782067785722437e-07, "loss": 1.0208, "step": 88025 }, { "epoch": 2.5956066637181188, "grad_norm": 3.5718831596843827, "learning_rate": 4.075289902841296e-07, "loss": 1.0345, "step": 88030 }, { "epoch": 2.5957540911101282, "grad_norm": 3.601405577946032, "learning_rate": 4.0723740106705984e-07, "loss": 1.0528, "step": 88035 }, { "epoch": 2.5959015185021377, "grad_norm": 3.3777925741535633, "learning_rate": 4.069459102145978e-07, "loss": 1.0235, "step": 88040 }, { "epoch": 2.596048945894147, "grad_norm": 3.462866004802996, "learning_rate": 4.0665451773531913e-07, "loss": 1.0011, "step": 88045 }, { "epoch": 2.5961963732861566, "grad_norm": 3.58185359546131, "learning_rate": 4.0636322363779896e-07, "loss": 1.0723, "step": 88050 }, { "epoch": 2.596343800678166, "grad_norm": 3.5257865140831295, "learning_rate": 4.060720279306089e-07, "loss": 1.0414, "step": 88055 }, { "epoch": 2.5964912280701755, "grad_norm": 3.457008133043358, "learning_rate": 4.0578093062231745e-07, "loss": 1.005, "step": 88060 }, { "epoch": 2.596638655462185, "grad_norm": 3.625003898148476, "learning_rate": 4.0548993172149147e-07, "loss": 1.0409, "step": 88065 }, { "epoch": 2.5967860828541944, "grad_norm": 3.755331925390624, "learning_rate": 4.0519903123669296e-07, "loss": 1.0488, "step": 88070 }, { "epoch": 2.596933510246204, "grad_norm": 3.3678035781229445, "learning_rate": 4.0490822917648204e-07, "loss": 0.9905, "step": 88075 }, { "epoch": 2.5970809376382133, "grad_norm": 3.4576490453482154, "learning_rate": 4.0461752554941733e-07, "loss": 1.0355, "step": 88080 }, { "epoch": 2.5972283650302224, "grad_norm": 3.6785685327651847, "learning_rate": 4.043269203640504e-07, "loss": 1.0295, "step": 88085 }, { "epoch": 2.5973757924222323, "grad_norm": 3.195532033622519, "learning_rate": 4.0403641362893554e-07, "loss": 1.016, "step": 88090 }, { "epoch": 2.5975232198142413, "grad_norm": 3.465108378517161, "learning_rate": 4.037460053526193e-07, "loss": 1.0489, "step": 88095 }, { "epoch": 2.597670647206251, "grad_norm": 3.434670593401047, "learning_rate": 4.0345569554364776e-07, "loss": 1.0659, "step": 88100 }, { "epoch": 2.59781807459826, "grad_norm": 3.4607908017876947, "learning_rate": 4.031654842105645e-07, "loss": 1.0528, "step": 88105 }, { "epoch": 2.5979655019902697, "grad_norm": 3.350120225410243, "learning_rate": 4.0287537136190685e-07, "loss": 1.034, "step": 88110 }, { "epoch": 2.598112929382279, "grad_norm": 3.666285675560562, "learning_rate": 4.025853570062155e-07, "loss": 1.0604, "step": 88115 }, { "epoch": 2.5982603567742886, "grad_norm": 3.579893674065122, "learning_rate": 4.0229544115202105e-07, "loss": 1.0439, "step": 88120 }, { "epoch": 2.598407784166298, "grad_norm": 3.5865044795686436, "learning_rate": 4.020056238078559e-07, "loss": 1.0308, "step": 88125 }, { "epoch": 2.5985552115583075, "grad_norm": 3.3812566318243746, "learning_rate": 4.017159049822487e-07, "loss": 1.0396, "step": 88130 }, { "epoch": 2.598702638950317, "grad_norm": 3.400313863374648, "learning_rate": 4.0142628468372424e-07, "loss": 1.0235, "step": 88135 }, { "epoch": 2.5988500663423264, "grad_norm": 3.4932567748841286, "learning_rate": 4.0113676292080487e-07, "loss": 1.0682, "step": 88140 }, { "epoch": 2.598997493734336, "grad_norm": 3.434366310292013, "learning_rate": 4.0084733970201003e-07, "loss": 1.09, "step": 88145 }, { "epoch": 2.5991449211263453, "grad_norm": 3.343598650900323, "learning_rate": 4.0055801503585706e-07, "loss": 1.0506, "step": 88150 }, { "epoch": 2.599292348518355, "grad_norm": 3.488852732697867, "learning_rate": 4.0026878893085953e-07, "loss": 1.0572, "step": 88155 }, { "epoch": 2.5994397759103642, "grad_norm": 3.5157258100796764, "learning_rate": 3.9997966139552743e-07, "loss": 1.045, "step": 88160 }, { "epoch": 2.5995872033023737, "grad_norm": 3.487352900443631, "learning_rate": 3.996906324383688e-07, "loss": 1.0199, "step": 88165 }, { "epoch": 2.599734630694383, "grad_norm": 3.5085669707094174, "learning_rate": 3.99401702067889e-07, "loss": 1.0569, "step": 88170 }, { "epoch": 2.5998820580863926, "grad_norm": 3.6187874755588205, "learning_rate": 3.9911287029259045e-07, "loss": 1.0363, "step": 88175 }, { "epoch": 2.6000294854784016, "grad_norm": 3.425599507666447, "learning_rate": 3.988241371209725e-07, "loss": 1.0498, "step": 88180 }, { "epoch": 2.6001769128704115, "grad_norm": 3.4341256775116222, "learning_rate": 3.985355025615292e-07, "loss": 1.0321, "step": 88185 }, { "epoch": 2.6003243402624205, "grad_norm": 3.454782736252024, "learning_rate": 3.9824696662275754e-07, "loss": 1.0575, "step": 88190 }, { "epoch": 2.6004717676544304, "grad_norm": 3.644745485359905, "learning_rate": 3.979585293131449e-07, "loss": 1.0655, "step": 88195 }, { "epoch": 2.6006191950464395, "grad_norm": 3.566152240042292, "learning_rate": 3.9767019064118116e-07, "loss": 1.04, "step": 88200 }, { "epoch": 2.600766622438449, "grad_norm": 3.5912567384906064, "learning_rate": 3.973819506153495e-07, "loss": 1.0718, "step": 88205 }, { "epoch": 2.6009140498304584, "grad_norm": 3.4912018322832927, "learning_rate": 3.970938092441323e-07, "loss": 1.0772, "step": 88210 }, { "epoch": 2.601061477222468, "grad_norm": 3.583838210980896, "learning_rate": 3.9680576653600823e-07, "loss": 1.0126, "step": 88215 }, { "epoch": 2.6012089046144773, "grad_norm": 3.5286018884447223, "learning_rate": 3.965178224994538e-07, "loss": 1.0843, "step": 88220 }, { "epoch": 2.6013563320064867, "grad_norm": 3.393513922026856, "learning_rate": 3.9622997714294106e-07, "loss": 1.0271, "step": 88225 }, { "epoch": 2.601503759398496, "grad_norm": 3.633130714132643, "learning_rate": 3.959422304749423e-07, "loss": 1.0781, "step": 88230 }, { "epoch": 2.6016511867905057, "grad_norm": 3.5168694930022735, "learning_rate": 3.9565458250392205e-07, "loss": 1.0555, "step": 88235 }, { "epoch": 2.601798614182515, "grad_norm": 3.5375996277977984, "learning_rate": 3.9536703323834603e-07, "loss": 1.0398, "step": 88240 }, { "epoch": 2.6019460415745246, "grad_norm": 3.420060873220055, "learning_rate": 3.9507958268667583e-07, "loss": 1.0381, "step": 88245 }, { "epoch": 2.602093468966534, "grad_norm": 3.53973406143328, "learning_rate": 3.947922308573701e-07, "loss": 1.1065, "step": 88250 }, { "epoch": 2.6022408963585435, "grad_norm": 3.464236475144249, "learning_rate": 3.9450497775888455e-07, "loss": 1.0589, "step": 88255 }, { "epoch": 2.602388323750553, "grad_norm": 3.5195393208459183, "learning_rate": 3.942178233996703e-07, "loss": 1.0524, "step": 88260 }, { "epoch": 2.6025357511425624, "grad_norm": 3.497762938125991, "learning_rate": 3.9393076778817984e-07, "loss": 1.0387, "step": 88265 }, { "epoch": 2.602683178534572, "grad_norm": 3.5145103847576586, "learning_rate": 3.9364381093285716e-07, "loss": 1.0664, "step": 88270 }, { "epoch": 2.602830605926581, "grad_norm": 3.3965705441833345, "learning_rate": 3.933569528421498e-07, "loss": 1.0159, "step": 88275 }, { "epoch": 2.602978033318591, "grad_norm": 3.4514790239462187, "learning_rate": 3.9307019352449585e-07, "loss": 1.0503, "step": 88280 }, { "epoch": 2.6031254607106, "grad_norm": 3.6844063933436275, "learning_rate": 3.927835329883349e-07, "loss": 1.0649, "step": 88285 }, { "epoch": 2.6032728881026097, "grad_norm": 3.3126369905748807, "learning_rate": 3.9249697124210184e-07, "loss": 1.0346, "step": 88290 }, { "epoch": 2.6034203154946187, "grad_norm": 3.4791785173313166, "learning_rate": 3.922105082942296e-07, "loss": 1.0574, "step": 88295 }, { "epoch": 2.603567742886628, "grad_norm": 3.615353105494287, "learning_rate": 3.9192414415314793e-07, "loss": 1.0707, "step": 88300 }, { "epoch": 2.6037151702786376, "grad_norm": 3.591388240711403, "learning_rate": 3.916378788272819e-07, "loss": 1.0642, "step": 88305 }, { "epoch": 2.603862597670647, "grad_norm": 3.58219325421948, "learning_rate": 3.913517123250564e-07, "loss": 1.0192, "step": 88310 }, { "epoch": 2.6040100250626566, "grad_norm": 3.4605663072937984, "learning_rate": 3.910656446548917e-07, "loss": 1.0662, "step": 88315 }, { "epoch": 2.604157452454666, "grad_norm": 3.405027164378707, "learning_rate": 3.907796758252058e-07, "loss": 1.053, "step": 88320 }, { "epoch": 2.6043048798466755, "grad_norm": 3.5769365507791675, "learning_rate": 3.9049380584441396e-07, "loss": 1.0488, "step": 88325 }, { "epoch": 2.604452307238685, "grad_norm": 3.6847458501292403, "learning_rate": 3.902080347209282e-07, "loss": 1.0754, "step": 88330 }, { "epoch": 2.6045997346306944, "grad_norm": 3.551900885119167, "learning_rate": 3.899223624631576e-07, "loss": 1.0582, "step": 88335 }, { "epoch": 2.604747162022704, "grad_norm": 3.4465554863654075, "learning_rate": 3.8963678907950843e-07, "loss": 1.0191, "step": 88340 }, { "epoch": 2.6048945894147133, "grad_norm": 3.4358554748793484, "learning_rate": 3.893513145783831e-07, "loss": 1.0719, "step": 88345 }, { "epoch": 2.6050420168067228, "grad_norm": 3.4786621644514306, "learning_rate": 3.890659389681836e-07, "loss": 1.1038, "step": 88350 }, { "epoch": 2.605189444198732, "grad_norm": 3.5623258874411756, "learning_rate": 3.887806622573062e-07, "loss": 1.0901, "step": 88355 }, { "epoch": 2.6053368715907417, "grad_norm": 3.521479385769293, "learning_rate": 3.884954844541462e-07, "loss": 1.0361, "step": 88360 }, { "epoch": 2.605484298982751, "grad_norm": 3.369442339447927, "learning_rate": 3.882104055670953e-07, "loss": 1.0497, "step": 88365 }, { "epoch": 2.6056317263747606, "grad_norm": 3.682291257632034, "learning_rate": 3.8792542560454084e-07, "loss": 1.0391, "step": 88370 }, { "epoch": 2.60577915376677, "grad_norm": 3.5122952644019296, "learning_rate": 3.8764054457487127e-07, "loss": 1.03, "step": 88375 }, { "epoch": 2.605926581158779, "grad_norm": 3.4697221283486237, "learning_rate": 3.8735576248646725e-07, "loss": 1.0104, "step": 88380 }, { "epoch": 2.606074008550789, "grad_norm": 3.5686620858545806, "learning_rate": 3.870710793477096e-07, "loss": 1.0518, "step": 88385 }, { "epoch": 2.606221435942798, "grad_norm": 3.3741852571869937, "learning_rate": 3.867864951669754e-07, "loss": 1.0301, "step": 88390 }, { "epoch": 2.606368863334808, "grad_norm": 3.354047462885022, "learning_rate": 3.865020099526392e-07, "loss": 0.9882, "step": 88395 }, { "epoch": 2.606516290726817, "grad_norm": 3.4814257438814407, "learning_rate": 3.8621762371307186e-07, "loss": 1.0418, "step": 88400 }, { "epoch": 2.6066637181188264, "grad_norm": 3.4474648667235974, "learning_rate": 3.8593333645664197e-07, "loss": 1.0382, "step": 88405 }, { "epoch": 2.606811145510836, "grad_norm": 3.6313316125590682, "learning_rate": 3.856491481917154e-07, "loss": 1.0405, "step": 88410 }, { "epoch": 2.6069585729028453, "grad_norm": 3.4679661408505953, "learning_rate": 3.853650589266547e-07, "loss": 1.0136, "step": 88415 }, { "epoch": 2.6071060002948547, "grad_norm": 3.6167230879024492, "learning_rate": 3.8508106866981763e-07, "loss": 1.0405, "step": 88420 }, { "epoch": 2.607253427686864, "grad_norm": 3.5064452481938044, "learning_rate": 3.847971774295643e-07, "loss": 1.0504, "step": 88425 }, { "epoch": 2.6074008550788736, "grad_norm": 3.4219535752837618, "learning_rate": 3.8451338521424543e-07, "loss": 1.0208, "step": 88430 }, { "epoch": 2.607548282470883, "grad_norm": 3.4582124313483997, "learning_rate": 3.842296920322136e-07, "loss": 1.0078, "step": 88435 }, { "epoch": 2.6076957098628926, "grad_norm": 3.4017143605992715, "learning_rate": 3.8394609789181703e-07, "loss": 1.0655, "step": 88440 }, { "epoch": 2.607843137254902, "grad_norm": 3.5559536480453393, "learning_rate": 3.836626028013987e-07, "loss": 1.061, "step": 88445 }, { "epoch": 2.6079905646469115, "grad_norm": 3.577124586448806, "learning_rate": 3.8337920676930355e-07, "loss": 1.0142, "step": 88450 }, { "epoch": 2.608137992038921, "grad_norm": 3.560953226606122, "learning_rate": 3.830959098038683e-07, "loss": 1.0323, "step": 88455 }, { "epoch": 2.6082854194309304, "grad_norm": 3.410162568775204, "learning_rate": 3.828127119134317e-07, "loss": 1.0337, "step": 88460 }, { "epoch": 2.60843284682294, "grad_norm": 3.516534497330036, "learning_rate": 3.8252961310632534e-07, "loss": 1.0254, "step": 88465 }, { "epoch": 2.6085802742149493, "grad_norm": 3.5360572439504505, "learning_rate": 3.822466133908806e-07, "loss": 1.0478, "step": 88470 }, { "epoch": 2.6087277016069583, "grad_norm": 3.514069056671521, "learning_rate": 3.819637127754244e-07, "loss": 1.0479, "step": 88475 }, { "epoch": 2.6088751289989682, "grad_norm": 3.3049434341435866, "learning_rate": 3.816809112682823e-07, "loss": 1.0522, "step": 88480 }, { "epoch": 2.6090225563909772, "grad_norm": 3.548059682143791, "learning_rate": 3.813982088777751e-07, "loss": 1.0515, "step": 88485 }, { "epoch": 2.609169983782987, "grad_norm": 3.3363464067315216, "learning_rate": 3.8111560561222324e-07, "loss": 1.0377, "step": 88490 }, { "epoch": 2.609317411174996, "grad_norm": 3.4015298772299554, "learning_rate": 3.8083310147994086e-07, "loss": 1.0518, "step": 88495 }, { "epoch": 2.6094648385670056, "grad_norm": 3.7475948841469413, "learning_rate": 3.8055069648924135e-07, "loss": 1.0243, "step": 88500 }, { "epoch": 2.6094648385670056, "eval_loss": 1.0783352851867676, "eval_runtime": 4.1794, "eval_samples_per_second": 94.75, "eval_steps_per_second": 3.11, "step": 88500 }, { "epoch": 2.609612265959015, "grad_norm": 3.4742439405766126, "learning_rate": 3.802683906484355e-07, "loss": 1.0335, "step": 88505 }, { "epoch": 2.6097596933510245, "grad_norm": 3.5707849048335962, "learning_rate": 3.799861839658297e-07, "loss": 1.0487, "step": 88510 }, { "epoch": 2.609907120743034, "grad_norm": 3.4140400420665586, "learning_rate": 3.7970407644972935e-07, "loss": 1.0183, "step": 88515 }, { "epoch": 2.6100545481350435, "grad_norm": 3.5086182161263797, "learning_rate": 3.79422068108434e-07, "loss": 1.0513, "step": 88520 }, { "epoch": 2.610201975527053, "grad_norm": 3.5011141268204744, "learning_rate": 3.791401589502441e-07, "loss": 1.0302, "step": 88525 }, { "epoch": 2.6103494029190624, "grad_norm": 3.4191462162380386, "learning_rate": 3.7885834898345267e-07, "loss": 1.043, "step": 88530 }, { "epoch": 2.610496830311072, "grad_norm": 3.725659336870022, "learning_rate": 3.7857663821635553e-07, "loss": 1.0708, "step": 88535 }, { "epoch": 2.6106442577030813, "grad_norm": 3.5491377384776888, "learning_rate": 3.7829502665723976e-07, "loss": 1.0682, "step": 88540 }, { "epoch": 2.6107916850950907, "grad_norm": 3.688180077983325, "learning_rate": 3.7801351431439255e-07, "loss": 1.0901, "step": 88545 }, { "epoch": 2.6109391124871, "grad_norm": 3.688400227304069, "learning_rate": 3.7773210119609854e-07, "loss": 1.0823, "step": 88550 }, { "epoch": 2.6110865398791097, "grad_norm": 3.3909536770122886, "learning_rate": 3.774507873106381e-07, "loss": 1.0039, "step": 88555 }, { "epoch": 2.611233967271119, "grad_norm": 3.446056305852263, "learning_rate": 3.7716957266629006e-07, "loss": 1.0444, "step": 88560 }, { "epoch": 2.6113813946631286, "grad_norm": 3.3222428415249623, "learning_rate": 3.7688845727132784e-07, "loss": 1.0101, "step": 88565 }, { "epoch": 2.6115288220551376, "grad_norm": 3.6315063167898978, "learning_rate": 3.7660744113402435e-07, "loss": 1.0219, "step": 88570 }, { "epoch": 2.6116762494471475, "grad_norm": 3.4263026131933545, "learning_rate": 3.763265242626489e-07, "loss": 1.0383, "step": 88575 }, { "epoch": 2.6118236768391565, "grad_norm": 3.6220464421958236, "learning_rate": 3.7604570666546807e-07, "loss": 1.0841, "step": 88580 }, { "epoch": 2.6119711042311664, "grad_norm": 3.6440631361737195, "learning_rate": 3.75764988350745e-07, "loss": 1.0683, "step": 88585 }, { "epoch": 2.6121185316231754, "grad_norm": 3.377988815381417, "learning_rate": 3.7548436932674e-07, "loss": 1.0277, "step": 88590 }, { "epoch": 2.612265959015185, "grad_norm": 3.4810274122121974, "learning_rate": 3.752038496017107e-07, "loss": 1.061, "step": 88595 }, { "epoch": 2.6124133864071943, "grad_norm": 3.354966208018075, "learning_rate": 3.749234291839122e-07, "loss": 1.0291, "step": 88600 }, { "epoch": 2.612560813799204, "grad_norm": 3.4854556039231386, "learning_rate": 3.746431080815946e-07, "loss": 0.9959, "step": 88605 }, { "epoch": 2.6127082411912133, "grad_norm": 3.561867850464919, "learning_rate": 3.7436288630300906e-07, "loss": 1.0205, "step": 88610 }, { "epoch": 2.6128556685832227, "grad_norm": 3.51552501541785, "learning_rate": 3.7408276385639953e-07, "loss": 1.0503, "step": 88615 }, { "epoch": 2.613003095975232, "grad_norm": 3.4853063141947733, "learning_rate": 3.7380274075000983e-07, "loss": 1.011, "step": 88620 }, { "epoch": 2.6131505233672416, "grad_norm": 3.575971782958274, "learning_rate": 3.7352281699208e-07, "loss": 1.0949, "step": 88625 }, { "epoch": 2.613297950759251, "grad_norm": 3.4966993233972077, "learning_rate": 3.7324299259084553e-07, "loss": 1.0261, "step": 88630 }, { "epoch": 2.6134453781512605, "grad_norm": 3.54750125474956, "learning_rate": 3.729632675545436e-07, "loss": 1.0737, "step": 88635 }, { "epoch": 2.61359280554327, "grad_norm": 3.4048598279051787, "learning_rate": 3.726836418914026e-07, "loss": 0.9976, "step": 88640 }, { "epoch": 2.6137402329352795, "grad_norm": 3.4751788846410716, "learning_rate": 3.724041156096522e-07, "loss": 1.0296, "step": 88645 }, { "epoch": 2.613887660327289, "grad_norm": 3.6423236897169327, "learning_rate": 3.721246887175171e-07, "loss": 1.0508, "step": 88650 }, { "epoch": 2.6140350877192984, "grad_norm": 3.562046637538573, "learning_rate": 3.7184536122322033e-07, "loss": 1.0278, "step": 88655 }, { "epoch": 2.614182515111308, "grad_norm": 3.437128263393272, "learning_rate": 3.715661331349815e-07, "loss": 1.0476, "step": 88660 }, { "epoch": 2.614329942503317, "grad_norm": 3.6673658349320135, "learning_rate": 3.7128700446101655e-07, "loss": 1.0891, "step": 88665 }, { "epoch": 2.6144773698953268, "grad_norm": 3.4726779909425747, "learning_rate": 3.710079752095398e-07, "loss": 0.992, "step": 88670 }, { "epoch": 2.6146247972873358, "grad_norm": 3.458971714314834, "learning_rate": 3.707290453887625e-07, "loss": 1.0508, "step": 88675 }, { "epoch": 2.6147722246793457, "grad_norm": 3.477840162265026, "learning_rate": 3.7045021500689074e-07, "loss": 1.0216, "step": 88680 }, { "epoch": 2.6149196520713547, "grad_norm": 3.488746094844489, "learning_rate": 3.7017148407213155e-07, "loss": 1.012, "step": 88685 }, { "epoch": 2.615067079463364, "grad_norm": 3.4338519503199003, "learning_rate": 3.6989285259268463e-07, "loss": 1.0603, "step": 88690 }, { "epoch": 2.6152145068553736, "grad_norm": 3.5195770719648998, "learning_rate": 3.6961432057675065e-07, "loss": 1.05, "step": 88695 }, { "epoch": 2.615361934247383, "grad_norm": 3.5170483905138124, "learning_rate": 3.6933588803252623e-07, "loss": 1.0503, "step": 88700 }, { "epoch": 2.6155093616393925, "grad_norm": 3.552595196700573, "learning_rate": 3.6905755496820156e-07, "loss": 1.0233, "step": 88705 }, { "epoch": 2.615656789031402, "grad_norm": 3.3640743919419602, "learning_rate": 3.6877932139197087e-07, "loss": 1.0079, "step": 88710 }, { "epoch": 2.6158042164234114, "grad_norm": 3.4691683211623396, "learning_rate": 3.685011873120181e-07, "loss": 1.037, "step": 88715 }, { "epoch": 2.615951643815421, "grad_norm": 3.477379166137296, "learning_rate": 3.682231527365304e-07, "loss": 1.0693, "step": 88720 }, { "epoch": 2.6160990712074303, "grad_norm": 3.5760755346504376, "learning_rate": 3.679452176736875e-07, "loss": 1.0604, "step": 88725 }, { "epoch": 2.61624649859944, "grad_norm": 3.515315887988839, "learning_rate": 3.6766738213166824e-07, "loss": 1.0819, "step": 88730 }, { "epoch": 2.6163939259914493, "grad_norm": 3.4493309411996345, "learning_rate": 3.6738964611864867e-07, "loss": 1.0652, "step": 88735 }, { "epoch": 2.6165413533834587, "grad_norm": 3.4494616318172318, "learning_rate": 3.671120096428013e-07, "loss": 1.0521, "step": 88740 }, { "epoch": 2.616688780775468, "grad_norm": 3.5117211665431096, "learning_rate": 3.6683447271229587e-07, "loss": 1.0397, "step": 88745 }, { "epoch": 2.6168362081674776, "grad_norm": 3.4922080400929185, "learning_rate": 3.665570353353001e-07, "loss": 1.0259, "step": 88750 }, { "epoch": 2.616983635559487, "grad_norm": 3.6816445418305865, "learning_rate": 3.6627969751997614e-07, "loss": 1.0538, "step": 88755 }, { "epoch": 2.617131062951496, "grad_norm": 3.4474291417904848, "learning_rate": 3.660024592744858e-07, "loss": 1.0316, "step": 88760 }, { "epoch": 2.617278490343506, "grad_norm": 3.5820228981734523, "learning_rate": 3.6572532060698757e-07, "loss": 1.0116, "step": 88765 }, { "epoch": 2.617425917735515, "grad_norm": 3.4825688169458564, "learning_rate": 3.6544828152563576e-07, "loss": 1.0308, "step": 88770 }, { "epoch": 2.617573345127525, "grad_norm": 3.484795281622926, "learning_rate": 3.6517134203858423e-07, "loss": 1.0618, "step": 88775 }, { "epoch": 2.617720772519534, "grad_norm": 3.590272925531933, "learning_rate": 3.64894502153979e-07, "loss": 1.0497, "step": 88780 }, { "epoch": 2.6178681999115434, "grad_norm": 3.5397133771823466, "learning_rate": 3.646177618799702e-07, "loss": 1.0308, "step": 88785 }, { "epoch": 2.618015627303553, "grad_norm": 3.339519713793476, "learning_rate": 3.64341121224698e-07, "loss": 1.0, "step": 88790 }, { "epoch": 2.6181630546955623, "grad_norm": 3.4400130499159807, "learning_rate": 3.640645801963059e-07, "loss": 1.0621, "step": 88795 }, { "epoch": 2.6183104820875718, "grad_norm": 3.3282770354945543, "learning_rate": 3.63788138802929e-07, "loss": 1.0432, "step": 88800 }, { "epoch": 2.6184579094795812, "grad_norm": 3.630541675434963, "learning_rate": 3.63511797052703e-07, "loss": 1.0699, "step": 88805 }, { "epoch": 2.6186053368715907, "grad_norm": 3.4451641645481854, "learning_rate": 3.6323555495375913e-07, "loss": 1.0523, "step": 88810 }, { "epoch": 2.6187527642636, "grad_norm": 3.5298671134458983, "learning_rate": 3.6295941251422645e-07, "loss": 1.0674, "step": 88815 }, { "epoch": 2.6189001916556096, "grad_norm": 3.455253239417263, "learning_rate": 3.626833697422309e-07, "loss": 1.0449, "step": 88820 }, { "epoch": 2.619047619047619, "grad_norm": 3.5584484505035587, "learning_rate": 3.6240742664589476e-07, "loss": 1.0422, "step": 88825 }, { "epoch": 2.6191950464396285, "grad_norm": 3.6522938840939276, "learning_rate": 3.6213158323333857e-07, "loss": 1.0694, "step": 88830 }, { "epoch": 2.619342473831638, "grad_norm": 3.493387304741501, "learning_rate": 3.618558395126788e-07, "loss": 1.0781, "step": 88835 }, { "epoch": 2.6194899012236474, "grad_norm": 3.4481873620369625, "learning_rate": 3.6158019549202974e-07, "loss": 1.1094, "step": 88840 }, { "epoch": 2.619637328615657, "grad_norm": 3.5785341097977903, "learning_rate": 3.613046511795025e-07, "loss": 1.0422, "step": 88845 }, { "epoch": 2.6197847560076664, "grad_norm": 3.588376877598435, "learning_rate": 3.610292065832055e-07, "loss": 1.0198, "step": 88850 }, { "epoch": 2.619932183399676, "grad_norm": 3.4437432905124683, "learning_rate": 3.6075386171124396e-07, "loss": 1.024, "step": 88855 }, { "epoch": 2.6200796107916853, "grad_norm": 3.503257779270439, "learning_rate": 3.6047861657172095e-07, "loss": 1.0348, "step": 88860 }, { "epoch": 2.6202270381836943, "grad_norm": 3.5671368893631445, "learning_rate": 3.602034711727338e-07, "loss": 1.0445, "step": 88865 }, { "epoch": 2.620374465575704, "grad_norm": 3.743845922546749, "learning_rate": 3.5992842552238144e-07, "loss": 1.0478, "step": 88870 }, { "epoch": 2.620521892967713, "grad_norm": 3.381125373234748, "learning_rate": 3.5965347962875525e-07, "loss": 1.0266, "step": 88875 }, { "epoch": 2.620669320359723, "grad_norm": 3.607951413280661, "learning_rate": 3.5937863349994717e-07, "loss": 1.0206, "step": 88880 }, { "epoch": 2.620816747751732, "grad_norm": 3.5201378374809082, "learning_rate": 3.5910388714404483e-07, "loss": 1.0745, "step": 88885 }, { "epoch": 2.6209641751437416, "grad_norm": 3.5769133980177235, "learning_rate": 3.588292405691314e-07, "loss": 1.0261, "step": 88890 }, { "epoch": 2.621111602535751, "grad_norm": 3.587082093054016, "learning_rate": 3.585546937832916e-07, "loss": 1.0277, "step": 88895 }, { "epoch": 2.6212590299277605, "grad_norm": 3.409014864058557, "learning_rate": 3.5828024679460155e-07, "loss": 1.0639, "step": 88900 }, { "epoch": 2.62140645731977, "grad_norm": 3.427167927708203, "learning_rate": 3.580058996111381e-07, "loss": 1.0503, "step": 88905 }, { "epoch": 2.6215538847117794, "grad_norm": 3.4147350581218694, "learning_rate": 3.5773165224097433e-07, "loss": 1.0438, "step": 88910 }, { "epoch": 2.621701312103789, "grad_norm": 3.4932520187536773, "learning_rate": 3.5745750469218047e-07, "loss": 1.0252, "step": 88915 }, { "epoch": 2.6218487394957983, "grad_norm": 3.5826111293382694, "learning_rate": 3.57183456972823e-07, "loss": 1.0234, "step": 88920 }, { "epoch": 2.621996166887808, "grad_norm": 3.5216119366823797, "learning_rate": 3.5690950909096674e-07, "loss": 1.0122, "step": 88925 }, { "epoch": 2.6221435942798172, "grad_norm": 3.4341958744619507, "learning_rate": 3.566356610546727e-07, "loss": 1.062, "step": 88930 }, { "epoch": 2.6222910216718267, "grad_norm": 3.562784300945192, "learning_rate": 3.563619128719998e-07, "loss": 1.0263, "step": 88935 }, { "epoch": 2.622438449063836, "grad_norm": 3.5439291462105613, "learning_rate": 3.5608826455100177e-07, "loss": 1.0339, "step": 88940 }, { "epoch": 2.6225858764558456, "grad_norm": 3.5603217216770497, "learning_rate": 3.5581471609973285e-07, "loss": 1.0464, "step": 88945 }, { "epoch": 2.622733303847855, "grad_norm": 3.3947359077078056, "learning_rate": 3.555412675262412e-07, "loss": 1.0112, "step": 88950 }, { "epoch": 2.6228807312398645, "grad_norm": 3.5205182355879034, "learning_rate": 3.552679188385738e-07, "loss": 1.0478, "step": 88955 }, { "epoch": 2.6230281586318736, "grad_norm": 3.3070470130012914, "learning_rate": 3.5499467004477493e-07, "loss": 1.032, "step": 88960 }, { "epoch": 2.6231755860238835, "grad_norm": 3.3306323379242904, "learning_rate": 3.547215211528829e-07, "loss": 1.0204, "step": 88965 }, { "epoch": 2.6233230134158925, "grad_norm": 3.5834897164513664, "learning_rate": 3.54448472170939e-07, "loss": 1.0627, "step": 88970 }, { "epoch": 2.6234704408079024, "grad_norm": 3.493521550612455, "learning_rate": 3.5417552310697446e-07, "loss": 1.0435, "step": 88975 }, { "epoch": 2.6236178681999114, "grad_norm": 3.591129076639889, "learning_rate": 3.5390267396902403e-07, "loss": 1.0791, "step": 88980 }, { "epoch": 2.623765295591921, "grad_norm": 3.4508662791492744, "learning_rate": 3.536299247651151e-07, "loss": 1.0332, "step": 88985 }, { "epoch": 2.6239127229839303, "grad_norm": 3.5485996626377454, "learning_rate": 3.533572755032737e-07, "loss": 1.0492, "step": 88990 }, { "epoch": 2.6240601503759398, "grad_norm": 3.4895689535170464, "learning_rate": 3.530847261915229e-07, "loss": 1.0334, "step": 88995 }, { "epoch": 2.624207577767949, "grad_norm": 3.4202686290250486, "learning_rate": 3.5281227683788276e-07, "loss": 1.0071, "step": 89000 }, { "epoch": 2.624207577767949, "eval_loss": 1.0782111883163452, "eval_runtime": 4.3318, "eval_samples_per_second": 91.417, "eval_steps_per_second": 3.001, "step": 89000 }, { "epoch": 2.6243550051599587, "grad_norm": 3.636587034590064, "learning_rate": 3.5253992745037086e-07, "loss": 1.0666, "step": 89005 }, { "epoch": 2.624502432551968, "grad_norm": 3.545197867289085, "learning_rate": 3.522676780370017e-07, "loss": 1.0034, "step": 89010 }, { "epoch": 2.6246498599439776, "grad_norm": 3.5539521284984215, "learning_rate": 3.5199552860578505e-07, "loss": 1.0357, "step": 89015 }, { "epoch": 2.624797287335987, "grad_norm": 3.358203138848337, "learning_rate": 3.5172347916473043e-07, "loss": 1.0518, "step": 89020 }, { "epoch": 2.6249447147279965, "grad_norm": 3.4621814703237708, "learning_rate": 3.514515297218426e-07, "loss": 1.0536, "step": 89025 }, { "epoch": 2.625092142120006, "grad_norm": 3.675598734773371, "learning_rate": 3.51179680285124e-07, "loss": 1.0654, "step": 89030 }, { "epoch": 2.6252395695120154, "grad_norm": 3.6170052867289777, "learning_rate": 3.5090793086257565e-07, "loss": 1.0722, "step": 89035 }, { "epoch": 2.625386996904025, "grad_norm": 3.538215213740233, "learning_rate": 3.506362814621912e-07, "loss": 1.0393, "step": 89040 }, { "epoch": 2.6255344242960343, "grad_norm": 3.5843442839453754, "learning_rate": 3.5036473209196674e-07, "loss": 1.0747, "step": 89045 }, { "epoch": 2.625681851688044, "grad_norm": 3.57288832371312, "learning_rate": 3.500932827598913e-07, "loss": 1.0461, "step": 89050 }, { "epoch": 2.625829279080053, "grad_norm": 3.381008597247053, "learning_rate": 3.498219334739543e-07, "loss": 1.022, "step": 89055 }, { "epoch": 2.6259767064720627, "grad_norm": 3.598154526613061, "learning_rate": 3.495506842421389e-07, "loss": 1.0587, "step": 89060 }, { "epoch": 2.6261241338640717, "grad_norm": 3.6081782612647566, "learning_rate": 3.492795350724276e-07, "loss": 1.0156, "step": 89065 }, { "epoch": 2.6262715612560816, "grad_norm": 3.5376800158094865, "learning_rate": 3.490084859727989e-07, "loss": 1.0192, "step": 89070 }, { "epoch": 2.6264189886480906, "grad_norm": 3.775264366086664, "learning_rate": 3.487375369512294e-07, "loss": 1.0735, "step": 89075 }, { "epoch": 2.6265664160401, "grad_norm": 3.6745659778650417, "learning_rate": 3.4846668801569224e-07, "loss": 1.0849, "step": 89080 }, { "epoch": 2.6267138434321096, "grad_norm": 3.4314875599253383, "learning_rate": 3.481959391741561e-07, "loss": 1.015, "step": 89085 }, { "epoch": 2.626861270824119, "grad_norm": 3.487299328463364, "learning_rate": 3.479252904345892e-07, "loss": 1.0503, "step": 89090 }, { "epoch": 2.6270086982161285, "grad_norm": 3.7231372053975242, "learning_rate": 3.476547418049551e-07, "loss": 1.0383, "step": 89095 }, { "epoch": 2.627156125608138, "grad_norm": 3.716139038503173, "learning_rate": 3.473842932932154e-07, "loss": 1.053, "step": 89100 }, { "epoch": 2.6273035530001474, "grad_norm": 3.6036876087943166, "learning_rate": 3.4711394490732835e-07, "loss": 1.0287, "step": 89105 }, { "epoch": 2.627450980392157, "grad_norm": 3.639552540333691, "learning_rate": 3.468436966552492e-07, "loss": 1.0441, "step": 89110 }, { "epoch": 2.6275984077841663, "grad_norm": 3.5761626065942895, "learning_rate": 3.4657354854493035e-07, "loss": 1.0397, "step": 89115 }, { "epoch": 2.6277458351761758, "grad_norm": 3.43292415412287, "learning_rate": 3.4630350058432167e-07, "loss": 1.0216, "step": 89120 }, { "epoch": 2.6278932625681852, "grad_norm": 3.5910215432942154, "learning_rate": 3.460335527813677e-07, "loss": 1.0714, "step": 89125 }, { "epoch": 2.6280406899601947, "grad_norm": 3.558966564514715, "learning_rate": 3.457637051440145e-07, "loss": 1.0249, "step": 89130 }, { "epoch": 2.628188117352204, "grad_norm": 3.3881527947507206, "learning_rate": 3.454939576802012e-07, "loss": 1.0025, "step": 89135 }, { "epoch": 2.6283355447442136, "grad_norm": 3.523765906542011, "learning_rate": 3.4522431039786554e-07, "loss": 1.0738, "step": 89140 }, { "epoch": 2.628482972136223, "grad_norm": 3.601177619371247, "learning_rate": 3.4495476330494297e-07, "loss": 1.0534, "step": 89145 }, { "epoch": 2.628630399528232, "grad_norm": 3.508420254700519, "learning_rate": 3.446853164093637e-07, "loss": 1.0567, "step": 89150 }, { "epoch": 2.628777826920242, "grad_norm": 3.6219274204439165, "learning_rate": 3.4441596971905855e-07, "loss": 1.0771, "step": 89155 }, { "epoch": 2.628925254312251, "grad_norm": 3.636914929383311, "learning_rate": 3.441467232419515e-07, "loss": 1.0937, "step": 89160 }, { "epoch": 2.629072681704261, "grad_norm": 3.5210322645539267, "learning_rate": 3.438775769859663e-07, "loss": 1.0214, "step": 89165 }, { "epoch": 2.62922010909627, "grad_norm": 3.5769717614713588, "learning_rate": 3.4360853095902283e-07, "loss": 1.0286, "step": 89170 }, { "epoch": 2.6293675364882794, "grad_norm": 3.5098361756490397, "learning_rate": 3.433395851690377e-07, "loss": 1.0602, "step": 89175 }, { "epoch": 2.629514963880289, "grad_norm": 3.51659036568177, "learning_rate": 3.430707396239258e-07, "loss": 1.004, "step": 89180 }, { "epoch": 2.6296623912722983, "grad_norm": 3.3505630567862026, "learning_rate": 3.42801994331597e-07, "loss": 1.0195, "step": 89185 }, { "epoch": 2.6298098186643077, "grad_norm": 3.5113032585511794, "learning_rate": 3.425333492999609e-07, "loss": 1.0489, "step": 89190 }, { "epoch": 2.629957246056317, "grad_norm": 3.541907028328937, "learning_rate": 3.42264804536922e-07, "loss": 1.0722, "step": 89195 }, { "epoch": 2.6301046734483267, "grad_norm": 3.6076410514089545, "learning_rate": 3.419963600503814e-07, "loss": 1.036, "step": 89200 }, { "epoch": 2.630252100840336, "grad_norm": 3.563643802346057, "learning_rate": 3.417280158482408e-07, "loss": 1.0835, "step": 89205 }, { "epoch": 2.6303995282323456, "grad_norm": 3.729151739596006, "learning_rate": 3.4145977193839424e-07, "loss": 1.0691, "step": 89210 }, { "epoch": 2.630546955624355, "grad_norm": 3.4967960327235703, "learning_rate": 3.4119162832873627e-07, "loss": 1.0039, "step": 89215 }, { "epoch": 2.6306943830163645, "grad_norm": 3.45529538027394, "learning_rate": 3.4092358502715804e-07, "loss": 1.0656, "step": 89220 }, { "epoch": 2.630841810408374, "grad_norm": 3.426223155867188, "learning_rate": 3.406556420415441e-07, "loss": 1.0246, "step": 89225 }, { "epoch": 2.6309892378003834, "grad_norm": 3.3146432176024643, "learning_rate": 3.403877993797827e-07, "loss": 1.0275, "step": 89230 }, { "epoch": 2.631136665192393, "grad_norm": 3.5958098065678055, "learning_rate": 3.4012005704975217e-07, "loss": 1.0527, "step": 89235 }, { "epoch": 2.6312840925844023, "grad_norm": 3.542627545518922, "learning_rate": 3.3985241505933406e-07, "loss": 0.9976, "step": 89240 }, { "epoch": 2.631431519976412, "grad_norm": 3.4456256222770327, "learning_rate": 3.395848734164021e-07, "loss": 1.0072, "step": 89245 }, { "epoch": 2.6315789473684212, "grad_norm": 3.7663561569777646, "learning_rate": 3.393174321288292e-07, "loss": 1.0416, "step": 89250 }, { "epoch": 2.6317263747604303, "grad_norm": 3.5875193794018316, "learning_rate": 3.390500912044857e-07, "loss": 1.0637, "step": 89255 }, { "epoch": 2.63187380215244, "grad_norm": 3.428493200209043, "learning_rate": 3.3878285065123777e-07, "loss": 1.0452, "step": 89260 }, { "epoch": 2.632021229544449, "grad_norm": 3.6987728633931862, "learning_rate": 3.3851571047695e-07, "loss": 1.0663, "step": 89265 }, { "epoch": 2.6321686569364586, "grad_norm": 3.577892500570337, "learning_rate": 3.3824867068948354e-07, "loss": 1.0074, "step": 89270 }, { "epoch": 2.632316084328468, "grad_norm": 3.3889441455616702, "learning_rate": 3.3798173129669515e-07, "loss": 1.0023, "step": 89275 }, { "epoch": 2.6324635117204775, "grad_norm": 3.5191970498244665, "learning_rate": 3.3771489230644046e-07, "loss": 1.0156, "step": 89280 }, { "epoch": 2.632610939112487, "grad_norm": 3.565896727803849, "learning_rate": 3.374481537265717e-07, "loss": 1.0315, "step": 89285 }, { "epoch": 2.6327583665044965, "grad_norm": 3.4033587160626024, "learning_rate": 3.371815155649374e-07, "loss": 1.0743, "step": 89290 }, { "epoch": 2.632905793896506, "grad_norm": 3.6470875648401737, "learning_rate": 3.3691497782938477e-07, "loss": 1.0347, "step": 89295 }, { "epoch": 2.6330532212885154, "grad_norm": 3.5694407490574225, "learning_rate": 3.3664854052775503e-07, "loss": 1.0696, "step": 89300 }, { "epoch": 2.633200648680525, "grad_norm": 3.4552717357994642, "learning_rate": 3.3638220366789056e-07, "loss": 1.0454, "step": 89305 }, { "epoch": 2.6333480760725343, "grad_norm": 3.4612756583394275, "learning_rate": 3.361159672576268e-07, "loss": 1.0102, "step": 89310 }, { "epoch": 2.6334955034645438, "grad_norm": 3.5220243546572134, "learning_rate": 3.358498313048e-07, "loss": 1.0278, "step": 89315 }, { "epoch": 2.633642930856553, "grad_norm": 3.4278985893864315, "learning_rate": 3.3558379581724014e-07, "loss": 1.0279, "step": 89320 }, { "epoch": 2.6337903582485627, "grad_norm": 3.445845176258756, "learning_rate": 3.3531786080277556e-07, "loss": 1.0112, "step": 89325 }, { "epoch": 2.633937785640572, "grad_norm": 3.5719519564379154, "learning_rate": 3.3505202626923246e-07, "loss": 1.0362, "step": 89330 }, { "epoch": 2.6340852130325816, "grad_norm": 3.378083804866045, "learning_rate": 3.3478629222443254e-07, "loss": 1.0384, "step": 89335 }, { "epoch": 2.634232640424591, "grad_norm": 3.689370955713623, "learning_rate": 3.3452065867619663e-07, "loss": 1.0184, "step": 89340 }, { "epoch": 2.6343800678166005, "grad_norm": 3.5323261782325095, "learning_rate": 3.3425512563233966e-07, "loss": 1.0629, "step": 89345 }, { "epoch": 2.6345274952086095, "grad_norm": 3.377550963716547, "learning_rate": 3.3398969310067626e-07, "loss": 1.0194, "step": 89350 }, { "epoch": 2.6346749226006194, "grad_norm": 3.5507192204924385, "learning_rate": 3.3372436108901643e-07, "loss": 1.0295, "step": 89355 }, { "epoch": 2.6348223499926284, "grad_norm": 3.647653707627479, "learning_rate": 3.3345912960516855e-07, "loss": 1.0512, "step": 89360 }, { "epoch": 2.6349697773846383, "grad_norm": 3.580319750369562, "learning_rate": 3.331939986569367e-07, "loss": 1.0569, "step": 89365 }, { "epoch": 2.6351172047766473, "grad_norm": 3.561133432107996, "learning_rate": 3.3292896825212345e-07, "loss": 1.0786, "step": 89370 }, { "epoch": 2.635264632168657, "grad_norm": 3.399328317084421, "learning_rate": 3.3266403839852715e-07, "loss": 1.0266, "step": 89375 }, { "epoch": 2.6354120595606663, "grad_norm": 3.5393142135799676, "learning_rate": 3.3239920910394405e-07, "loss": 1.0007, "step": 89380 }, { "epoch": 2.6355594869526757, "grad_norm": 3.667029297778915, "learning_rate": 3.321344803761654e-07, "loss": 1.0523, "step": 89385 }, { "epoch": 2.635706914344685, "grad_norm": 3.524626372096686, "learning_rate": 3.318698522229842e-07, "loss": 1.0175, "step": 89390 }, { "epoch": 2.6358543417366946, "grad_norm": 3.479226398838384, "learning_rate": 3.3160532465218453e-07, "loss": 1.0307, "step": 89395 }, { "epoch": 2.636001769128704, "grad_norm": 3.591581998506745, "learning_rate": 3.3134089767155194e-07, "loss": 1.057, "step": 89400 }, { "epoch": 2.6361491965207136, "grad_norm": 3.3984609972865427, "learning_rate": 3.3107657128886764e-07, "loss": 1.0294, "step": 89405 }, { "epoch": 2.636296623912723, "grad_norm": 3.555748268517631, "learning_rate": 3.308123455119079e-07, "loss": 1.0304, "step": 89410 }, { "epoch": 2.6364440513047325, "grad_norm": 3.5322798289211166, "learning_rate": 3.305482203484507e-07, "loss": 1.034, "step": 89415 }, { "epoch": 2.636591478696742, "grad_norm": 3.4928451408871735, "learning_rate": 3.302841958062661e-07, "loss": 1.0365, "step": 89420 }, { "epoch": 2.6367389060887514, "grad_norm": 3.4321895171832173, "learning_rate": 3.3002027189312363e-07, "loss": 1.0358, "step": 89425 }, { "epoch": 2.636886333480761, "grad_norm": 3.506255268991013, "learning_rate": 3.2975644861679006e-07, "loss": 1.0152, "step": 89430 }, { "epoch": 2.6370337608727703, "grad_norm": 3.353271375006673, "learning_rate": 3.2949272598502833e-07, "loss": 1.0124, "step": 89435 }, { "epoch": 2.6371811882647798, "grad_norm": 3.7816911133168443, "learning_rate": 3.2922910400559933e-07, "loss": 1.0275, "step": 89440 }, { "epoch": 2.6373286156567888, "grad_norm": 3.619765782020348, "learning_rate": 3.2896558268625977e-07, "loss": 1.0259, "step": 89445 }, { "epoch": 2.6374760430487987, "grad_norm": 3.5216012082118775, "learning_rate": 3.287021620347642e-07, "loss": 1.0487, "step": 89450 }, { "epoch": 2.6376234704408077, "grad_norm": 3.518666575366269, "learning_rate": 3.2843884205886495e-07, "loss": 1.0385, "step": 89455 }, { "epoch": 2.6377708978328176, "grad_norm": 3.4864711118547773, "learning_rate": 3.281756227663085e-07, "loss": 0.9858, "step": 89460 }, { "epoch": 2.6379183252248266, "grad_norm": 3.537744314319836, "learning_rate": 3.279125041648434e-07, "loss": 1.0344, "step": 89465 }, { "epoch": 2.638065752616836, "grad_norm": 3.433604273939806, "learning_rate": 3.2764948626220925e-07, "loss": 1.0432, "step": 89470 }, { "epoch": 2.6382131800088455, "grad_norm": 3.6032258426910464, "learning_rate": 3.2738656906614736e-07, "loss": 1.0725, "step": 89475 }, { "epoch": 2.638360607400855, "grad_norm": 3.6717641375219303, "learning_rate": 3.2712375258439396e-07, "loss": 1.0899, "step": 89480 }, { "epoch": 2.6385080347928644, "grad_norm": 3.570374289031274, "learning_rate": 3.2686103682468176e-07, "loss": 1.0384, "step": 89485 }, { "epoch": 2.638655462184874, "grad_norm": 3.56304352436514, "learning_rate": 3.265984217947436e-07, "loss": 1.054, "step": 89490 }, { "epoch": 2.6388028895768834, "grad_norm": 3.3736591327346246, "learning_rate": 3.2633590750230464e-07, "loss": 1.0269, "step": 89495 }, { "epoch": 2.638950316968893, "grad_norm": 3.573355073501917, "learning_rate": 3.2607349395509246e-07, "loss": 1.0251, "step": 89500 }, { "epoch": 2.638950316968893, "eval_loss": 1.0779036283493042, "eval_runtime": 4.186, "eval_samples_per_second": 94.6, "eval_steps_per_second": 3.106, "step": 89500 }, { "epoch": 2.6390977443609023, "grad_norm": 3.4403713575001422, "learning_rate": 3.2581118116082665e-07, "loss": 1.0484, "step": 89505 }, { "epoch": 2.6392451717529117, "grad_norm": 3.5667392067362913, "learning_rate": 3.255489691272269e-07, "loss": 1.0252, "step": 89510 }, { "epoch": 2.639392599144921, "grad_norm": 3.621084807127653, "learning_rate": 3.2528685786200913e-07, "loss": 1.0388, "step": 89515 }, { "epoch": 2.6395400265369307, "grad_norm": 3.5994887306753984, "learning_rate": 3.250248473728859e-07, "loss": 1.0261, "step": 89520 }, { "epoch": 2.63968745392894, "grad_norm": 3.2431274974444033, "learning_rate": 3.2476293766756775e-07, "loss": 1.0063, "step": 89525 }, { "epoch": 2.6398348813209496, "grad_norm": 3.52378197750774, "learning_rate": 3.245011287537618e-07, "loss": 1.0726, "step": 89530 }, { "epoch": 2.639982308712959, "grad_norm": 3.4479383334051747, "learning_rate": 3.2423942063917107e-07, "loss": 1.059, "step": 89535 }, { "epoch": 2.640129736104968, "grad_norm": 3.368094864762716, "learning_rate": 3.2397781333149733e-07, "loss": 1.0255, "step": 89540 }, { "epoch": 2.640277163496978, "grad_norm": 3.452447683786921, "learning_rate": 3.2371630683843856e-07, "loss": 1.077, "step": 89545 }, { "epoch": 2.640424590888987, "grad_norm": 3.345429625080147, "learning_rate": 3.234549011676895e-07, "loss": 1.0305, "step": 89550 }, { "epoch": 2.640572018280997, "grad_norm": 3.3441322824018473, "learning_rate": 3.231935963269439e-07, "loss": 1.0164, "step": 89555 }, { "epoch": 2.640719445673006, "grad_norm": 3.5711295560161567, "learning_rate": 3.2293239232388776e-07, "loss": 1.0396, "step": 89560 }, { "epoch": 2.6408668730650153, "grad_norm": 3.5523002915855404, "learning_rate": 3.226712891662112e-07, "loss": 1.0434, "step": 89565 }, { "epoch": 2.641014300457025, "grad_norm": 3.5925448954655237, "learning_rate": 3.2241028686159383e-07, "loss": 1.011, "step": 89570 }, { "epoch": 2.6411617278490342, "grad_norm": 3.513048123168988, "learning_rate": 3.2214938541771917e-07, "loss": 1.0222, "step": 89575 }, { "epoch": 2.6413091552410437, "grad_norm": 3.5357492341247325, "learning_rate": 3.218885848422623e-07, "loss": 1.0467, "step": 89580 }, { "epoch": 2.641456582633053, "grad_norm": 3.705207738780004, "learning_rate": 3.2162788514289833e-07, "loss": 1.0528, "step": 89585 }, { "epoch": 2.6416040100250626, "grad_norm": 3.6387033523965835, "learning_rate": 3.213672863272986e-07, "loss": 1.0538, "step": 89590 }, { "epoch": 2.641751437417072, "grad_norm": 3.571292184405134, "learning_rate": 3.2110678840313156e-07, "loss": 1.0444, "step": 89595 }, { "epoch": 2.6418988648090815, "grad_norm": 3.388475490023244, "learning_rate": 3.208463913780632e-07, "loss": 1.003, "step": 89600 }, { "epoch": 2.642046292201091, "grad_norm": 3.4896471726555203, "learning_rate": 3.205860952597549e-07, "loss": 1.0361, "step": 89605 }, { "epoch": 2.6421937195931005, "grad_norm": 3.5629840982802814, "learning_rate": 3.203259000558667e-07, "loss": 0.9975, "step": 89610 }, { "epoch": 2.64234114698511, "grad_norm": 3.4980892840349505, "learning_rate": 3.2006580577405546e-07, "loss": 1.0351, "step": 89615 }, { "epoch": 2.6424885743771194, "grad_norm": 3.525209635771974, "learning_rate": 3.1980581242197424e-07, "loss": 1.0495, "step": 89620 }, { "epoch": 2.642636001769129, "grad_norm": 3.5265330768447285, "learning_rate": 3.1954592000727394e-07, "loss": 1.0119, "step": 89625 }, { "epoch": 2.6427834291611383, "grad_norm": 3.4262342766895286, "learning_rate": 3.1928612853760184e-07, "loss": 1.0347, "step": 89630 }, { "epoch": 2.6429308565531473, "grad_norm": 3.4991425691031317, "learning_rate": 3.1902643802060303e-07, "loss": 1.0858, "step": 89635 }, { "epoch": 2.643078283945157, "grad_norm": 3.389093382979741, "learning_rate": 3.187668484639197e-07, "loss": 1.003, "step": 89640 }, { "epoch": 2.643225711337166, "grad_norm": 3.4593358161128567, "learning_rate": 3.1850735987518876e-07, "loss": 1.0379, "step": 89645 }, { "epoch": 2.643373138729176, "grad_norm": 3.5264217373867273, "learning_rate": 3.182479722620482e-07, "loss": 1.078, "step": 89650 }, { "epoch": 2.643520566121185, "grad_norm": 3.469766938200915, "learning_rate": 3.1798868563212904e-07, "loss": 1.0565, "step": 89655 }, { "epoch": 2.6436679935131946, "grad_norm": 3.2806903987725864, "learning_rate": 3.177294999930622e-07, "loss": 1.0377, "step": 89660 }, { "epoch": 2.643815420905204, "grad_norm": 3.6766862711047934, "learning_rate": 3.1747041535247404e-07, "loss": 1.088, "step": 89665 }, { "epoch": 2.6439628482972135, "grad_norm": 3.503304545171143, "learning_rate": 3.1721143171798734e-07, "loss": 1.0506, "step": 89670 }, { "epoch": 2.644110275689223, "grad_norm": 3.689206240374751, "learning_rate": 3.1695254909722596e-07, "loss": 1.0652, "step": 89675 }, { "epoch": 2.6442577030812324, "grad_norm": 3.5369927203968197, "learning_rate": 3.166937674978046e-07, "loss": 1.044, "step": 89680 }, { "epoch": 2.644405130473242, "grad_norm": 3.3869490190124534, "learning_rate": 3.1643508692733967e-07, "loss": 1.0334, "step": 89685 }, { "epoch": 2.6445525578652513, "grad_norm": 3.5581235357294205, "learning_rate": 3.1617650739344347e-07, "loss": 1.0308, "step": 89690 }, { "epoch": 2.644699985257261, "grad_norm": 3.639185188013868, "learning_rate": 3.1591802890372405e-07, "loss": 1.0184, "step": 89695 }, { "epoch": 2.6448474126492703, "grad_norm": 3.4437307324674813, "learning_rate": 3.156596514657882e-07, "loss": 1.0357, "step": 89700 }, { "epoch": 2.6449948400412797, "grad_norm": 3.605942538919646, "learning_rate": 3.1540137508723867e-07, "loss": 1.0394, "step": 89705 }, { "epoch": 2.645142267433289, "grad_norm": 3.4496973705749214, "learning_rate": 3.1514319977567557e-07, "loss": 1.0171, "step": 89710 }, { "epoch": 2.6452896948252986, "grad_norm": 3.410818581858827, "learning_rate": 3.148851255386966e-07, "loss": 1.0325, "step": 89715 }, { "epoch": 2.645437122217308, "grad_norm": 3.71701855701238, "learning_rate": 3.146271523838944e-07, "loss": 1.0747, "step": 89720 }, { "epoch": 2.6455845496093175, "grad_norm": 3.4831737233249345, "learning_rate": 3.1436928031886165e-07, "loss": 1.0668, "step": 89725 }, { "epoch": 2.645731977001327, "grad_norm": 3.413577299955557, "learning_rate": 3.1411150935118565e-07, "loss": 1.0021, "step": 89730 }, { "epoch": 2.6458794043933365, "grad_norm": 3.528885554918809, "learning_rate": 3.13853839488452e-07, "loss": 1.0267, "step": 89735 }, { "epoch": 2.6460268317853455, "grad_norm": 3.646429885585321, "learning_rate": 3.1359627073824297e-07, "loss": 1.0558, "step": 89740 }, { "epoch": 2.6461742591773554, "grad_norm": 3.4698795520760486, "learning_rate": 3.1333880310813664e-07, "loss": 1.0209, "step": 89745 }, { "epoch": 2.6463216865693644, "grad_norm": 3.481803986387767, "learning_rate": 3.1308143660571153e-07, "loss": 1.066, "step": 89750 }, { "epoch": 2.6464691139613743, "grad_norm": 3.70543419914164, "learning_rate": 3.1282417123853867e-07, "loss": 1.0911, "step": 89755 }, { "epoch": 2.6466165413533833, "grad_norm": 3.4989151560372447, "learning_rate": 3.1256700701419034e-07, "loss": 1.0299, "step": 89760 }, { "epoch": 2.6467639687453928, "grad_norm": 3.36636286462365, "learning_rate": 3.123099439402326e-07, "loss": 1.0469, "step": 89765 }, { "epoch": 2.6469113961374022, "grad_norm": 3.3161468571302195, "learning_rate": 3.1205298202422976e-07, "loss": 1.0522, "step": 89770 }, { "epoch": 2.6470588235294117, "grad_norm": 3.5307891530948505, "learning_rate": 3.1179612127374414e-07, "loss": 1.0319, "step": 89775 }, { "epoch": 2.647206250921421, "grad_norm": 3.398450762872614, "learning_rate": 3.115393616963334e-07, "loss": 0.9831, "step": 89780 }, { "epoch": 2.6473536783134306, "grad_norm": 3.683799421369492, "learning_rate": 3.112827032995533e-07, "loss": 1.0072, "step": 89785 }, { "epoch": 2.64750110570544, "grad_norm": 3.5902270826046974, "learning_rate": 3.110261460909568e-07, "loss": 1.003, "step": 89790 }, { "epoch": 2.6476485330974495, "grad_norm": 3.6317548046108814, "learning_rate": 3.107696900780925e-07, "loss": 1.0272, "step": 89795 }, { "epoch": 2.647795960489459, "grad_norm": 3.457509790020058, "learning_rate": 3.1051333526850687e-07, "loss": 1.0504, "step": 89800 }, { "epoch": 2.6479433878814684, "grad_norm": 3.4080239563369186, "learning_rate": 3.1025708166974395e-07, "loss": 1.0447, "step": 89805 }, { "epoch": 2.648090815273478, "grad_norm": 3.461682843359499, "learning_rate": 3.100009292893442e-07, "loss": 1.0415, "step": 89810 }, { "epoch": 2.6482382426654874, "grad_norm": 3.6596720395198497, "learning_rate": 3.0974487813484593e-07, "loss": 1.0284, "step": 89815 }, { "epoch": 2.648385670057497, "grad_norm": 3.297928419238688, "learning_rate": 3.0948892821378183e-07, "loss": 1.0141, "step": 89820 }, { "epoch": 2.6485330974495063, "grad_norm": 3.3977579816899053, "learning_rate": 3.092330795336858e-07, "loss": 1.023, "step": 89825 }, { "epoch": 2.6486805248415157, "grad_norm": 3.3883803504797125, "learning_rate": 3.089773321020839e-07, "loss": 1.0389, "step": 89830 }, { "epoch": 2.6488279522335247, "grad_norm": 3.583645953577441, "learning_rate": 3.0872168592650475e-07, "loss": 1.0332, "step": 89835 }, { "epoch": 2.6489753796255346, "grad_norm": 3.5600257194448, "learning_rate": 3.084661410144686e-07, "loss": 1.0437, "step": 89840 }, { "epoch": 2.6491228070175437, "grad_norm": 3.4611458911206148, "learning_rate": 3.0821069737349644e-07, "loss": 1.1113, "step": 89845 }, { "epoch": 2.6492702344095536, "grad_norm": 3.672296145159303, "learning_rate": 3.0795535501110444e-07, "loss": 1.0874, "step": 89850 }, { "epoch": 2.6494176618015626, "grad_norm": 3.60500456375269, "learning_rate": 3.077001139348065e-07, "loss": 1.0802, "step": 89855 }, { "epoch": 2.649565089193572, "grad_norm": 3.624818367961297, "learning_rate": 3.0744497415211377e-07, "loss": 1.0437, "step": 89860 }, { "epoch": 2.6497125165855815, "grad_norm": 3.6698540959983292, "learning_rate": 3.071899356705335e-07, "loss": 1.0587, "step": 89865 }, { "epoch": 2.649859943977591, "grad_norm": 3.3917982018987596, "learning_rate": 3.069349984975706e-07, "loss": 1.0614, "step": 89870 }, { "epoch": 2.6500073713696004, "grad_norm": 3.708782068689443, "learning_rate": 3.066801626407265e-07, "loss": 1.0565, "step": 89875 }, { "epoch": 2.65015479876161, "grad_norm": 3.3538278854330863, "learning_rate": 3.0642542810750104e-07, "loss": 1.0504, "step": 89880 }, { "epoch": 2.6503022261536193, "grad_norm": 3.6232566326269944, "learning_rate": 3.061707949053891e-07, "loss": 1.082, "step": 89885 }, { "epoch": 2.650449653545629, "grad_norm": 3.6650432984578525, "learning_rate": 3.059162630418838e-07, "loss": 1.0622, "step": 89890 }, { "epoch": 2.6505970809376382, "grad_norm": 3.3520766143895315, "learning_rate": 3.0566183252447547e-07, "loss": 1.0312, "step": 89895 }, { "epoch": 2.6507445083296477, "grad_norm": 3.4435721880428227, "learning_rate": 3.054075033606514e-07, "loss": 1.0433, "step": 89900 }, { "epoch": 2.650891935721657, "grad_norm": 3.779925809254402, "learning_rate": 3.0515327555789356e-07, "loss": 1.0765, "step": 89905 }, { "epoch": 2.6510393631136666, "grad_norm": 3.574410890041443, "learning_rate": 3.048991491236855e-07, "loss": 1.0465, "step": 89910 }, { "epoch": 2.651186790505676, "grad_norm": 3.548837337505264, "learning_rate": 3.0464512406550335e-07, "loss": 1.0308, "step": 89915 }, { "epoch": 2.6513342178976855, "grad_norm": 3.457776410896514, "learning_rate": 3.043912003908224e-07, "loss": 1.0279, "step": 89920 }, { "epoch": 2.651481645289695, "grad_norm": 3.5274273764745354, "learning_rate": 3.041373781071158e-07, "loss": 1.0789, "step": 89925 }, { "epoch": 2.651629072681704, "grad_norm": 3.4799097519911193, "learning_rate": 3.0388365722184965e-07, "loss": 1.0422, "step": 89930 }, { "epoch": 2.651776500073714, "grad_norm": 3.4613974042115503, "learning_rate": 3.036300377424939e-07, "loss": 1.0596, "step": 89935 }, { "epoch": 2.651923927465723, "grad_norm": 3.5891240650734892, "learning_rate": 3.033765196765087e-07, "loss": 1.0131, "step": 89940 }, { "epoch": 2.652071354857733, "grad_norm": 3.5607999635594476, "learning_rate": 3.0312310303135526e-07, "loss": 1.0663, "step": 89945 }, { "epoch": 2.652218782249742, "grad_norm": 3.478951100007333, "learning_rate": 3.028697878144901e-07, "loss": 0.9827, "step": 89950 }, { "epoch": 2.6523662096417513, "grad_norm": 3.4719657910211184, "learning_rate": 3.0261657403336765e-07, "loss": 1.0457, "step": 89955 }, { "epoch": 2.6525136370337608, "grad_norm": 3.5655755558377313, "learning_rate": 3.0236346169543906e-07, "loss": 1.0332, "step": 89960 }, { "epoch": 2.65266106442577, "grad_norm": 3.6514271864433825, "learning_rate": 3.021104508081525e-07, "loss": 1.0448, "step": 89965 }, { "epoch": 2.6528084918177797, "grad_norm": 3.50890717329649, "learning_rate": 3.018575413789533e-07, "loss": 1.0571, "step": 89970 }, { "epoch": 2.652955919209789, "grad_norm": 3.5883708937532006, "learning_rate": 3.0160473341528344e-07, "loss": 1.0377, "step": 89975 }, { "epoch": 2.6531033466017986, "grad_norm": 3.6025622156273953, "learning_rate": 3.0135202692458106e-07, "loss": 1.0521, "step": 89980 }, { "epoch": 2.653250773993808, "grad_norm": 3.633671982862974, "learning_rate": 3.01099421914284e-07, "loss": 1.0355, "step": 89985 }, { "epoch": 2.6533982013858175, "grad_norm": 3.5348361890860405, "learning_rate": 3.008469183918247e-07, "loss": 1.0209, "step": 89990 }, { "epoch": 2.653545628777827, "grad_norm": 3.509137523300669, "learning_rate": 3.00594516364633e-07, "loss": 1.0299, "step": 89995 }, { "epoch": 2.6536930561698364, "grad_norm": 3.2664934275040745, "learning_rate": 3.0034221584013714e-07, "loss": 1.0334, "step": 90000 }, { "epoch": 2.6536930561698364, "eval_loss": 1.0776408910751343, "eval_runtime": 4.2961, "eval_samples_per_second": 92.177, "eval_steps_per_second": 3.026, "step": 90000 }, { "epoch": 2.653840483561846, "grad_norm": 3.574011852524168, "learning_rate": 3.0009001682575907e-07, "loss": 1.0133, "step": 90005 }, { "epoch": 2.6539879109538553, "grad_norm": 3.5041247496229597, "learning_rate": 2.998379193289233e-07, "loss": 1.0723, "step": 90010 }, { "epoch": 2.654135338345865, "grad_norm": 3.557581948673214, "learning_rate": 2.995859233570451e-07, "loss": 1.061, "step": 90015 }, { "epoch": 2.6542827657378743, "grad_norm": 3.4915312985674514, "learning_rate": 2.9933402891754237e-07, "loss": 1.0426, "step": 90020 }, { "epoch": 2.6544301931298833, "grad_norm": 3.5470586832795, "learning_rate": 2.990822360178254e-07, "loss": 1.0596, "step": 90025 }, { "epoch": 2.654577620521893, "grad_norm": 3.617909199649853, "learning_rate": 2.9883054466530404e-07, "loss": 1.0632, "step": 90030 }, { "epoch": 2.654725047913902, "grad_norm": 3.5296896565035456, "learning_rate": 2.985789548673849e-07, "loss": 1.047, "step": 90035 }, { "epoch": 2.654872475305912, "grad_norm": 3.8143733438461696, "learning_rate": 2.983274666314713e-07, "loss": 1.0859, "step": 90040 }, { "epoch": 2.655019902697921, "grad_norm": 3.525625426713725, "learning_rate": 2.98076079964963e-07, "loss": 1.045, "step": 90045 }, { "epoch": 2.6551673300899306, "grad_norm": 3.5978892118074333, "learning_rate": 2.978247948752584e-07, "loss": 1.0605, "step": 90050 }, { "epoch": 2.65531475748194, "grad_norm": 3.6252005292189047, "learning_rate": 2.975736113697507e-07, "loss": 1.0388, "step": 90055 }, { "epoch": 2.6554621848739495, "grad_norm": 3.6275337084479227, "learning_rate": 2.973225294558318e-07, "loss": 1.0348, "step": 90060 }, { "epoch": 2.655609612265959, "grad_norm": 3.500327805615581, "learning_rate": 2.970715491408897e-07, "loss": 1.0154, "step": 90065 }, { "epoch": 2.6557570396579684, "grad_norm": 3.584094382380587, "learning_rate": 2.968206704323105e-07, "loss": 1.0264, "step": 90070 }, { "epoch": 2.655904467049978, "grad_norm": 3.5576560318861845, "learning_rate": 2.9656989333747624e-07, "loss": 1.0152, "step": 90075 }, { "epoch": 2.6560518944419873, "grad_norm": 3.621151644237858, "learning_rate": 2.9631921786376594e-07, "loss": 1.0315, "step": 90080 }, { "epoch": 2.6561993218339968, "grad_norm": 3.487630894481477, "learning_rate": 2.9606864401855715e-07, "loss": 1.0335, "step": 90085 }, { "epoch": 2.656346749226006, "grad_norm": 3.5020413268949406, "learning_rate": 2.958181718092209e-07, "loss": 1.0636, "step": 90090 }, { "epoch": 2.6564941766180157, "grad_norm": 3.710177379634357, "learning_rate": 2.9556780124313065e-07, "loss": 1.0883, "step": 90095 }, { "epoch": 2.656641604010025, "grad_norm": 3.401967354474153, "learning_rate": 2.9531753232765206e-07, "loss": 1.0386, "step": 90100 }, { "epoch": 2.6567890314020346, "grad_norm": 3.463279474376575, "learning_rate": 2.9506736507014966e-07, "loss": 1.0161, "step": 90105 }, { "epoch": 2.656936458794044, "grad_norm": 3.5054483639569947, "learning_rate": 2.9481729947798507e-07, "loss": 1.0842, "step": 90110 }, { "epoch": 2.6570838861860535, "grad_norm": 3.5720646530381894, "learning_rate": 2.94567335558517e-07, "loss": 1.05, "step": 90115 }, { "epoch": 2.657231313578063, "grad_norm": 3.685514170768748, "learning_rate": 2.943174733191016e-07, "loss": 1.0186, "step": 90120 }, { "epoch": 2.6573787409700724, "grad_norm": 3.6118546597515357, "learning_rate": 2.9406771276708934e-07, "loss": 1.0943, "step": 90125 }, { "epoch": 2.6575261683620814, "grad_norm": 3.5679618332012573, "learning_rate": 2.9381805390983133e-07, "loss": 1.0598, "step": 90130 }, { "epoch": 2.6576735957540913, "grad_norm": 3.5092202856911996, "learning_rate": 2.9356849675467337e-07, "loss": 1.0211, "step": 90135 }, { "epoch": 2.6578210231461004, "grad_norm": 3.6333438541761014, "learning_rate": 2.9331904130895966e-07, "loss": 1.0406, "step": 90140 }, { "epoch": 2.65796845053811, "grad_norm": 3.6429966168639445, "learning_rate": 2.930696875800297e-07, "loss": 1.0769, "step": 90145 }, { "epoch": 2.6581158779301193, "grad_norm": 3.488104435711972, "learning_rate": 2.928204355752222e-07, "loss": 1.0713, "step": 90150 }, { "epoch": 2.6582633053221287, "grad_norm": 3.548439832187011, "learning_rate": 2.925712853018704e-07, "loss": 1.0261, "step": 90155 }, { "epoch": 2.658410732714138, "grad_norm": 3.546448970107295, "learning_rate": 2.923222367673077e-07, "loss": 1.0537, "step": 90160 }, { "epoch": 2.6585581601061476, "grad_norm": 3.4464418954664557, "learning_rate": 2.920732899788599e-07, "loss": 1.0169, "step": 90165 }, { "epoch": 2.658705587498157, "grad_norm": 3.398106879426194, "learning_rate": 2.918244449438552e-07, "loss": 1.0592, "step": 90170 }, { "epoch": 2.6588530148901666, "grad_norm": 3.499069115221987, "learning_rate": 2.9157570166961495e-07, "loss": 1.0199, "step": 90175 }, { "epoch": 2.659000442282176, "grad_norm": 3.4493843639682042, "learning_rate": 2.913270601634582e-07, "loss": 0.9872, "step": 90180 }, { "epoch": 2.6591478696741855, "grad_norm": 3.6591191118962607, "learning_rate": 2.9107852043270293e-07, "loss": 1.0522, "step": 90185 }, { "epoch": 2.659295297066195, "grad_norm": 3.3400300195591255, "learning_rate": 2.9083008248466076e-07, "loss": 1.0576, "step": 90190 }, { "epoch": 2.6594427244582044, "grad_norm": 3.4501728933863496, "learning_rate": 2.905817463266446e-07, "loss": 1.0134, "step": 90195 }, { "epoch": 2.659590151850214, "grad_norm": 3.601423466269524, "learning_rate": 2.9033351196595977e-07, "loss": 1.0563, "step": 90200 }, { "epoch": 2.6597375792422233, "grad_norm": 3.3802796998045963, "learning_rate": 2.9008537940991216e-07, "loss": 1.0638, "step": 90205 }, { "epoch": 2.6598850066342328, "grad_norm": 3.4466145317540846, "learning_rate": 2.8983734866580307e-07, "loss": 1.0477, "step": 90210 }, { "epoch": 2.6600324340262422, "grad_norm": 3.6513446653552295, "learning_rate": 2.8958941974093116e-07, "loss": 1.0476, "step": 90215 }, { "epoch": 2.6601798614182517, "grad_norm": 3.550538248618809, "learning_rate": 2.8934159264259183e-07, "loss": 1.0497, "step": 90220 }, { "epoch": 2.6603272888102607, "grad_norm": 3.405414841817667, "learning_rate": 2.8909386737807813e-07, "loss": 1.049, "step": 90225 }, { "epoch": 2.6604747162022706, "grad_norm": 3.523999463947562, "learning_rate": 2.8884624395467906e-07, "loss": 1.0283, "step": 90230 }, { "epoch": 2.6606221435942796, "grad_norm": 3.4444970315934573, "learning_rate": 2.8859872237968183e-07, "loss": 1.0448, "step": 90235 }, { "epoch": 2.6607695709862895, "grad_norm": 3.583920412478429, "learning_rate": 2.883513026603689e-07, "loss": 1.0353, "step": 90240 }, { "epoch": 2.6609169983782985, "grad_norm": 3.65453632625094, "learning_rate": 2.881039848040228e-07, "loss": 1.049, "step": 90245 }, { "epoch": 2.661064425770308, "grad_norm": 3.592914065124236, "learning_rate": 2.878567688179193e-07, "loss": 1.0443, "step": 90250 }, { "epoch": 2.6612118531623175, "grad_norm": 3.55935234073249, "learning_rate": 2.876096547093335e-07, "loss": 1.0526, "step": 90255 }, { "epoch": 2.661359280554327, "grad_norm": 3.400064080585498, "learning_rate": 2.873626424855383e-07, "loss": 0.9877, "step": 90260 }, { "epoch": 2.6615067079463364, "grad_norm": 3.5440645255691314, "learning_rate": 2.871157321538e-07, "loss": 1.0472, "step": 90265 }, { "epoch": 2.661654135338346, "grad_norm": 3.5105114476491903, "learning_rate": 2.8686892372138645e-07, "loss": 1.0578, "step": 90270 }, { "epoch": 2.6618015627303553, "grad_norm": 3.4505915607694666, "learning_rate": 2.866222171955582e-07, "loss": 1.0309, "step": 90275 }, { "epoch": 2.6619489901223647, "grad_norm": 3.4765343014012924, "learning_rate": 2.8637561258357687e-07, "loss": 1.0239, "step": 90280 }, { "epoch": 2.662096417514374, "grad_norm": 3.5866377300775434, "learning_rate": 2.8612910989269795e-07, "loss": 1.0284, "step": 90285 }, { "epoch": 2.6622438449063837, "grad_norm": 3.367907648089555, "learning_rate": 2.8588270913017525e-07, "loss": 1.0471, "step": 90290 }, { "epoch": 2.662391272298393, "grad_norm": 3.602830697413273, "learning_rate": 2.856364103032595e-07, "loss": 1.0452, "step": 90295 }, { "epoch": 2.6625386996904026, "grad_norm": 3.4908986717107804, "learning_rate": 2.853902134191984e-07, "loss": 1.027, "step": 90300 }, { "epoch": 2.662686127082412, "grad_norm": 3.5481235694200186, "learning_rate": 2.8514411848523604e-07, "loss": 1.0473, "step": 90305 }, { "epoch": 2.6628335544744215, "grad_norm": 3.372016582464002, "learning_rate": 2.8489812550861543e-07, "loss": 1.0516, "step": 90310 }, { "epoch": 2.662980981866431, "grad_norm": 3.465760423390813, "learning_rate": 2.846522344965724e-07, "loss": 1.0803, "step": 90315 }, { "epoch": 2.66312840925844, "grad_norm": 3.4803868651968215, "learning_rate": 2.844064454563462e-07, "loss": 1.0172, "step": 90320 }, { "epoch": 2.66327583665045, "grad_norm": 3.5080648803201324, "learning_rate": 2.8416075839516694e-07, "loss": 1.0377, "step": 90325 }, { "epoch": 2.663423264042459, "grad_norm": 3.2835511953546623, "learning_rate": 2.839151733202645e-07, "loss": 1.0299, "step": 90330 }, { "epoch": 2.663570691434469, "grad_norm": 3.49949991008185, "learning_rate": 2.836696902388666e-07, "loss": 1.0665, "step": 90335 }, { "epoch": 2.663718118826478, "grad_norm": 3.497074008791413, "learning_rate": 2.8342430915819573e-07, "loss": 1.0382, "step": 90340 }, { "epoch": 2.6638655462184873, "grad_norm": 3.588427441412989, "learning_rate": 2.831790300854736e-07, "loss": 1.0505, "step": 90345 }, { "epoch": 2.6640129736104967, "grad_norm": 3.3532488203052018, "learning_rate": 2.8293385302791615e-07, "loss": 1.0133, "step": 90350 }, { "epoch": 2.664160401002506, "grad_norm": 3.5117927204101784, "learning_rate": 2.8268877799274046e-07, "loss": 1.0267, "step": 90355 }, { "epoch": 2.6643078283945156, "grad_norm": 3.4710168015671456, "learning_rate": 2.824438049871558e-07, "loss": 0.979, "step": 90360 }, { "epoch": 2.664455255786525, "grad_norm": 3.452636143881481, "learning_rate": 2.8219893401837185e-07, "loss": 1.041, "step": 90365 }, { "epoch": 2.6646026831785345, "grad_norm": 3.4460287291187517, "learning_rate": 2.8195416509359404e-07, "loss": 1.0138, "step": 90370 }, { "epoch": 2.664750110570544, "grad_norm": 3.489513454944366, "learning_rate": 2.8170949822002494e-07, "loss": 1.0265, "step": 90375 }, { "epoch": 2.6648975379625535, "grad_norm": 3.5706132728166287, "learning_rate": 2.8146493340486426e-07, "loss": 1.0121, "step": 90380 }, { "epoch": 2.665044965354563, "grad_norm": 3.424522315490339, "learning_rate": 2.8122047065530914e-07, "loss": 1.0905, "step": 90385 }, { "epoch": 2.6651923927465724, "grad_norm": 3.4847941470113972, "learning_rate": 2.809761099785521e-07, "loss": 1.0142, "step": 90390 }, { "epoch": 2.665339820138582, "grad_norm": 3.4802360119166385, "learning_rate": 2.8073185138178416e-07, "loss": 1.0833, "step": 90395 }, { "epoch": 2.6654872475305913, "grad_norm": 3.565538376201009, "learning_rate": 2.804876948721928e-07, "loss": 1.0453, "step": 90400 }, { "epoch": 2.6656346749226008, "grad_norm": 3.5721937323911246, "learning_rate": 2.802436404569632e-07, "loss": 1.0653, "step": 90405 }, { "epoch": 2.66578210231461, "grad_norm": 3.5609903683904807, "learning_rate": 2.799996881432762e-07, "loss": 1.0522, "step": 90410 }, { "epoch": 2.6659295297066192, "grad_norm": 3.5847173702989417, "learning_rate": 2.797558379383107e-07, "loss": 1.0631, "step": 90415 }, { "epoch": 2.666076957098629, "grad_norm": 3.5584767207733066, "learning_rate": 2.79512089849243e-07, "loss": 1.0572, "step": 90420 }, { "epoch": 2.666224384490638, "grad_norm": 3.5282265198101617, "learning_rate": 2.792684438832441e-07, "loss": 1.0465, "step": 90425 }, { "epoch": 2.666371811882648, "grad_norm": 3.4486372777553984, "learning_rate": 2.7902490004748526e-07, "loss": 1.0164, "step": 90430 }, { "epoch": 2.666519239274657, "grad_norm": 3.474771716185364, "learning_rate": 2.7878145834913116e-07, "loss": 1.0557, "step": 90435 }, { "epoch": 2.6666666666666665, "grad_norm": 3.365481983929649, "learning_rate": 2.78538118795347e-07, "loss": 1.0026, "step": 90440 }, { "epoch": 2.666814094058676, "grad_norm": 3.5366043043321436, "learning_rate": 2.7829488139329244e-07, "loss": 1.0272, "step": 90445 }, { "epoch": 2.6669615214506854, "grad_norm": 3.5777809135406193, "learning_rate": 2.78051746150125e-07, "loss": 1.0724, "step": 90450 }, { "epoch": 2.667108948842695, "grad_norm": 3.52670515306235, "learning_rate": 2.7780871307300075e-07, "loss": 1.0511, "step": 90455 }, { "epoch": 2.6672563762347044, "grad_norm": 3.4811254771692766, "learning_rate": 2.77565782169068e-07, "loss": 1.0581, "step": 90460 }, { "epoch": 2.667403803626714, "grad_norm": 3.496195114434935, "learning_rate": 2.773229534454787e-07, "loss": 1.0313, "step": 90465 }, { "epoch": 2.6675512310187233, "grad_norm": 3.55331277052772, "learning_rate": 2.7708022690937655e-07, "loss": 1.03, "step": 90470 }, { "epoch": 2.6676986584107327, "grad_norm": 3.597248909347152, "learning_rate": 2.7683760256790427e-07, "loss": 1.0455, "step": 90475 }, { "epoch": 2.667846085802742, "grad_norm": 3.346227345509661, "learning_rate": 2.7659508042820106e-07, "loss": 1.068, "step": 90480 }, { "epoch": 2.6679935131947516, "grad_norm": 3.43859614497392, "learning_rate": 2.7635266049740424e-07, "loss": 0.9982, "step": 90485 }, { "epoch": 2.668140940586761, "grad_norm": 3.433174651665527, "learning_rate": 2.761103427826468e-07, "loss": 1.0064, "step": 90490 }, { "epoch": 2.6682883679787706, "grad_norm": 3.502224825583504, "learning_rate": 2.758681272910597e-07, "loss": 1.0265, "step": 90495 }, { "epoch": 2.66843579537078, "grad_norm": 3.584503322772786, "learning_rate": 2.7562601402976895e-07, "loss": 1.0144, "step": 90500 }, { "epoch": 2.66843579537078, "eval_loss": 1.0777382850646973, "eval_runtime": 4.1188, "eval_samples_per_second": 96.145, "eval_steps_per_second": 3.156, "step": 90500 }, { "epoch": 2.6685832227627895, "grad_norm": 3.3350920634450305, "learning_rate": 2.7538400300590126e-07, "loss": 0.9975, "step": 90505 }, { "epoch": 2.6687306501547985, "grad_norm": 3.692770038526025, "learning_rate": 2.7514209422657643e-07, "loss": 1.0388, "step": 90510 }, { "epoch": 2.6688780775468084, "grad_norm": 3.3797114617334967, "learning_rate": 2.749002876989129e-07, "loss": 1.0272, "step": 90515 }, { "epoch": 2.6690255049388174, "grad_norm": 3.3534844798597936, "learning_rate": 2.7465858343002745e-07, "loss": 1.0157, "step": 90520 }, { "epoch": 2.6691729323308273, "grad_norm": 3.3960746555550223, "learning_rate": 2.744169814270307e-07, "loss": 1.0255, "step": 90525 }, { "epoch": 2.6693203597228363, "grad_norm": 3.6625665128433305, "learning_rate": 2.7417548169703363e-07, "loss": 1.0495, "step": 90530 }, { "epoch": 2.669467787114846, "grad_norm": 3.4823183668466284, "learning_rate": 2.7393408424714093e-07, "loss": 1.0425, "step": 90535 }, { "epoch": 2.6696152145068552, "grad_norm": 3.584750302657472, "learning_rate": 2.7369278908445857e-07, "loss": 1.0317, "step": 90540 }, { "epoch": 2.6697626418988647, "grad_norm": 3.5938835008026633, "learning_rate": 2.734515962160847e-07, "loss": 1.0714, "step": 90545 }, { "epoch": 2.669910069290874, "grad_norm": 3.503725604882879, "learning_rate": 2.7321050564911727e-07, "loss": 1.0379, "step": 90550 }, { "epoch": 2.6700574966828836, "grad_norm": 3.3402151622287635, "learning_rate": 2.7296951739065073e-07, "loss": 1.0298, "step": 90555 }, { "epoch": 2.670204924074893, "grad_norm": 3.3886044045982633, "learning_rate": 2.7272863144777644e-07, "loss": 1.0124, "step": 90560 }, { "epoch": 2.6703523514669025, "grad_norm": 3.4712446231595635, "learning_rate": 2.724878478275829e-07, "loss": 1.0413, "step": 90565 }, { "epoch": 2.670499778858912, "grad_norm": 3.656366731340479, "learning_rate": 2.7224716653715606e-07, "loss": 1.0977, "step": 90570 }, { "epoch": 2.6706472062509214, "grad_norm": 3.4224172536469184, "learning_rate": 2.720065875835762e-07, "loss": 1.04, "step": 90575 }, { "epoch": 2.670794633642931, "grad_norm": 3.683579223630985, "learning_rate": 2.7176611097392545e-07, "loss": 1.0933, "step": 90580 }, { "epoch": 2.6709420610349404, "grad_norm": 3.5249217149748917, "learning_rate": 2.715257367152774e-07, "loss": 1.0565, "step": 90585 }, { "epoch": 2.67108948842695, "grad_norm": 3.3784875754653725, "learning_rate": 2.7128546481470715e-07, "loss": 1.0178, "step": 90590 }, { "epoch": 2.6712369158189593, "grad_norm": 3.547308865295643, "learning_rate": 2.7104529527928414e-07, "loss": 1.0309, "step": 90595 }, { "epoch": 2.6713843432109687, "grad_norm": 3.64186642504084, "learning_rate": 2.7080522811607557e-07, "loss": 1.0541, "step": 90600 }, { "epoch": 2.671531770602978, "grad_norm": 3.505508062619059, "learning_rate": 2.70565263332147e-07, "loss": 1.0295, "step": 90605 }, { "epoch": 2.6716791979949877, "grad_norm": 3.5834515740193686, "learning_rate": 2.703254009345571e-07, "loss": 1.0476, "step": 90610 }, { "epoch": 2.6718266253869967, "grad_norm": 3.543509578855589, "learning_rate": 2.700856409303672e-07, "loss": 1.0093, "step": 90615 }, { "epoch": 2.6719740527790066, "grad_norm": 3.629052465695175, "learning_rate": 2.6984598332663034e-07, "loss": 1.0637, "step": 90620 }, { "epoch": 2.6721214801710156, "grad_norm": 3.5566099582540023, "learning_rate": 2.6960642813039935e-07, "loss": 1.0117, "step": 90625 }, { "epoch": 2.6722689075630255, "grad_norm": 3.6186117098097297, "learning_rate": 2.693669753487231e-07, "loss": 1.0331, "step": 90630 }, { "epoch": 2.6724163349550345, "grad_norm": 3.5997761778722586, "learning_rate": 2.691276249886485e-07, "loss": 1.0763, "step": 90635 }, { "epoch": 2.672563762347044, "grad_norm": 3.6314029389970175, "learning_rate": 2.688883770572177e-07, "loss": 1.0804, "step": 90640 }, { "epoch": 2.6727111897390534, "grad_norm": 3.7125516348213146, "learning_rate": 2.6864923156147234e-07, "loss": 1.0794, "step": 90645 }, { "epoch": 2.672858617131063, "grad_norm": 3.4302521719646695, "learning_rate": 2.6841018850844836e-07, "loss": 1.0486, "step": 90650 }, { "epoch": 2.6730060445230723, "grad_norm": 3.552784341060613, "learning_rate": 2.6817124790517966e-07, "loss": 1.0775, "step": 90655 }, { "epoch": 2.673153471915082, "grad_norm": 3.3679203607182147, "learning_rate": 2.6793240975869783e-07, "loss": 0.9853, "step": 90660 }, { "epoch": 2.6733008993070912, "grad_norm": 3.433822599573885, "learning_rate": 2.6769367407603134e-07, "loss": 1.0353, "step": 90665 }, { "epoch": 2.6734483266991007, "grad_norm": 3.5636113022993547, "learning_rate": 2.674550408642046e-07, "loss": 1.0254, "step": 90670 }, { "epoch": 2.67359575409111, "grad_norm": 3.6138004420162257, "learning_rate": 2.6721651013023986e-07, "loss": 1.0443, "step": 90675 }, { "epoch": 2.6737431814831196, "grad_norm": 3.61262218100576, "learning_rate": 2.6697808188115694e-07, "loss": 1.0862, "step": 90680 }, { "epoch": 2.673890608875129, "grad_norm": 3.6171425518237714, "learning_rate": 2.667397561239698e-07, "loss": 1.0129, "step": 90685 }, { "epoch": 2.6740380362671385, "grad_norm": 3.4853776664781035, "learning_rate": 2.6650153286569415e-07, "loss": 1.0965, "step": 90690 }, { "epoch": 2.674185463659148, "grad_norm": 3.78222154075049, "learning_rate": 2.662634121133381e-07, "loss": 1.0476, "step": 90695 }, { "epoch": 2.6743328910511575, "grad_norm": 3.4643379997723613, "learning_rate": 2.660253938739089e-07, "loss": 1.0471, "step": 90700 }, { "epoch": 2.674480318443167, "grad_norm": 3.4409033685652197, "learning_rate": 2.6578747815441105e-07, "loss": 1.0436, "step": 90705 }, { "epoch": 2.674627745835176, "grad_norm": 3.6001600345117497, "learning_rate": 2.655496649618451e-07, "loss": 1.0543, "step": 90710 }, { "epoch": 2.674775173227186, "grad_norm": 3.6492780653961923, "learning_rate": 2.6531195430320966e-07, "loss": 1.0667, "step": 90715 }, { "epoch": 2.674922600619195, "grad_norm": 3.404957535524429, "learning_rate": 2.650743461854979e-07, "loss": 1.053, "step": 90720 }, { "epoch": 2.6750700280112047, "grad_norm": 3.5547226012618602, "learning_rate": 2.6483684061570423e-07, "loss": 1.073, "step": 90725 }, { "epoch": 2.6752174554032138, "grad_norm": 3.5550434721241793, "learning_rate": 2.6459943760081513e-07, "loss": 1.0379, "step": 90730 }, { "epoch": 2.675364882795223, "grad_norm": 3.521181566592579, "learning_rate": 2.643621371478179e-07, "loss": 1.0165, "step": 90735 }, { "epoch": 2.6755123101872327, "grad_norm": 3.386877829602076, "learning_rate": 2.641249392636949e-07, "loss": 1.01, "step": 90740 }, { "epoch": 2.675659737579242, "grad_norm": 3.584973745891208, "learning_rate": 2.63887843955426e-07, "loss": 1.0579, "step": 90745 }, { "epoch": 2.6758071649712516, "grad_norm": 3.6532733484916378, "learning_rate": 2.6365085122998763e-07, "loss": 1.0468, "step": 90750 }, { "epoch": 2.675954592363261, "grad_norm": 3.538539328527618, "learning_rate": 2.6341396109435505e-07, "loss": 1.0343, "step": 90755 }, { "epoch": 2.6761020197552705, "grad_norm": 3.4753949407832954, "learning_rate": 2.631771735554965e-07, "loss": 1.062, "step": 90760 }, { "epoch": 2.67624944714728, "grad_norm": 3.555995953470957, "learning_rate": 2.629404886203822e-07, "loss": 0.9953, "step": 90765 }, { "epoch": 2.6763968745392894, "grad_norm": 3.436307190907762, "learning_rate": 2.6270390629597573e-07, "loss": 1.0212, "step": 90770 }, { "epoch": 2.676544301931299, "grad_norm": 3.5514805199040658, "learning_rate": 2.6246742658923866e-07, "loss": 1.0544, "step": 90775 }, { "epoch": 2.6766917293233083, "grad_norm": 3.421450969551638, "learning_rate": 2.6223104950713036e-07, "loss": 1.0273, "step": 90780 }, { "epoch": 2.676839156715318, "grad_norm": 3.480847034714272, "learning_rate": 2.6199477505660487e-07, "loss": 0.9918, "step": 90785 }, { "epoch": 2.6769865841073273, "grad_norm": 3.4714165542584374, "learning_rate": 2.61758603244617e-07, "loss": 1.0362, "step": 90790 }, { "epoch": 2.6771340114993367, "grad_norm": 3.7445487608084798, "learning_rate": 2.6152253407811466e-07, "loss": 1.0563, "step": 90795 }, { "epoch": 2.677281438891346, "grad_norm": 3.60716573274757, "learning_rate": 2.6128656756404593e-07, "loss": 1.026, "step": 90800 }, { "epoch": 2.677428866283355, "grad_norm": 3.598868717788445, "learning_rate": 2.610507037093536e-07, "loss": 1.0556, "step": 90805 }, { "epoch": 2.677576293675365, "grad_norm": 3.518332210068912, "learning_rate": 2.60814942520978e-07, "loss": 1.0332, "step": 90810 }, { "epoch": 2.677723721067374, "grad_norm": 3.7550355909981223, "learning_rate": 2.6057928400585683e-07, "loss": 1.0793, "step": 90815 }, { "epoch": 2.677871148459384, "grad_norm": 3.6626911260256114, "learning_rate": 2.603437281709246e-07, "loss": 1.052, "step": 90820 }, { "epoch": 2.678018575851393, "grad_norm": 3.486349652986387, "learning_rate": 2.601082750231133e-07, "loss": 1.0365, "step": 90825 }, { "epoch": 2.6781660032434025, "grad_norm": 3.5344346597241136, "learning_rate": 2.5987292456935154e-07, "loss": 1.081, "step": 90830 }, { "epoch": 2.678313430635412, "grad_norm": 3.4369060710015935, "learning_rate": 2.5963767681656284e-07, "loss": 1.0565, "step": 90835 }, { "epoch": 2.6784608580274214, "grad_norm": 3.5176111626820625, "learning_rate": 2.5940253177167224e-07, "loss": 1.0137, "step": 90840 }, { "epoch": 2.678608285419431, "grad_norm": 3.6427028529917482, "learning_rate": 2.5916748944159783e-07, "loss": 1.0094, "step": 90845 }, { "epoch": 2.6787557128114403, "grad_norm": 3.5771892510410646, "learning_rate": 2.589325498332558e-07, "loss": 1.0454, "step": 90850 }, { "epoch": 2.6789031402034498, "grad_norm": 3.4965700885516946, "learning_rate": 2.5869771295356017e-07, "loss": 1.0709, "step": 90855 }, { "epoch": 2.6790505675954592, "grad_norm": 3.542113238463654, "learning_rate": 2.584629788094209e-07, "loss": 1.0571, "step": 90860 }, { "epoch": 2.6791979949874687, "grad_norm": 3.2916355887946676, "learning_rate": 2.582283474077457e-07, "loss": 1.0241, "step": 90865 }, { "epoch": 2.679345422379478, "grad_norm": 3.571974550708792, "learning_rate": 2.579938187554375e-07, "loss": 1.0749, "step": 90870 }, { "epoch": 2.6794928497714876, "grad_norm": 3.7157922330255464, "learning_rate": 2.577593928594002e-07, "loss": 1.0505, "step": 90875 }, { "epoch": 2.679640277163497, "grad_norm": 3.2548668557695986, "learning_rate": 2.5752506972652923e-07, "loss": 1.0237, "step": 90880 }, { "epoch": 2.6797877045555065, "grad_norm": 3.421061658254893, "learning_rate": 2.572908493637216e-07, "loss": 1.0235, "step": 90885 }, { "epoch": 2.679935131947516, "grad_norm": 3.475439472542285, "learning_rate": 2.570567317778688e-07, "loss": 1.005, "step": 90890 }, { "epoch": 2.6800825593395254, "grad_norm": 3.4443552182261845, "learning_rate": 2.5682271697586036e-07, "loss": 1.0169, "step": 90895 }, { "epoch": 2.6802299867315345, "grad_norm": 3.694245383540501, "learning_rate": 2.5658880496458206e-07, "loss": 1.0991, "step": 90900 }, { "epoch": 2.6803774141235444, "grad_norm": 3.483713298483908, "learning_rate": 2.5635499575091785e-07, "loss": 1.078, "step": 90905 }, { "epoch": 2.6805248415155534, "grad_norm": 3.596506556162262, "learning_rate": 2.561212893417465e-07, "loss": 1.0809, "step": 90910 }, { "epoch": 2.6806722689075633, "grad_norm": 3.5908390267242063, "learning_rate": 2.558876857439459e-07, "loss": 1.0601, "step": 90915 }, { "epoch": 2.6808196962995723, "grad_norm": 3.3382002556048396, "learning_rate": 2.5565418496439034e-07, "loss": 1.0195, "step": 90920 }, { "epoch": 2.6809671236915817, "grad_norm": 3.4898793263367405, "learning_rate": 2.5542078700994993e-07, "loss": 1.0348, "step": 90925 }, { "epoch": 2.681114551083591, "grad_norm": 3.602069862746645, "learning_rate": 2.551874918874937e-07, "loss": 1.0351, "step": 90930 }, { "epoch": 2.6812619784756007, "grad_norm": 3.453364762689865, "learning_rate": 2.549542996038862e-07, "loss": 1.0107, "step": 90935 }, { "epoch": 2.68140940586761, "grad_norm": 3.5888843149276783, "learning_rate": 2.5472121016598975e-07, "loss": 1.004, "step": 90940 }, { "epoch": 2.6815568332596196, "grad_norm": 3.5331694889490746, "learning_rate": 2.544882235806614e-07, "loss": 1.0323, "step": 90945 }, { "epoch": 2.681704260651629, "grad_norm": 3.42389451018482, "learning_rate": 2.5425533985476036e-07, "loss": 1.0558, "step": 90950 }, { "epoch": 2.6818516880436385, "grad_norm": 3.5290615492348723, "learning_rate": 2.5402255899513646e-07, "loss": 1.0185, "step": 90955 }, { "epoch": 2.681999115435648, "grad_norm": 3.525424545773195, "learning_rate": 2.5378988100864126e-07, "loss": 1.0278, "step": 90960 }, { "epoch": 2.6821465428276574, "grad_norm": 3.3557716806467655, "learning_rate": 2.5355730590212065e-07, "loss": 0.9908, "step": 90965 }, { "epoch": 2.682293970219667, "grad_norm": 3.550586566899369, "learning_rate": 2.5332483368241914e-07, "loss": 1.0417, "step": 90970 }, { "epoch": 2.6824413976116763, "grad_norm": 3.4917265859319464, "learning_rate": 2.530924643563774e-07, "loss": 1.0273, "step": 90975 }, { "epoch": 2.682588825003686, "grad_norm": 3.5441305180705562, "learning_rate": 2.5286019793083217e-07, "loss": 1.0391, "step": 90980 }, { "epoch": 2.6827362523956952, "grad_norm": 3.594492501918307, "learning_rate": 2.5262803441262e-07, "loss": 1.0637, "step": 90985 }, { "epoch": 2.6828836797877047, "grad_norm": 3.582522182974804, "learning_rate": 2.5239597380857084e-07, "loss": 1.058, "step": 90990 }, { "epoch": 2.6830311071797137, "grad_norm": 3.452516898870229, "learning_rate": 2.5216401612551425e-07, "loss": 1.0287, "step": 90995 }, { "epoch": 2.6831785345717236, "grad_norm": 3.5852571248079124, "learning_rate": 2.5193216137027517e-07, "loss": 1.0212, "step": 91000 }, { "epoch": 2.6831785345717236, "eval_loss": 1.0774517059326172, "eval_runtime": 4.853, "eval_samples_per_second": 81.599, "eval_steps_per_second": 2.679, "step": 91000 }, { "epoch": 2.6833259619637326, "grad_norm": 3.534112254716065, "learning_rate": 2.5170040954967696e-07, "loss": 1.0376, "step": 91005 }, { "epoch": 2.6834733893557425, "grad_norm": 3.513315167909154, "learning_rate": 2.514687606705386e-07, "loss": 1.0492, "step": 91010 }, { "epoch": 2.6836208167477515, "grad_norm": 3.3788230718924206, "learning_rate": 2.512372147396777e-07, "loss": 0.9926, "step": 91015 }, { "epoch": 2.683768244139761, "grad_norm": 3.571562035498439, "learning_rate": 2.5100577176390543e-07, "loss": 1.0597, "step": 91020 }, { "epoch": 2.6839156715317705, "grad_norm": 3.5970264555507594, "learning_rate": 2.50774431750035e-07, "loss": 1.0386, "step": 91025 }, { "epoch": 2.68406309892378, "grad_norm": 3.432522927803202, "learning_rate": 2.505431947048724e-07, "loss": 1.0402, "step": 91030 }, { "epoch": 2.6842105263157894, "grad_norm": 3.5438137032561996, "learning_rate": 2.503120606352216e-07, "loss": 1.0508, "step": 91035 }, { "epoch": 2.684357953707799, "grad_norm": 3.5226103495482546, "learning_rate": 2.500810295478856e-07, "loss": 1.0682, "step": 91040 }, { "epoch": 2.6845053810998083, "grad_norm": 3.661928725231429, "learning_rate": 2.4985010144966097e-07, "loss": 1.05, "step": 91045 }, { "epoch": 2.6846528084918178, "grad_norm": 3.4641306106538474, "learning_rate": 2.496192763473444e-07, "loss": 1.0447, "step": 91050 }, { "epoch": 2.684800235883827, "grad_norm": 3.4566149782939553, "learning_rate": 2.4938855424772673e-07, "loss": 1.0761, "step": 91055 }, { "epoch": 2.6849476632758367, "grad_norm": 3.523652893559864, "learning_rate": 2.4915793515759955e-07, "loss": 1.0564, "step": 91060 }, { "epoch": 2.685095090667846, "grad_norm": 3.3578337323824514, "learning_rate": 2.489274190837466e-07, "loss": 1.0424, "step": 91065 }, { "epoch": 2.6852425180598556, "grad_norm": 3.420906574343082, "learning_rate": 2.486970060329526e-07, "loss": 0.9924, "step": 91070 }, { "epoch": 2.685389945451865, "grad_norm": 3.6062157193471585, "learning_rate": 2.484666960119969e-07, "loss": 1.0615, "step": 91075 }, { "epoch": 2.6855373728438745, "grad_norm": 3.262694772429374, "learning_rate": 2.4823648902765714e-07, "loss": 0.9944, "step": 91080 }, { "epoch": 2.685684800235884, "grad_norm": 3.461124761525513, "learning_rate": 2.4800638508670745e-07, "loss": 1.015, "step": 91085 }, { "epoch": 2.6858322276278934, "grad_norm": 3.489758552987375, "learning_rate": 2.4777638419591906e-07, "loss": 1.0484, "step": 91090 }, { "epoch": 2.685979655019903, "grad_norm": 3.5242299138908817, "learning_rate": 2.475464863620587e-07, "loss": 1.0506, "step": 91095 }, { "epoch": 2.686127082411912, "grad_norm": 3.416219131416879, "learning_rate": 2.473166915918934e-07, "loss": 1.0553, "step": 91100 }, { "epoch": 2.686274509803922, "grad_norm": 3.4724651433654814, "learning_rate": 2.470869998921836e-07, "loss": 1.0446, "step": 91105 }, { "epoch": 2.686421937195931, "grad_norm": 3.3592454942323386, "learning_rate": 2.468574112696889e-07, "loss": 1.032, "step": 91110 }, { "epoch": 2.6865693645879407, "grad_norm": 3.439036194590117, "learning_rate": 2.4662792573116474e-07, "loss": 1.0129, "step": 91115 }, { "epoch": 2.6867167919799497, "grad_norm": 3.4342260340301403, "learning_rate": 2.4639854328336444e-07, "loss": 1.0417, "step": 91120 }, { "epoch": 2.686864219371959, "grad_norm": 3.6141386701836744, "learning_rate": 2.4616926393303846e-07, "loss": 1.0417, "step": 91125 }, { "epoch": 2.6870116467639686, "grad_norm": 3.3814112498988425, "learning_rate": 2.459400876869314e-07, "loss": 1.0033, "step": 91130 }, { "epoch": 2.687159074155978, "grad_norm": 3.7245000387467018, "learning_rate": 2.4571101455178947e-07, "loss": 1.0329, "step": 91135 }, { "epoch": 2.6873065015479876, "grad_norm": 3.5830914810946966, "learning_rate": 2.454820445343524e-07, "loss": 1.048, "step": 91140 }, { "epoch": 2.687453928939997, "grad_norm": 3.391815213176938, "learning_rate": 2.452531776413576e-07, "loss": 1.018, "step": 91145 }, { "epoch": 2.6876013563320065, "grad_norm": 3.6227909197960537, "learning_rate": 2.4502441387953977e-07, "loss": 1.0569, "step": 91150 }, { "epoch": 2.687748783724016, "grad_norm": 3.3803111319957457, "learning_rate": 2.4479575325563134e-07, "loss": 0.9978, "step": 91155 }, { "epoch": 2.6878962111160254, "grad_norm": 3.5924451083438016, "learning_rate": 2.4456719577636e-07, "loss": 1.0489, "step": 91160 }, { "epoch": 2.688043638508035, "grad_norm": 3.6737171081535265, "learning_rate": 2.443387414484523e-07, "loss": 1.0749, "step": 91165 }, { "epoch": 2.6881910659000443, "grad_norm": 3.5320475148898534, "learning_rate": 2.4411039027862997e-07, "loss": 1.0954, "step": 91170 }, { "epoch": 2.6883384932920538, "grad_norm": 3.4315834142272377, "learning_rate": 2.438821422736123e-07, "loss": 1.0457, "step": 91175 }, { "epoch": 2.6884859206840632, "grad_norm": 3.4675231353735283, "learning_rate": 2.436539974401164e-07, "loss": 1.0816, "step": 91180 }, { "epoch": 2.6886333480760727, "grad_norm": 3.5861780139811668, "learning_rate": 2.434259557848556e-07, "loss": 1.0942, "step": 91185 }, { "epoch": 2.688780775468082, "grad_norm": 3.3821365454017203, "learning_rate": 2.4319801731454e-07, "loss": 1.0632, "step": 91190 }, { "epoch": 2.688928202860091, "grad_norm": 3.5016976892973797, "learning_rate": 2.4297018203587665e-07, "loss": 1.0082, "step": 91195 }, { "epoch": 2.689075630252101, "grad_norm": 3.593853531110007, "learning_rate": 2.4274244995557154e-07, "loss": 1.063, "step": 91200 }, { "epoch": 2.68922305764411, "grad_norm": 3.532925913259214, "learning_rate": 2.4251482108032295e-07, "loss": 1.0393, "step": 91205 }, { "epoch": 2.68937048503612, "grad_norm": 3.526107292571931, "learning_rate": 2.422872954168323e-07, "loss": 1.0447, "step": 91210 }, { "epoch": 2.689517912428129, "grad_norm": 3.5161523780561565, "learning_rate": 2.4205987297179296e-07, "loss": 1.0659, "step": 91215 }, { "epoch": 2.6896653398201384, "grad_norm": 3.485545288260298, "learning_rate": 2.418325537518974e-07, "loss": 1.0175, "step": 91220 }, { "epoch": 2.689812767212148, "grad_norm": 3.3987691185352342, "learning_rate": 2.4160533776383497e-07, "loss": 1.0491, "step": 91225 }, { "epoch": 2.6899601946041574, "grad_norm": 3.29315042492299, "learning_rate": 2.4137822501429154e-07, "loss": 1.0099, "step": 91230 }, { "epoch": 2.690107621996167, "grad_norm": 3.5511551996038926, "learning_rate": 2.4115121550995086e-07, "loss": 1.0668, "step": 91235 }, { "epoch": 2.6902550493881763, "grad_norm": 3.4108577115796592, "learning_rate": 2.40924309257491e-07, "loss": 1.0637, "step": 91240 }, { "epoch": 2.6904024767801857, "grad_norm": 3.6474810469636263, "learning_rate": 2.406975062635916e-07, "loss": 1.07, "step": 91245 }, { "epoch": 2.690549904172195, "grad_norm": 3.6014597176556253, "learning_rate": 2.404708065349248e-07, "loss": 1.0301, "step": 91250 }, { "epoch": 2.6906973315642047, "grad_norm": 3.447690359646337, "learning_rate": 2.4024421007816156e-07, "loss": 1.0323, "step": 91255 }, { "epoch": 2.690844758956214, "grad_norm": 3.4898759523042173, "learning_rate": 2.4001771689997063e-07, "loss": 1.0191, "step": 91260 }, { "epoch": 2.6909921863482236, "grad_norm": 3.5684354156813356, "learning_rate": 2.397913270070159e-07, "loss": 1.0877, "step": 91265 }, { "epoch": 2.691139613740233, "grad_norm": 3.4278808202938684, "learning_rate": 2.3956504040596005e-07, "loss": 1.0156, "step": 91270 }, { "epoch": 2.6912870411322425, "grad_norm": 3.464238989018201, "learning_rate": 2.3933885710346175e-07, "loss": 1.0578, "step": 91275 }, { "epoch": 2.691434468524252, "grad_norm": 3.437537098849613, "learning_rate": 2.391127771061753e-07, "loss": 1.02, "step": 91280 }, { "epoch": 2.6915818959162614, "grad_norm": 3.5007068994814152, "learning_rate": 2.388868004207555e-07, "loss": 1.0247, "step": 91285 }, { "epoch": 2.6917293233082704, "grad_norm": 3.6235912295098385, "learning_rate": 2.386609270538502e-07, "loss": 1.0661, "step": 91290 }, { "epoch": 2.6918767507002803, "grad_norm": 3.4448321126590207, "learning_rate": 2.384351570121071e-07, "loss": 1.0492, "step": 91295 }, { "epoch": 2.6920241780922893, "grad_norm": 3.4275289723936724, "learning_rate": 2.382094903021692e-07, "loss": 1.0101, "step": 91300 }, { "epoch": 2.6921716054842992, "grad_norm": 3.3951397977712774, "learning_rate": 2.379839269306762e-07, "loss": 1.0144, "step": 91305 }, { "epoch": 2.6923190328763082, "grad_norm": 3.450151885710181, "learning_rate": 2.3775846690426776e-07, "loss": 1.0456, "step": 91310 }, { "epoch": 2.6924664602683177, "grad_norm": 3.5254477958143613, "learning_rate": 2.3753311022957567e-07, "loss": 1.0271, "step": 91315 }, { "epoch": 2.692613887660327, "grad_norm": 3.6099350004265864, "learning_rate": 2.373078569132338e-07, "loss": 1.0347, "step": 91320 }, { "epoch": 2.6927613150523366, "grad_norm": 3.5937446155793604, "learning_rate": 2.370827069618689e-07, "loss": 1.0153, "step": 91325 }, { "epoch": 2.692908742444346, "grad_norm": 3.4657034994946208, "learning_rate": 2.3685766038210654e-07, "loss": 1.0223, "step": 91330 }, { "epoch": 2.6930561698363555, "grad_norm": 3.498398005462558, "learning_rate": 2.3663271718056893e-07, "loss": 1.0376, "step": 91335 }, { "epoch": 2.693203597228365, "grad_norm": 3.6540589309924627, "learning_rate": 2.3640787736387573e-07, "loss": 1.0747, "step": 91340 }, { "epoch": 2.6933510246203745, "grad_norm": 3.484112407771303, "learning_rate": 2.3618314093864294e-07, "loss": 1.053, "step": 91345 }, { "epoch": 2.693498452012384, "grad_norm": 3.5415957317251534, "learning_rate": 2.35958507911484e-07, "loss": 1.029, "step": 91350 }, { "epoch": 2.6936458794043934, "grad_norm": 3.517454150701125, "learning_rate": 2.3573397828900736e-07, "loss": 1.0557, "step": 91355 }, { "epoch": 2.693793306796403, "grad_norm": 3.5152298074576085, "learning_rate": 2.3550955207782236e-07, "loss": 1.0358, "step": 91360 }, { "epoch": 2.6939407341884123, "grad_norm": 3.476419862448204, "learning_rate": 2.352852292845316e-07, "loss": 1.0277, "step": 91365 }, { "epoch": 2.6940881615804217, "grad_norm": 3.52050298486428, "learning_rate": 2.3506100991573602e-07, "loss": 1.1004, "step": 91370 }, { "epoch": 2.694235588972431, "grad_norm": 3.6681527960478237, "learning_rate": 2.3483689397803417e-07, "loss": 1.0393, "step": 91375 }, { "epoch": 2.6943830163644407, "grad_norm": 3.65470285268906, "learning_rate": 2.3461288147802074e-07, "loss": 1.0683, "step": 91380 }, { "epoch": 2.6945304437564497, "grad_norm": 3.5127149152844366, "learning_rate": 2.343889724222875e-07, "loss": 1.0233, "step": 91385 }, { "epoch": 2.6946778711484596, "grad_norm": 3.3614712438083663, "learning_rate": 2.3416516681742218e-07, "loss": 1.0578, "step": 91390 }, { "epoch": 2.6948252985404686, "grad_norm": 3.7719352084902127, "learning_rate": 2.3394146467001276e-07, "loss": 1.0778, "step": 91395 }, { "epoch": 2.6949727259324785, "grad_norm": 3.594232175985399, "learning_rate": 2.3371786598664027e-07, "loss": 1.0036, "step": 91400 }, { "epoch": 2.6951201533244875, "grad_norm": 3.5355536200044058, "learning_rate": 2.3349437077388443e-07, "loss": 1.0242, "step": 91405 }, { "epoch": 2.695267580716497, "grad_norm": 3.5106685258571093, "learning_rate": 2.332709790383221e-07, "loss": 1.048, "step": 91410 }, { "epoch": 2.6954150081085064, "grad_norm": 3.5734402343140372, "learning_rate": 2.3304769078652713e-07, "loss": 1.0451, "step": 91415 }, { "epoch": 2.695562435500516, "grad_norm": 3.5264546855833294, "learning_rate": 2.3282450602506972e-07, "loss": 1.0715, "step": 91420 }, { "epoch": 2.6957098628925253, "grad_norm": 3.562509745547193, "learning_rate": 2.3260142476051794e-07, "loss": 1.0427, "step": 91425 }, { "epoch": 2.695857290284535, "grad_norm": 3.4383623730594226, "learning_rate": 2.323784469994353e-07, "loss": 1.0326, "step": 91430 }, { "epoch": 2.6960047176765443, "grad_norm": 3.547414916532784, "learning_rate": 2.3215557274838316e-07, "loss": 1.0529, "step": 91435 }, { "epoch": 2.6961521450685537, "grad_norm": 3.5883045889450726, "learning_rate": 2.3193280201392052e-07, "loss": 1.0616, "step": 91440 }, { "epoch": 2.696299572460563, "grad_norm": 3.6878846217111048, "learning_rate": 2.3171013480260253e-07, "loss": 1.0468, "step": 91445 }, { "epoch": 2.6964469998525726, "grad_norm": 3.499855047924822, "learning_rate": 2.3148757112098102e-07, "loss": 1.0192, "step": 91450 }, { "epoch": 2.696594427244582, "grad_norm": 3.3948697680983106, "learning_rate": 2.3126511097560575e-07, "loss": 1.0553, "step": 91455 }, { "epoch": 2.6967418546365916, "grad_norm": 3.53666625401205, "learning_rate": 2.3104275437302272e-07, "loss": 1.0775, "step": 91460 }, { "epoch": 2.696889282028601, "grad_norm": 3.5874626926633164, "learning_rate": 2.3082050131977422e-07, "loss": 1.066, "step": 91465 }, { "epoch": 2.6970367094206105, "grad_norm": 3.4527708186323243, "learning_rate": 2.305983518224017e-07, "loss": 1.0246, "step": 91470 }, { "epoch": 2.69718413681262, "grad_norm": 3.6228455444410375, "learning_rate": 2.3037630588744073e-07, "loss": 1.0376, "step": 91475 }, { "epoch": 2.6973315642046294, "grad_norm": 3.3284352341918986, "learning_rate": 2.301543635214265e-07, "loss": 1.0368, "step": 91480 }, { "epoch": 2.697478991596639, "grad_norm": 3.417947462698185, "learning_rate": 2.299325247308888e-07, "loss": 0.997, "step": 91485 }, { "epoch": 2.697626418988648, "grad_norm": 3.3536449672065123, "learning_rate": 2.2971078952235658e-07, "loss": 1.0477, "step": 91490 }, { "epoch": 2.6977738463806578, "grad_norm": 3.623483989249558, "learning_rate": 2.294891579023542e-07, "loss": 1.0462, "step": 91495 }, { "epoch": 2.6979212737726668, "grad_norm": 3.5700550460378904, "learning_rate": 2.2926762987740227e-07, "loss": 1.0367, "step": 91500 }, { "epoch": 2.6979212737726668, "eval_loss": 1.0774365663528442, "eval_runtime": 4.1711, "eval_samples_per_second": 94.938, "eval_steps_per_second": 3.117, "step": 91500 }, { "epoch": 2.6980687011646762, "grad_norm": 3.569584697108717, "learning_rate": 2.2904620545402223e-07, "loss": 1.0224, "step": 91505 }, { "epoch": 2.6982161285566857, "grad_norm": 3.6003820701888563, "learning_rate": 2.2882488463872683e-07, "loss": 1.0536, "step": 91510 }, { "epoch": 2.698363555948695, "grad_norm": 3.620095649274621, "learning_rate": 2.286036674380304e-07, "loss": 1.0479, "step": 91515 }, { "epoch": 2.6985109833407046, "grad_norm": 3.551003046004778, "learning_rate": 2.2838255385844193e-07, "loss": 1.0871, "step": 91520 }, { "epoch": 2.698658410732714, "grad_norm": 3.454707659055616, "learning_rate": 2.2816154390646825e-07, "loss": 0.9994, "step": 91525 }, { "epoch": 2.6988058381247235, "grad_norm": 3.6400538946145393, "learning_rate": 2.2794063758861256e-07, "loss": 1.0643, "step": 91530 }, { "epoch": 2.698953265516733, "grad_norm": 3.6990781700209023, "learning_rate": 2.2771983491137586e-07, "loss": 1.073, "step": 91535 }, { "epoch": 2.6991006929087424, "grad_norm": 3.3688957868205875, "learning_rate": 2.274991358812542e-07, "loss": 0.9997, "step": 91540 }, { "epoch": 2.699248120300752, "grad_norm": 3.321137134240933, "learning_rate": 2.2727854050474368e-07, "loss": 1.0244, "step": 91545 }, { "epoch": 2.6993955476927614, "grad_norm": 3.4160074659296775, "learning_rate": 2.2705804878833405e-07, "loss": 1.0219, "step": 91550 }, { "epoch": 2.699542975084771, "grad_norm": 3.5811286452007645, "learning_rate": 2.268376607385139e-07, "loss": 1.0543, "step": 91555 }, { "epoch": 2.6996904024767803, "grad_norm": 3.650641896881809, "learning_rate": 2.2661737636176932e-07, "loss": 1.0242, "step": 91560 }, { "epoch": 2.6998378298687897, "grad_norm": 3.4378075607820904, "learning_rate": 2.263971956645805e-07, "loss": 1.0399, "step": 91565 }, { "epoch": 2.699985257260799, "grad_norm": 3.500589642096056, "learning_rate": 2.2617711865342936e-07, "loss": 1.0178, "step": 91570 }, { "epoch": 2.7001326846528086, "grad_norm": 3.7434314470119907, "learning_rate": 2.2595714533478862e-07, "loss": 1.0169, "step": 91575 }, { "epoch": 2.700280112044818, "grad_norm": 3.5619075051371047, "learning_rate": 2.2573727571513435e-07, "loss": 1.0529, "step": 91580 }, { "epoch": 2.700427539436827, "grad_norm": 3.5708215562832413, "learning_rate": 2.2551750980093433e-07, "loss": 1.0474, "step": 91585 }, { "epoch": 2.700574966828837, "grad_norm": 3.421898697435209, "learning_rate": 2.2529784759865585e-07, "loss": 1.0193, "step": 91590 }, { "epoch": 2.700722394220846, "grad_norm": 3.3679377823310728, "learning_rate": 2.2507828911476332e-07, "loss": 1.012, "step": 91595 }, { "epoch": 2.700869821612856, "grad_norm": 3.4194306717676324, "learning_rate": 2.2485883435571744e-07, "loss": 1.0393, "step": 91600 }, { "epoch": 2.701017249004865, "grad_norm": 3.461734027351525, "learning_rate": 2.246394833279751e-07, "loss": 1.0404, "step": 91605 }, { "epoch": 2.7011646763968744, "grad_norm": 3.641392736570126, "learning_rate": 2.244202360379924e-07, "loss": 1.042, "step": 91610 }, { "epoch": 2.701312103788884, "grad_norm": 3.325298548754303, "learning_rate": 2.2420109249221873e-07, "loss": 0.9942, "step": 91615 }, { "epoch": 2.7014595311808933, "grad_norm": 3.67977821903707, "learning_rate": 2.2398205269710523e-07, "loss": 1.0316, "step": 91620 }, { "epoch": 2.701606958572903, "grad_norm": 3.5336948163980115, "learning_rate": 2.2376311665909546e-07, "loss": 1.0376, "step": 91625 }, { "epoch": 2.7017543859649122, "grad_norm": 3.5804147492378564, "learning_rate": 2.235442843846326e-07, "loss": 1.0743, "step": 91630 }, { "epoch": 2.7019018133569217, "grad_norm": 3.4869375949235, "learning_rate": 2.2332555588015572e-07, "loss": 1.0282, "step": 91635 }, { "epoch": 2.702049240748931, "grad_norm": 3.4339554522875777, "learning_rate": 2.2310693115210168e-07, "loss": 1.056, "step": 91640 }, { "epoch": 2.7021966681409406, "grad_norm": 3.4061056590861316, "learning_rate": 2.228884102069041e-07, "loss": 0.9862, "step": 91645 }, { "epoch": 2.70234409553295, "grad_norm": 3.5683264237156895, "learning_rate": 2.2266999305099122e-07, "loss": 0.991, "step": 91650 }, { "epoch": 2.7024915229249595, "grad_norm": 3.5386293804570497, "learning_rate": 2.2245167969079286e-07, "loss": 1.017, "step": 91655 }, { "epoch": 2.702638950316969, "grad_norm": 3.6363847963014537, "learning_rate": 2.2223347013273095e-07, "loss": 1.0368, "step": 91660 }, { "epoch": 2.7027863777089784, "grad_norm": 3.574906100218632, "learning_rate": 2.220153643832279e-07, "loss": 1.053, "step": 91665 }, { "epoch": 2.702933805100988, "grad_norm": 3.5337112370675485, "learning_rate": 2.2179736244870101e-07, "loss": 1.0796, "step": 91670 }, { "epoch": 2.7030812324929974, "grad_norm": 3.4931526430928153, "learning_rate": 2.2157946433556607e-07, "loss": 1.038, "step": 91675 }, { "epoch": 2.7032286598850064, "grad_norm": 3.4038516738626168, "learning_rate": 2.213616700502337e-07, "loss": 1.0613, "step": 91680 }, { "epoch": 2.7033760872770163, "grad_norm": 3.545886455119823, "learning_rate": 2.2114397959911465e-07, "loss": 1.0688, "step": 91685 }, { "epoch": 2.7035235146690253, "grad_norm": 3.3957641318118137, "learning_rate": 2.2092639298861254e-07, "loss": 1.0295, "step": 91690 }, { "epoch": 2.703670942061035, "grad_norm": 3.3583705725805135, "learning_rate": 2.20708910225131e-07, "loss": 1.015, "step": 91695 }, { "epoch": 2.703818369453044, "grad_norm": 3.3718316130578767, "learning_rate": 2.204915313150703e-07, "loss": 1.0153, "step": 91700 }, { "epoch": 2.7039657968450537, "grad_norm": 3.40291549800441, "learning_rate": 2.202742562648262e-07, "loss": 1.0475, "step": 91705 }, { "epoch": 2.704113224237063, "grad_norm": 3.4028616903891256, "learning_rate": 2.2005708508079272e-07, "loss": 1.061, "step": 91710 }, { "epoch": 2.7042606516290726, "grad_norm": 3.261636556150977, "learning_rate": 2.1984001776936018e-07, "loss": 1.0381, "step": 91715 }, { "epoch": 2.704408079021082, "grad_norm": 3.449056997162082, "learning_rate": 2.1962305433691682e-07, "loss": 1.0459, "step": 91720 }, { "epoch": 2.7045555064130915, "grad_norm": 3.50168563183126, "learning_rate": 2.1940619478984543e-07, "loss": 1.0042, "step": 91725 }, { "epoch": 2.704702933805101, "grad_norm": 3.4516530059375947, "learning_rate": 2.191894391345292e-07, "loss": 1.0408, "step": 91730 }, { "epoch": 2.7048503611971104, "grad_norm": 3.529604760689064, "learning_rate": 2.1897278737734477e-07, "loss": 1.0625, "step": 91735 }, { "epoch": 2.70499778858912, "grad_norm": 3.493420505252865, "learning_rate": 2.1875623952466825e-07, "loss": 1.0755, "step": 91740 }, { "epoch": 2.7051452159811293, "grad_norm": 3.5604015770551687, "learning_rate": 2.1853979558287164e-07, "loss": 1.0229, "step": 91745 }, { "epoch": 2.705292643373139, "grad_norm": 3.500620617512761, "learning_rate": 2.1832345555832396e-07, "loss": 1.0359, "step": 91750 }, { "epoch": 2.7054400707651483, "grad_norm": 3.3669909604008628, "learning_rate": 2.1810721945739187e-07, "loss": 1.0462, "step": 91755 }, { "epoch": 2.7055874981571577, "grad_norm": 3.465706395453068, "learning_rate": 2.178910872864369e-07, "loss": 1.0339, "step": 91760 }, { "epoch": 2.705734925549167, "grad_norm": 3.532619806043433, "learning_rate": 2.1767505905182102e-07, "loss": 1.0266, "step": 91765 }, { "epoch": 2.7058823529411766, "grad_norm": 3.532576872298896, "learning_rate": 2.174591347598992e-07, "loss": 1.0877, "step": 91770 }, { "epoch": 2.7060297803331856, "grad_norm": 3.6201464853722105, "learning_rate": 2.1724331441702633e-07, "loss": 1.047, "step": 91775 }, { "epoch": 2.7061772077251955, "grad_norm": 3.40512514511389, "learning_rate": 2.1702759802955317e-07, "loss": 1.0102, "step": 91780 }, { "epoch": 2.7063246351172046, "grad_norm": 3.5060121981015264, "learning_rate": 2.168119856038267e-07, "loss": 1.06, "step": 91785 }, { "epoch": 2.7064720625092145, "grad_norm": 3.5177019650895716, "learning_rate": 2.165964771461923e-07, "loss": 1.0009, "step": 91790 }, { "epoch": 2.7066194899012235, "grad_norm": 3.5768799237206106, "learning_rate": 2.1638107266299154e-07, "loss": 1.0346, "step": 91795 }, { "epoch": 2.706766917293233, "grad_norm": 3.584243749789487, "learning_rate": 2.1616577216056184e-07, "loss": 1.0668, "step": 91800 }, { "epoch": 2.7069143446852424, "grad_norm": 3.516341888550074, "learning_rate": 2.1595057564524068e-07, "loss": 1.0372, "step": 91805 }, { "epoch": 2.707061772077252, "grad_norm": 3.494535108285233, "learning_rate": 2.1573548312335878e-07, "loss": 1.0711, "step": 91810 }, { "epoch": 2.7072091994692613, "grad_norm": 3.4613121253992967, "learning_rate": 2.1552049460124567e-07, "loss": 1.0817, "step": 91815 }, { "epoch": 2.7073566268612708, "grad_norm": 3.3575610201237787, "learning_rate": 2.1530561008522882e-07, "loss": 1.051, "step": 91820 }, { "epoch": 2.7075040542532802, "grad_norm": 3.4175769961691325, "learning_rate": 2.1509082958162938e-07, "loss": 1.0248, "step": 91825 }, { "epoch": 2.7076514816452897, "grad_norm": 3.6100319042977778, "learning_rate": 2.148761530967698e-07, "loss": 1.071, "step": 91830 }, { "epoch": 2.707798909037299, "grad_norm": 3.4017820076516685, "learning_rate": 2.1466158063696505e-07, "loss": 0.9817, "step": 91835 }, { "epoch": 2.7079463364293086, "grad_norm": 3.699387200059798, "learning_rate": 2.144471122085309e-07, "loss": 1.0459, "step": 91840 }, { "epoch": 2.708093763821318, "grad_norm": 3.4332782833267297, "learning_rate": 2.1423274781777774e-07, "loss": 1.0421, "step": 91845 }, { "epoch": 2.7082411912133275, "grad_norm": 3.4931202742203276, "learning_rate": 2.14018487471013e-07, "loss": 1.0382, "step": 91850 }, { "epoch": 2.708388618605337, "grad_norm": 3.4114619571635143, "learning_rate": 2.1380433117454203e-07, "loss": 1.0123, "step": 91855 }, { "epoch": 2.7085360459973464, "grad_norm": 3.500478946504835, "learning_rate": 2.135902789346665e-07, "loss": 1.0461, "step": 91860 }, { "epoch": 2.708683473389356, "grad_norm": 3.495399753260667, "learning_rate": 2.1337633075768553e-07, "loss": 1.0086, "step": 91865 }, { "epoch": 2.708830900781365, "grad_norm": 3.553272794710932, "learning_rate": 2.131624866498945e-07, "loss": 1.0023, "step": 91870 }, { "epoch": 2.708978328173375, "grad_norm": 3.388759602375401, "learning_rate": 2.12948746617585e-07, "loss": 1.0076, "step": 91875 }, { "epoch": 2.709125755565384, "grad_norm": 3.561925288367718, "learning_rate": 2.1273511066704872e-07, "loss": 1.0442, "step": 91880 }, { "epoch": 2.7092731829573937, "grad_norm": 3.494885668162279, "learning_rate": 2.125215788045702e-07, "loss": 1.0423, "step": 91885 }, { "epoch": 2.7094206103494027, "grad_norm": 3.5600659816616163, "learning_rate": 2.12308151036434e-07, "loss": 1.0792, "step": 91890 }, { "epoch": 2.709568037741412, "grad_norm": 3.620505669354797, "learning_rate": 2.1209482736891967e-07, "loss": 1.0752, "step": 91895 }, { "epoch": 2.7097154651334217, "grad_norm": 3.5812871029099433, "learning_rate": 2.118816078083051e-07, "loss": 1.0426, "step": 91900 }, { "epoch": 2.709862892525431, "grad_norm": 3.5536425495142856, "learning_rate": 2.1166849236086486e-07, "loss": 1.0074, "step": 91905 }, { "epoch": 2.7100103199174406, "grad_norm": 3.553350414524517, "learning_rate": 2.1145548103286812e-07, "loss": 1.0589, "step": 91910 }, { "epoch": 2.71015774730945, "grad_norm": 3.577141262918799, "learning_rate": 2.1124257383058608e-07, "loss": 1.047, "step": 91915 }, { "epoch": 2.7103051747014595, "grad_norm": 3.614806149606658, "learning_rate": 2.1102977076028123e-07, "loss": 1.0447, "step": 91920 }, { "epoch": 2.710452602093469, "grad_norm": 3.6255119041540134, "learning_rate": 2.1081707182821653e-07, "loss": 0.9899, "step": 91925 }, { "epoch": 2.7106000294854784, "grad_norm": 3.5543729621895372, "learning_rate": 2.1060447704065108e-07, "loss": 1.0403, "step": 91930 }, { "epoch": 2.710747456877488, "grad_norm": 3.571269831340679, "learning_rate": 2.1039198640384032e-07, "loss": 1.0051, "step": 91935 }, { "epoch": 2.7108948842694973, "grad_norm": 3.583163208920779, "learning_rate": 2.1017959992403676e-07, "loss": 1.0409, "step": 91940 }, { "epoch": 2.7110423116615068, "grad_norm": 3.475544068862589, "learning_rate": 2.0996731760749163e-07, "loss": 1.0078, "step": 91945 }, { "epoch": 2.7111897390535162, "grad_norm": 3.396326360522975, "learning_rate": 2.0975513946044953e-07, "loss": 1.0128, "step": 91950 }, { "epoch": 2.7113371664455257, "grad_norm": 3.404287606722469, "learning_rate": 2.0954306548915545e-07, "loss": 1.0261, "step": 91955 }, { "epoch": 2.711484593837535, "grad_norm": 3.528673756717496, "learning_rate": 2.09331095699849e-07, "loss": 1.0319, "step": 91960 }, { "epoch": 2.7116320212295446, "grad_norm": 3.425577970340443, "learning_rate": 2.0911923009876806e-07, "loss": 1.0099, "step": 91965 }, { "epoch": 2.711779448621554, "grad_norm": 3.6039820741852444, "learning_rate": 2.089074686921473e-07, "loss": 1.0021, "step": 91970 }, { "epoch": 2.711926876013563, "grad_norm": 3.427045689538373, "learning_rate": 2.0869581148621794e-07, "loss": 1.0164, "step": 91975 }, { "epoch": 2.712074303405573, "grad_norm": 3.4609801410341423, "learning_rate": 2.0848425848720833e-07, "loss": 1.0716, "step": 91980 }, { "epoch": 2.712221730797582, "grad_norm": 3.531847427068479, "learning_rate": 2.0827280970134227e-07, "loss": 1.0618, "step": 91985 }, { "epoch": 2.712369158189592, "grad_norm": 3.5274428759813383, "learning_rate": 2.0806146513484433e-07, "loss": 1.071, "step": 91990 }, { "epoch": 2.712516585581601, "grad_norm": 3.384557916930945, "learning_rate": 2.0785022479393122e-07, "loss": 1.0365, "step": 91995 }, { "epoch": 2.7126640129736104, "grad_norm": 3.470203140949488, "learning_rate": 2.0763908868482047e-07, "loss": 1.0551, "step": 92000 }, { "epoch": 2.7126640129736104, "eval_loss": 1.0771052837371826, "eval_runtime": 4.3509, "eval_samples_per_second": 91.015, "eval_steps_per_second": 2.988, "step": 92000 }, { "epoch": 2.71281144036562, "grad_norm": 3.334423364885638, "learning_rate": 2.074280568137242e-07, "loss": 1.0407, "step": 92005 }, { "epoch": 2.7129588677576293, "grad_norm": 3.3746743006371074, "learning_rate": 2.0721712918685283e-07, "loss": 1.0389, "step": 92010 }, { "epoch": 2.7131062951496387, "grad_norm": 3.487183888332113, "learning_rate": 2.070063058104131e-07, "loss": 1.0054, "step": 92015 }, { "epoch": 2.713253722541648, "grad_norm": 3.4804194887758366, "learning_rate": 2.0679558669060754e-07, "loss": 1.1057, "step": 92020 }, { "epoch": 2.7134011499336577, "grad_norm": 3.5392364908630207, "learning_rate": 2.0658497183363905e-07, "loss": 1.0796, "step": 92025 }, { "epoch": 2.713548577325667, "grad_norm": 3.540386926665232, "learning_rate": 2.0637446124570316e-07, "loss": 0.9904, "step": 92030 }, { "epoch": 2.7136960047176766, "grad_norm": 3.7658705910155517, "learning_rate": 2.0616405493299488e-07, "loss": 1.0635, "step": 92035 }, { "epoch": 2.713843432109686, "grad_norm": 3.5097020325094777, "learning_rate": 2.0595375290170638e-07, "loss": 1.0329, "step": 92040 }, { "epoch": 2.7139908595016955, "grad_norm": 3.6909944745261303, "learning_rate": 2.0574355515802556e-07, "loss": 1.0098, "step": 92045 }, { "epoch": 2.714138286893705, "grad_norm": 3.626533362690053, "learning_rate": 2.0553346170813754e-07, "loss": 1.0192, "step": 92050 }, { "epoch": 2.7142857142857144, "grad_norm": 3.571083293059395, "learning_rate": 2.0532347255822522e-07, "loss": 1.0471, "step": 92055 }, { "epoch": 2.714433141677724, "grad_norm": 3.7351691347606693, "learning_rate": 2.0511358771446623e-07, "loss": 1.0492, "step": 92060 }, { "epoch": 2.7145805690697333, "grad_norm": 3.477005477362284, "learning_rate": 2.0490380718303933e-07, "loss": 1.0276, "step": 92065 }, { "epoch": 2.7147279964617423, "grad_norm": 3.573934482512572, "learning_rate": 2.0469413097011503e-07, "loss": 1.0392, "step": 92070 }, { "epoch": 2.7148754238537522, "grad_norm": 3.3366012836339833, "learning_rate": 2.044845590818642e-07, "loss": 0.9992, "step": 92075 }, { "epoch": 2.7150228512457613, "grad_norm": 3.6082657529068287, "learning_rate": 2.0427509152445443e-07, "loss": 1.0158, "step": 92080 }, { "epoch": 2.715170278637771, "grad_norm": 3.5659504779459925, "learning_rate": 2.0406572830404784e-07, "loss": 1.0323, "step": 92085 }, { "epoch": 2.71531770602978, "grad_norm": 3.6756999857833748, "learning_rate": 2.0385646942680704e-07, "loss": 1.0507, "step": 92090 }, { "epoch": 2.7154651334217896, "grad_norm": 3.496437603085066, "learning_rate": 2.0364731489888836e-07, "loss": 1.0176, "step": 92095 }, { "epoch": 2.715612560813799, "grad_norm": 3.617865038754552, "learning_rate": 2.0343826472644767e-07, "loss": 1.0263, "step": 92100 }, { "epoch": 2.7157599882058086, "grad_norm": 3.5535756401410077, "learning_rate": 2.0322931891563506e-07, "loss": 1.0364, "step": 92105 }, { "epoch": 2.715907415597818, "grad_norm": 3.535097569759605, "learning_rate": 2.030204774725998e-07, "loss": 1.0606, "step": 92110 }, { "epoch": 2.7160548429898275, "grad_norm": 3.4367299614867246, "learning_rate": 2.0281174040348739e-07, "loss": 1.0062, "step": 92115 }, { "epoch": 2.716202270381837, "grad_norm": 3.656976256442396, "learning_rate": 2.0260310771443997e-07, "loss": 1.0841, "step": 92120 }, { "epoch": 2.7163496977738464, "grad_norm": 3.544557160129143, "learning_rate": 2.023945794115968e-07, "loss": 1.054, "step": 92125 }, { "epoch": 2.716497125165856, "grad_norm": 3.541235509973585, "learning_rate": 2.0218615550109467e-07, "loss": 1.0856, "step": 92130 }, { "epoch": 2.7166445525578653, "grad_norm": 3.5593914002417746, "learning_rate": 2.019778359890649e-07, "loss": 1.0105, "step": 92135 }, { "epoch": 2.7167919799498748, "grad_norm": 3.7610349860653103, "learning_rate": 2.017696208816401e-07, "loss": 1.0616, "step": 92140 }, { "epoch": 2.716939407341884, "grad_norm": 3.5212107762418676, "learning_rate": 2.0156151018494494e-07, "loss": 1.0207, "step": 92145 }, { "epoch": 2.7170868347338937, "grad_norm": 3.5848157657187723, "learning_rate": 2.0135350390510453e-07, "loss": 1.026, "step": 92150 }, { "epoch": 2.717234262125903, "grad_norm": 3.6423947220435164, "learning_rate": 2.0114560204823898e-07, "loss": 1.0692, "step": 92155 }, { "epoch": 2.7173816895179126, "grad_norm": 3.447747230744012, "learning_rate": 2.0093780462046672e-07, "loss": 1.0417, "step": 92160 }, { "epoch": 2.7175291169099216, "grad_norm": 3.558237242651293, "learning_rate": 2.0073011162790283e-07, "loss": 0.9812, "step": 92165 }, { "epoch": 2.7176765443019315, "grad_norm": 3.5300053850590922, "learning_rate": 2.0052252307665706e-07, "loss": 1.0348, "step": 92170 }, { "epoch": 2.7178239716939405, "grad_norm": 3.52313191226921, "learning_rate": 2.003150389728399e-07, "loss": 1.0488, "step": 92175 }, { "epoch": 2.7179713990859504, "grad_norm": 3.4892900663498763, "learning_rate": 2.0010765932255606e-07, "loss": 1.0546, "step": 92180 }, { "epoch": 2.7181188264779594, "grad_norm": 3.5532312862806155, "learning_rate": 1.9990038413190733e-07, "loss": 1.0469, "step": 92185 }, { "epoch": 2.718266253869969, "grad_norm": 3.5139564503741862, "learning_rate": 1.9969321340699384e-07, "loss": 1.0485, "step": 92190 }, { "epoch": 2.7184136812619784, "grad_norm": 3.5510167337886047, "learning_rate": 1.9948614715391194e-07, "loss": 1.0256, "step": 92195 }, { "epoch": 2.718561108653988, "grad_norm": 3.559930866554887, "learning_rate": 1.9927918537875386e-07, "loss": 1.0781, "step": 92200 }, { "epoch": 2.7187085360459973, "grad_norm": 3.644137756277856, "learning_rate": 1.9907232808761097e-07, "loss": 1.0561, "step": 92205 }, { "epoch": 2.7188559634380067, "grad_norm": 3.5701878898019377, "learning_rate": 1.9886557528656924e-07, "loss": 1.0125, "step": 92210 }, { "epoch": 2.719003390830016, "grad_norm": 3.5448776558157924, "learning_rate": 1.9865892698171254e-07, "loss": 1.007, "step": 92215 }, { "epoch": 2.7191508182220256, "grad_norm": 3.42557594406655, "learning_rate": 1.9845238317912226e-07, "loss": 1.0382, "step": 92220 }, { "epoch": 2.719298245614035, "grad_norm": 3.4635089929208953, "learning_rate": 1.9824594388487604e-07, "loss": 1.0654, "step": 92225 }, { "epoch": 2.7194456730060446, "grad_norm": 3.5521036561584602, "learning_rate": 1.9803960910504903e-07, "loss": 1.0667, "step": 92230 }, { "epoch": 2.719593100398054, "grad_norm": 3.5884301313778453, "learning_rate": 1.9783337884571178e-07, "loss": 1.0851, "step": 92235 }, { "epoch": 2.7197405277900635, "grad_norm": 3.4796445953018087, "learning_rate": 1.9762725311293444e-07, "loss": 0.9785, "step": 92240 }, { "epoch": 2.719887955182073, "grad_norm": 3.6418402315954546, "learning_rate": 1.974212319127805e-07, "loss": 1.0517, "step": 92245 }, { "epoch": 2.7200353825740824, "grad_norm": 3.500926960552224, "learning_rate": 1.9721531525131427e-07, "loss": 1.008, "step": 92250 }, { "epoch": 2.720182809966092, "grad_norm": 3.6484870202212885, "learning_rate": 1.9700950313459379e-07, "loss": 1.0068, "step": 92255 }, { "epoch": 2.720330237358101, "grad_norm": 3.3982986328260014, "learning_rate": 1.9680379556867592e-07, "loss": 1.0394, "step": 92260 }, { "epoch": 2.7204776647501108, "grad_norm": 3.4506250949548107, "learning_rate": 1.9659819255961331e-07, "loss": 1.0409, "step": 92265 }, { "epoch": 2.72062509214212, "grad_norm": 3.585370974006672, "learning_rate": 1.9639269411345695e-07, "loss": 1.0229, "step": 92270 }, { "epoch": 2.7207725195341297, "grad_norm": 3.399916516766973, "learning_rate": 1.9618730023625367e-07, "loss": 1.0086, "step": 92275 }, { "epoch": 2.7209199469261387, "grad_norm": 3.4219463388213276, "learning_rate": 1.9598201093404573e-07, "loss": 1.0502, "step": 92280 }, { "epoch": 2.721067374318148, "grad_norm": 3.5672399002057453, "learning_rate": 1.95776826212877e-07, "loss": 1.0381, "step": 92285 }, { "epoch": 2.7212148017101576, "grad_norm": 3.332295497683481, "learning_rate": 1.9557174607878272e-07, "loss": 1.0438, "step": 92290 }, { "epoch": 2.721362229102167, "grad_norm": 3.6223430949976425, "learning_rate": 1.9536677053779886e-07, "loss": 1.0751, "step": 92295 }, { "epoch": 2.7215096564941765, "grad_norm": 3.5844390666316337, "learning_rate": 1.9516189959595644e-07, "loss": 1.0906, "step": 92300 }, { "epoch": 2.721657083886186, "grad_norm": 3.315937913516426, "learning_rate": 1.9495713325928435e-07, "loss": 1.0124, "step": 92305 }, { "epoch": 2.7218045112781954, "grad_norm": 3.481115490051893, "learning_rate": 1.9475247153380825e-07, "loss": 1.037, "step": 92310 }, { "epoch": 2.721951938670205, "grad_norm": 3.5063081983462, "learning_rate": 1.9454791442555076e-07, "loss": 1.0147, "step": 92315 }, { "epoch": 2.7220993660622144, "grad_norm": 3.339301334941187, "learning_rate": 1.943434619405296e-07, "loss": 1.012, "step": 92320 }, { "epoch": 2.722246793454224, "grad_norm": 3.535093611155302, "learning_rate": 1.9413911408476328e-07, "loss": 1.0184, "step": 92325 }, { "epoch": 2.7223942208462333, "grad_norm": 3.5848769361805384, "learning_rate": 1.9393487086426366e-07, "loss": 1.0216, "step": 92330 }, { "epoch": 2.7225416482382427, "grad_norm": 3.57038457554088, "learning_rate": 1.9373073228504097e-07, "loss": 1.0277, "step": 92335 }, { "epoch": 2.722689075630252, "grad_norm": 3.546190405650386, "learning_rate": 1.9352669835310284e-07, "loss": 1.0821, "step": 92340 }, { "epoch": 2.7228365030222617, "grad_norm": 3.7441004985618904, "learning_rate": 1.9332276907445157e-07, "loss": 1.1097, "step": 92345 }, { "epoch": 2.722983930414271, "grad_norm": 3.6131042445290293, "learning_rate": 1.9311894445508987e-07, "loss": 1.0725, "step": 92350 }, { "epoch": 2.7231313578062806, "grad_norm": 3.347344722718, "learning_rate": 1.9291522450101421e-07, "loss": 1.0389, "step": 92355 }, { "epoch": 2.72327878519829, "grad_norm": 3.4382863429098, "learning_rate": 1.927116092182206e-07, "loss": 1.0085, "step": 92360 }, { "epoch": 2.723426212590299, "grad_norm": 3.6984246980050917, "learning_rate": 1.9250809861269925e-07, "loss": 1.0584, "step": 92365 }, { "epoch": 2.723573639982309, "grad_norm": 3.4323738128071555, "learning_rate": 1.9230469269043955e-07, "loss": 0.9845, "step": 92370 }, { "epoch": 2.723721067374318, "grad_norm": 3.60715768661869, "learning_rate": 1.921013914574267e-07, "loss": 1.0634, "step": 92375 }, { "epoch": 2.7238684947663274, "grad_norm": 3.5080277902493537, "learning_rate": 1.91898194919643e-07, "loss": 1.0307, "step": 92380 }, { "epoch": 2.724015922158337, "grad_norm": 3.3131950849490885, "learning_rate": 1.9169510308306826e-07, "loss": 0.9932, "step": 92385 }, { "epoch": 2.7241633495503463, "grad_norm": 3.50652999776483, "learning_rate": 1.914921159536781e-07, "loss": 1.0397, "step": 92390 }, { "epoch": 2.724310776942356, "grad_norm": 3.3843721725229363, "learning_rate": 1.9128923353744528e-07, "loss": 1.0353, "step": 92395 }, { "epoch": 2.7244582043343653, "grad_norm": 3.419697051067193, "learning_rate": 1.910864558403412e-07, "loss": 1.023, "step": 92400 }, { "epoch": 2.7246056317263747, "grad_norm": 3.4762412042709734, "learning_rate": 1.9088378286833117e-07, "loss": 1.0305, "step": 92405 }, { "epoch": 2.724753059118384, "grad_norm": 3.487854688280919, "learning_rate": 1.9068121462738078e-07, "loss": 1.0279, "step": 92410 }, { "epoch": 2.7249004865103936, "grad_norm": 3.389235132433908, "learning_rate": 1.9047875112344946e-07, "loss": 0.9997, "step": 92415 }, { "epoch": 2.725047913902403, "grad_norm": 3.509949461152506, "learning_rate": 1.9027639236249534e-07, "loss": 1.057, "step": 92420 }, { "epoch": 2.7251953412944125, "grad_norm": 3.4148166488188227, "learning_rate": 1.9007413835047408e-07, "loss": 1.0467, "step": 92425 }, { "epoch": 2.725342768686422, "grad_norm": 3.4925198172891925, "learning_rate": 1.898719890933351e-07, "loss": 1.0432, "step": 92430 }, { "epoch": 2.7254901960784315, "grad_norm": 3.395044647919441, "learning_rate": 1.8966994459702901e-07, "loss": 1.0189, "step": 92435 }, { "epoch": 2.725637623470441, "grad_norm": 3.4418341514402746, "learning_rate": 1.8946800486749946e-07, "loss": 1.0688, "step": 92440 }, { "epoch": 2.7257850508624504, "grad_norm": 3.5597627401675256, "learning_rate": 1.8926616991069e-07, "loss": 1.0625, "step": 92445 }, { "epoch": 2.72593247825446, "grad_norm": 3.5116246574104175, "learning_rate": 1.8906443973253917e-07, "loss": 1.0133, "step": 92450 }, { "epoch": 2.7260799056464693, "grad_norm": 3.6210101759935944, "learning_rate": 1.8886281433898354e-07, "loss": 1.0371, "step": 92455 }, { "epoch": 2.7262273330384783, "grad_norm": 3.539935570749934, "learning_rate": 1.886612937359558e-07, "loss": 1.0338, "step": 92460 }, { "epoch": 2.726374760430488, "grad_norm": 3.476261700441954, "learning_rate": 1.884598779293867e-07, "loss": 1.0683, "step": 92465 }, { "epoch": 2.7265221878224972, "grad_norm": 3.5603495954202233, "learning_rate": 1.882585669252018e-07, "loss": 1.0808, "step": 92470 }, { "epoch": 2.726669615214507, "grad_norm": 3.5230334954549543, "learning_rate": 1.880573607293265e-07, "loss": 1.0251, "step": 92475 }, { "epoch": 2.726817042606516, "grad_norm": 3.5554809110514056, "learning_rate": 1.8785625934768013e-07, "loss": 1.0351, "step": 92480 }, { "epoch": 2.7269644699985256, "grad_norm": 3.5122528407216658, "learning_rate": 1.8765526278618093e-07, "loss": 1.0611, "step": 92485 }, { "epoch": 2.727111897390535, "grad_norm": 3.6113301309782257, "learning_rate": 1.8745437105074328e-07, "loss": 1.0282, "step": 92490 }, { "epoch": 2.7272593247825445, "grad_norm": 3.431086155028121, "learning_rate": 1.8725358414727874e-07, "loss": 1.0638, "step": 92495 }, { "epoch": 2.727406752174554, "grad_norm": 3.620854098711792, "learning_rate": 1.8705290208169634e-07, "loss": 1.0576, "step": 92500 }, { "epoch": 2.727406752174554, "eval_loss": 1.0772241353988647, "eval_runtime": 4.1582, "eval_samples_per_second": 95.233, "eval_steps_per_second": 3.126, "step": 92500 }, { "epoch": 2.7275541795665634, "grad_norm": 3.5680771633538626, "learning_rate": 1.8685232485989966e-07, "loss": 1.0421, "step": 92505 }, { "epoch": 2.727701606958573, "grad_norm": 3.5905540414326804, "learning_rate": 1.8665185248779276e-07, "loss": 1.0807, "step": 92510 }, { "epoch": 2.7278490343505823, "grad_norm": 3.5385453205147535, "learning_rate": 1.8645148497127382e-07, "loss": 1.0156, "step": 92515 }, { "epoch": 2.727996461742592, "grad_norm": 3.3452656799681537, "learning_rate": 1.8625122231623896e-07, "loss": 1.0222, "step": 92520 }, { "epoch": 2.7281438891346013, "grad_norm": 3.6430968938753736, "learning_rate": 1.8605106452858138e-07, "loss": 1.0581, "step": 92525 }, { "epoch": 2.7282913165266107, "grad_norm": 3.4592871683261204, "learning_rate": 1.8585101161419092e-07, "loss": 1.0436, "step": 92530 }, { "epoch": 2.72843874391862, "grad_norm": 3.7110747442984593, "learning_rate": 1.856510635789542e-07, "loss": 1.0734, "step": 92535 }, { "epoch": 2.7285861713106296, "grad_norm": 3.674030972119026, "learning_rate": 1.8545122042875478e-07, "loss": 1.0766, "step": 92540 }, { "epoch": 2.728733598702639, "grad_norm": 3.4553311296471563, "learning_rate": 1.8525148216947424e-07, "loss": 1.0437, "step": 92545 }, { "epoch": 2.7288810260946486, "grad_norm": 3.500825862904269, "learning_rate": 1.8505184880698868e-07, "loss": 1.0595, "step": 92550 }, { "epoch": 2.7290284534866576, "grad_norm": 3.595631680483067, "learning_rate": 1.8485232034717344e-07, "loss": 1.0262, "step": 92555 }, { "epoch": 2.7291758808786675, "grad_norm": 3.4509238019201613, "learning_rate": 1.8465289679589963e-07, "loss": 1.0396, "step": 92560 }, { "epoch": 2.7293233082706765, "grad_norm": 3.6445115129182892, "learning_rate": 1.8445357815903506e-07, "loss": 1.0126, "step": 92565 }, { "epoch": 2.7294707356626864, "grad_norm": 3.504643756205321, "learning_rate": 1.842543644424459e-07, "loss": 1.0455, "step": 92570 }, { "epoch": 2.7296181630546954, "grad_norm": 3.4490699251951367, "learning_rate": 1.8405525565199407e-07, "loss": 1.0387, "step": 92575 }, { "epoch": 2.729765590446705, "grad_norm": 3.427213111682574, "learning_rate": 1.83856251793537e-07, "loss": 0.9877, "step": 92580 }, { "epoch": 2.7299130178387143, "grad_norm": 3.5703145323156487, "learning_rate": 1.8365735287293336e-07, "loss": 0.9972, "step": 92585 }, { "epoch": 2.7300604452307238, "grad_norm": 3.53741163297427, "learning_rate": 1.8345855889603345e-07, "loss": 1.0217, "step": 92590 }, { "epoch": 2.7302078726227332, "grad_norm": 3.379481205048978, "learning_rate": 1.8325986986868797e-07, "loss": 1.0206, "step": 92595 }, { "epoch": 2.7303553000147427, "grad_norm": 3.4625665412298496, "learning_rate": 1.8306128579674397e-07, "loss": 1.0161, "step": 92600 }, { "epoch": 2.730502727406752, "grad_norm": 3.5301942918970703, "learning_rate": 1.8286280668604465e-07, "loss": 1.0365, "step": 92605 }, { "epoch": 2.7306501547987616, "grad_norm": 3.523729056444909, "learning_rate": 1.826644325424308e-07, "loss": 1.0306, "step": 92610 }, { "epoch": 2.730797582190771, "grad_norm": 3.5419194376176732, "learning_rate": 1.824661633717385e-07, "loss": 1.0511, "step": 92615 }, { "epoch": 2.7309450095827805, "grad_norm": 3.429301876684766, "learning_rate": 1.8226799917980402e-07, "loss": 1.0352, "step": 92620 }, { "epoch": 2.73109243697479, "grad_norm": 3.605939507804871, "learning_rate": 1.8206993997245722e-07, "loss": 1.0482, "step": 92625 }, { "epoch": 2.7312398643667994, "grad_norm": 3.6522698036483585, "learning_rate": 1.8187198575552636e-07, "loss": 1.022, "step": 92630 }, { "epoch": 2.731387291758809, "grad_norm": 3.4752363887274687, "learning_rate": 1.8167413653483678e-07, "loss": 1.0568, "step": 92635 }, { "epoch": 2.7315347191508184, "grad_norm": 3.597153702680183, "learning_rate": 1.814763923162105e-07, "loss": 1.0198, "step": 92640 }, { "epoch": 2.731682146542828, "grad_norm": 3.69490012626922, "learning_rate": 1.8127875310546619e-07, "loss": 1.0965, "step": 92645 }, { "epoch": 2.731829573934837, "grad_norm": 3.6544781491282112, "learning_rate": 1.8108121890842002e-07, "loss": 1.0538, "step": 92650 }, { "epoch": 2.7319770013268467, "grad_norm": 3.518477470899383, "learning_rate": 1.808837897308832e-07, "loss": 1.0448, "step": 92655 }, { "epoch": 2.7321244287188557, "grad_norm": 3.540074601016933, "learning_rate": 1.8068646557866734e-07, "loss": 1.014, "step": 92660 }, { "epoch": 2.7322718561108656, "grad_norm": 3.5202038163916503, "learning_rate": 1.8048924645757693e-07, "loss": 1.0017, "step": 92665 }, { "epoch": 2.7324192835028747, "grad_norm": 3.7033352682873284, "learning_rate": 1.8029213237341774e-07, "loss": 1.0737, "step": 92670 }, { "epoch": 2.732566710894884, "grad_norm": 3.511942629568736, "learning_rate": 1.8009512333198807e-07, "loss": 1.0483, "step": 92675 }, { "epoch": 2.7327141382868936, "grad_norm": 3.485691147243461, "learning_rate": 1.7989821933908538e-07, "loss": 1.079, "step": 92680 }, { "epoch": 2.732861565678903, "grad_norm": 3.429637005943698, "learning_rate": 1.797014204005054e-07, "loss": 1.0207, "step": 92685 }, { "epoch": 2.7330089930709125, "grad_norm": 3.602337831933517, "learning_rate": 1.7950472652203647e-07, "loss": 1.0611, "step": 92690 }, { "epoch": 2.733156420462922, "grad_norm": 3.4147257294933655, "learning_rate": 1.7930813770946934e-07, "loss": 1.0523, "step": 92695 }, { "epoch": 2.7333038478549314, "grad_norm": 3.4851684189826195, "learning_rate": 1.7911165396858692e-07, "loss": 1.0562, "step": 92700 }, { "epoch": 2.733451275246941, "grad_norm": 3.349054045682793, "learning_rate": 1.7891527530517163e-07, "loss": 1.0058, "step": 92705 }, { "epoch": 2.7335987026389503, "grad_norm": 3.6807436945592014, "learning_rate": 1.7871900172500219e-07, "loss": 1.0293, "step": 92710 }, { "epoch": 2.73374613003096, "grad_norm": 3.472969774324728, "learning_rate": 1.7852283323385398e-07, "loss": 1.0044, "step": 92715 }, { "epoch": 2.7338935574229692, "grad_norm": 3.605399048550241, "learning_rate": 1.7832676983749947e-07, "loss": 1.0547, "step": 92720 }, { "epoch": 2.7340409848149787, "grad_norm": 3.5587462401606045, "learning_rate": 1.7813081154170907e-07, "loss": 1.0279, "step": 92725 }, { "epoch": 2.734188412206988, "grad_norm": 3.6056807904432935, "learning_rate": 1.7793495835224687e-07, "loss": 1.0516, "step": 92730 }, { "epoch": 2.7343358395989976, "grad_norm": 3.460599447400226, "learning_rate": 1.777392102748783e-07, "loss": 1.0093, "step": 92735 }, { "epoch": 2.734483266991007, "grad_norm": 3.3887441519217285, "learning_rate": 1.7754356731536247e-07, "loss": 1.0479, "step": 92740 }, { "epoch": 2.734630694383016, "grad_norm": 3.402018699315045, "learning_rate": 1.7734802947945605e-07, "loss": 1.0139, "step": 92745 }, { "epoch": 2.734778121775026, "grad_norm": 3.5738964399824784, "learning_rate": 1.7715259677291358e-07, "loss": 1.0562, "step": 92750 }, { "epoch": 2.734925549167035, "grad_norm": 3.53123855699905, "learning_rate": 1.769572692014859e-07, "loss": 1.0377, "step": 92755 }, { "epoch": 2.735072976559045, "grad_norm": 3.4888484119622682, "learning_rate": 1.767620467709209e-07, "loss": 1.0389, "step": 92760 }, { "epoch": 2.735220403951054, "grad_norm": 3.643169513977011, "learning_rate": 1.7656692948696194e-07, "loss": 1.0243, "step": 92765 }, { "epoch": 2.7353678313430634, "grad_norm": 3.5122350688816093, "learning_rate": 1.763719173553527e-07, "loss": 1.0954, "step": 92770 }, { "epoch": 2.735515258735073, "grad_norm": 3.449184877860631, "learning_rate": 1.7617701038182942e-07, "loss": 1.0214, "step": 92775 }, { "epoch": 2.7356626861270823, "grad_norm": 3.5854090146322237, "learning_rate": 1.7598220857212883e-07, "loss": 1.0331, "step": 92780 }, { "epoch": 2.7358101135190918, "grad_norm": 3.4413995396956545, "learning_rate": 1.7578751193198295e-07, "loss": 1.042, "step": 92785 }, { "epoch": 2.735957540911101, "grad_norm": 3.535287992211286, "learning_rate": 1.7559292046712054e-07, "loss": 1.0689, "step": 92790 }, { "epoch": 2.7361049683031107, "grad_norm": 3.5059468832124585, "learning_rate": 1.7539843418326832e-07, "loss": 1.0694, "step": 92795 }, { "epoch": 2.73625239569512, "grad_norm": 3.7122099287618258, "learning_rate": 1.7520405308614872e-07, "loss": 1.0542, "step": 92800 }, { "epoch": 2.7363998230871296, "grad_norm": 3.582316859750868, "learning_rate": 1.7500977718148265e-07, "loss": 1.0253, "step": 92805 }, { "epoch": 2.736547250479139, "grad_norm": 3.7001611038081066, "learning_rate": 1.7481560647498508e-07, "loss": 1.065, "step": 92810 }, { "epoch": 2.7366946778711485, "grad_norm": 3.5629888724694867, "learning_rate": 1.7462154097237102e-07, "loss": 1.0666, "step": 92815 }, { "epoch": 2.736842105263158, "grad_norm": 3.651924840674874, "learning_rate": 1.744275806793509e-07, "loss": 1.0735, "step": 92820 }, { "epoch": 2.7369895326551674, "grad_norm": 3.4326271540888817, "learning_rate": 1.7423372560163186e-07, "loss": 1.05, "step": 92825 }, { "epoch": 2.737136960047177, "grad_norm": 3.5811567518598832, "learning_rate": 1.7403997574491847e-07, "loss": 1.017, "step": 92830 }, { "epoch": 2.7372843874391863, "grad_norm": 3.52171216647252, "learning_rate": 1.7384633111491285e-07, "loss": 1.0548, "step": 92835 }, { "epoch": 2.737431814831196, "grad_norm": 3.4343041651204556, "learning_rate": 1.736527917173117e-07, "loss": 1.0298, "step": 92840 }, { "epoch": 2.7375792422232053, "grad_norm": 3.6086970673542296, "learning_rate": 1.7345935755781126e-07, "loss": 1.0877, "step": 92845 }, { "epoch": 2.7377266696152143, "grad_norm": 3.7335174816610714, "learning_rate": 1.7326602864210326e-07, "loss": 1.0315, "step": 92850 }, { "epoch": 2.737874097007224, "grad_norm": 3.4977276489089806, "learning_rate": 1.7307280497587646e-07, "loss": 1.0185, "step": 92855 }, { "epoch": 2.738021524399233, "grad_norm": 3.5873881257266143, "learning_rate": 1.7287968656481634e-07, "loss": 1.0424, "step": 92860 }, { "epoch": 2.738168951791243, "grad_norm": 3.4056346783063485, "learning_rate": 1.7268667341460666e-07, "loss": 1.067, "step": 92865 }, { "epoch": 2.738316379183252, "grad_norm": 3.4868309747949757, "learning_rate": 1.7249376553092666e-07, "loss": 1.0312, "step": 92870 }, { "epoch": 2.7384638065752616, "grad_norm": 3.419240321630369, "learning_rate": 1.7230096291945177e-07, "loss": 0.9971, "step": 92875 }, { "epoch": 2.738611233967271, "grad_norm": 3.4044185797855198, "learning_rate": 1.7210826558585746e-07, "loss": 1.0421, "step": 92880 }, { "epoch": 2.7387586613592805, "grad_norm": 3.4637099471686246, "learning_rate": 1.7191567353581252e-07, "loss": 1.0539, "step": 92885 }, { "epoch": 2.73890608875129, "grad_norm": 3.2460703155115462, "learning_rate": 1.7172318677498453e-07, "loss": 1.0114, "step": 92890 }, { "epoch": 2.7390535161432994, "grad_norm": 3.472865694030137, "learning_rate": 1.7153080530903768e-07, "loss": 1.0204, "step": 92895 }, { "epoch": 2.739200943535309, "grad_norm": 3.5417496483613777, "learning_rate": 1.7133852914363284e-07, "loss": 1.0499, "step": 92900 }, { "epoch": 2.7393483709273183, "grad_norm": 3.5638961319898894, "learning_rate": 1.7114635828442847e-07, "loss": 1.0295, "step": 92905 }, { "epoch": 2.7394957983193278, "grad_norm": 4.023573752229349, "learning_rate": 1.7095429273707956e-07, "loss": 1.0397, "step": 92910 }, { "epoch": 2.7396432257113372, "grad_norm": 3.522778098806641, "learning_rate": 1.7076233250723662e-07, "loss": 1.0365, "step": 92915 }, { "epoch": 2.7397906531033467, "grad_norm": 3.5383429724483513, "learning_rate": 1.705704776005497e-07, "loss": 1.0404, "step": 92920 }, { "epoch": 2.739938080495356, "grad_norm": 3.5592521689334022, "learning_rate": 1.7037872802266304e-07, "loss": 1.0455, "step": 92925 }, { "epoch": 2.7400855078873656, "grad_norm": 3.5256844347107936, "learning_rate": 1.7018708377922092e-07, "loss": 1.0447, "step": 92930 }, { "epoch": 2.740232935279375, "grad_norm": 3.510651163488448, "learning_rate": 1.6999554487586084e-07, "loss": 1.0357, "step": 92935 }, { "epoch": 2.7403803626713845, "grad_norm": 3.35012064633201, "learning_rate": 1.6980411131822e-07, "loss": 1.0245, "step": 92940 }, { "epoch": 2.7405277900633935, "grad_norm": 3.503172948728189, "learning_rate": 1.6961278311193139e-07, "loss": 1.0113, "step": 92945 }, { "epoch": 2.7406752174554034, "grad_norm": 3.415172896646007, "learning_rate": 1.6942156026262464e-07, "loss": 1.0441, "step": 92950 }, { "epoch": 2.7408226448474124, "grad_norm": 3.4991193972404693, "learning_rate": 1.692304427759274e-07, "loss": 1.0838, "step": 92955 }, { "epoch": 2.7409700722394224, "grad_norm": 3.4898881059879794, "learning_rate": 1.69039430657463e-07, "loss": 1.0526, "step": 92960 }, { "epoch": 2.7411174996314314, "grad_norm": 3.6852970892516486, "learning_rate": 1.6884852391285246e-07, "loss": 1.0588, "step": 92965 }, { "epoch": 2.741264927023441, "grad_norm": 3.5706361682314824, "learning_rate": 1.686577225477133e-07, "loss": 1.0442, "step": 92970 }, { "epoch": 2.7414123544154503, "grad_norm": 3.583638614832018, "learning_rate": 1.684670265676602e-07, "loss": 1.0165, "step": 92975 }, { "epoch": 2.7415597818074597, "grad_norm": 3.530639522547661, "learning_rate": 1.6827643597830456e-07, "loss": 1.0482, "step": 92980 }, { "epoch": 2.741707209199469, "grad_norm": 3.547384508441826, "learning_rate": 1.6808595078525515e-07, "loss": 1.0558, "step": 92985 }, { "epoch": 2.7418546365914787, "grad_norm": 3.4247609674289246, "learning_rate": 1.6789557099411547e-07, "loss": 0.9882, "step": 92990 }, { "epoch": 2.742002063983488, "grad_norm": 3.4554540986200646, "learning_rate": 1.6770529661049055e-07, "loss": 1.0335, "step": 92995 }, { "epoch": 2.7421494913754976, "grad_norm": 3.390741566246577, "learning_rate": 1.6751512763997681e-07, "loss": 1.0058, "step": 93000 }, { "epoch": 2.7421494913754976, "eval_loss": 1.07720148563385, "eval_runtime": 4.2906, "eval_samples_per_second": 92.295, "eval_steps_per_second": 3.03, "step": 93000 }, { "epoch": 2.742296918767507, "grad_norm": 3.520005744505191, "learning_rate": 1.673250640881714e-07, "loss": 1.0525, "step": 93005 }, { "epoch": 2.7424443461595165, "grad_norm": 3.4423163913817705, "learning_rate": 1.6713510596066697e-07, "loss": 1.0428, "step": 93010 }, { "epoch": 2.742591773551526, "grad_norm": 3.3854083458708364, "learning_rate": 1.6694525326305318e-07, "loss": 1.0638, "step": 93015 }, { "epoch": 2.7427392009435354, "grad_norm": 3.4531301861941484, "learning_rate": 1.6675550600091725e-07, "loss": 1.0398, "step": 93020 }, { "epoch": 2.742886628335545, "grad_norm": 3.6082878676131545, "learning_rate": 1.6656586417984094e-07, "loss": 1.0245, "step": 93025 }, { "epoch": 2.7430340557275543, "grad_norm": 3.4712774316078105, "learning_rate": 1.663763278054073e-07, "loss": 1.0547, "step": 93030 }, { "epoch": 2.743181483119564, "grad_norm": 3.5639184269682636, "learning_rate": 1.6618689688319143e-07, "loss": 1.0174, "step": 93035 }, { "epoch": 2.743328910511573, "grad_norm": 3.621228942194471, "learning_rate": 1.6599757141876891e-07, "loss": 1.0309, "step": 93040 }, { "epoch": 2.7434763379035827, "grad_norm": 3.6601252333436984, "learning_rate": 1.6580835141770985e-07, "loss": 1.028, "step": 93045 }, { "epoch": 2.7436237652955917, "grad_norm": 3.509946367354876, "learning_rate": 1.6561923688558314e-07, "loss": 1.0498, "step": 93050 }, { "epoch": 2.7437711926876016, "grad_norm": 3.547735618589601, "learning_rate": 1.6543022782795305e-07, "loss": 1.0092, "step": 93055 }, { "epoch": 2.7439186200796106, "grad_norm": 3.5654058179929344, "learning_rate": 1.6524132425038183e-07, "loss": 1.0627, "step": 93060 }, { "epoch": 2.74406604747162, "grad_norm": 3.453535597906672, "learning_rate": 1.6505252615842834e-07, "loss": 1.0805, "step": 93065 }, { "epoch": 2.7442134748636295, "grad_norm": 3.371757537171932, "learning_rate": 1.6486383355764773e-07, "loss": 1.0424, "step": 93070 }, { "epoch": 2.744360902255639, "grad_norm": 3.5952644343391946, "learning_rate": 1.646752464535922e-07, "loss": 1.0355, "step": 93075 }, { "epoch": 2.7445083296476485, "grad_norm": 3.5358559094765383, "learning_rate": 1.6448676485181154e-07, "loss": 1.027, "step": 93080 }, { "epoch": 2.744655757039658, "grad_norm": 3.6033937931366467, "learning_rate": 1.642983887578525e-07, "loss": 1.0734, "step": 93085 }, { "epoch": 2.7448031844316674, "grad_norm": 3.589150054079055, "learning_rate": 1.6411011817725738e-07, "loss": 1.0565, "step": 93090 }, { "epoch": 2.744950611823677, "grad_norm": 3.5437652646835587, "learning_rate": 1.6392195311556753e-07, "loss": 1.0302, "step": 93095 }, { "epoch": 2.7450980392156863, "grad_norm": 3.3964016186736066, "learning_rate": 1.6373389357831766e-07, "loss": 1.0331, "step": 93100 }, { "epoch": 2.7452454666076958, "grad_norm": 3.3249170544223055, "learning_rate": 1.6354593957104425e-07, "loss": 1.0449, "step": 93105 }, { "epoch": 2.745392893999705, "grad_norm": 3.5850467861232667, "learning_rate": 1.6335809109927656e-07, "loss": 1.0101, "step": 93110 }, { "epoch": 2.7455403213917147, "grad_norm": 3.519416384004641, "learning_rate": 1.6317034816854231e-07, "loss": 1.0738, "step": 93115 }, { "epoch": 2.745687748783724, "grad_norm": 3.5126399462092257, "learning_rate": 1.6298271078436623e-07, "loss": 1.0387, "step": 93120 }, { "epoch": 2.7458351761757336, "grad_norm": 3.580154623285168, "learning_rate": 1.627951789522701e-07, "loss": 1.0535, "step": 93125 }, { "epoch": 2.745982603567743, "grad_norm": 3.4452098744601756, "learning_rate": 1.626077526777725e-07, "loss": 1.0002, "step": 93130 }, { "epoch": 2.746130030959752, "grad_norm": 3.584864570941228, "learning_rate": 1.624204319663869e-07, "loss": 1.0388, "step": 93135 }, { "epoch": 2.746277458351762, "grad_norm": 3.514498718394439, "learning_rate": 1.6223321682362764e-07, "loss": 1.005, "step": 93140 }, { "epoch": 2.746424885743771, "grad_norm": 3.4758677586980338, "learning_rate": 1.620461072550028e-07, "loss": 1.0497, "step": 93145 }, { "epoch": 2.746572313135781, "grad_norm": 3.3527142372957095, "learning_rate": 1.6185910326601758e-07, "loss": 1.0335, "step": 93150 }, { "epoch": 2.74671974052779, "grad_norm": 3.523801966691784, "learning_rate": 1.616722048621759e-07, "loss": 1.0316, "step": 93155 }, { "epoch": 2.7468671679197993, "grad_norm": 3.5370069112223828, "learning_rate": 1.6148541204897712e-07, "loss": 1.0509, "step": 93160 }, { "epoch": 2.747014595311809, "grad_norm": 3.491576427586983, "learning_rate": 1.6129872483191727e-07, "loss": 1.023, "step": 93165 }, { "epoch": 2.7471620227038183, "grad_norm": 3.5427550049639756, "learning_rate": 1.611121432164911e-07, "loss": 1.0725, "step": 93170 }, { "epoch": 2.7473094500958277, "grad_norm": 3.3438973182167966, "learning_rate": 1.6092566720818712e-07, "loss": 1.0534, "step": 93175 }, { "epoch": 2.747456877487837, "grad_norm": 3.5590851846959657, "learning_rate": 1.6073929681249474e-07, "loss": 1.0569, "step": 93180 }, { "epoch": 2.7476043048798466, "grad_norm": 3.5728415952268087, "learning_rate": 1.605530320348962e-07, "loss": 1.0376, "step": 93185 }, { "epoch": 2.747751732271856, "grad_norm": 3.455987866777366, "learning_rate": 1.6036687288087422e-07, "loss": 1.0233, "step": 93190 }, { "epoch": 2.7478991596638656, "grad_norm": 3.649919007681306, "learning_rate": 1.6018081935590567e-07, "loss": 1.0482, "step": 93195 }, { "epoch": 2.748046587055875, "grad_norm": 3.5046067083898143, "learning_rate": 1.599948714654653e-07, "loss": 1.0129, "step": 93200 }, { "epoch": 2.7481940144478845, "grad_norm": 3.5781698561035244, "learning_rate": 1.5980902921502588e-07, "loss": 1.034, "step": 93205 }, { "epoch": 2.748341441839894, "grad_norm": 3.3492912855844907, "learning_rate": 1.5962329261005422e-07, "loss": 1.0249, "step": 93210 }, { "epoch": 2.7484888692319034, "grad_norm": 3.3985081458490347, "learning_rate": 1.5943766165601809e-07, "loss": 1.0308, "step": 93215 }, { "epoch": 2.748636296623913, "grad_norm": 3.6137176557472226, "learning_rate": 1.5925213635837807e-07, "loss": 1.0616, "step": 93220 }, { "epoch": 2.7487837240159223, "grad_norm": 3.611962600308486, "learning_rate": 1.590667167225944e-07, "loss": 1.0619, "step": 93225 }, { "epoch": 2.7489311514079313, "grad_norm": 3.585022202853099, "learning_rate": 1.5888140275412313e-07, "loss": 1.0699, "step": 93230 }, { "epoch": 2.749078578799941, "grad_norm": 3.5312875800856744, "learning_rate": 1.5869619445841698e-07, "loss": 1.0787, "step": 93235 }, { "epoch": 2.7492260061919502, "grad_norm": 3.5040771119774, "learning_rate": 1.5851109184092618e-07, "loss": 1.0192, "step": 93240 }, { "epoch": 2.74937343358396, "grad_norm": 3.4308114272476278, "learning_rate": 1.5832609490709762e-07, "loss": 1.0679, "step": 93245 }, { "epoch": 2.749520860975969, "grad_norm": 3.537078056780405, "learning_rate": 1.5814120366237445e-07, "loss": 1.0081, "step": 93250 }, { "epoch": 2.7496682883679786, "grad_norm": 3.604085369395918, "learning_rate": 1.5795641811219896e-07, "loss": 1.0222, "step": 93255 }, { "epoch": 2.749815715759988, "grad_norm": 3.5515572373524398, "learning_rate": 1.5777173826200682e-07, "loss": 1.0253, "step": 93260 }, { "epoch": 2.7499631431519975, "grad_norm": 3.4761699470911687, "learning_rate": 1.5758716411723328e-07, "loss": 1.0625, "step": 93265 }, { "epoch": 2.750110570544007, "grad_norm": 3.406890194099508, "learning_rate": 1.574026956833094e-07, "loss": 1.0465, "step": 93270 }, { "epoch": 2.7502579979360164, "grad_norm": 3.3452562146282148, "learning_rate": 1.5721833296566374e-07, "loss": 1.0158, "step": 93275 }, { "epoch": 2.750405425328026, "grad_norm": 3.35239144420869, "learning_rate": 1.5703407596972113e-07, "loss": 0.999, "step": 93280 }, { "epoch": 2.7505528527200354, "grad_norm": 3.5528786455934283, "learning_rate": 1.5684992470090308e-07, "loss": 1.0429, "step": 93285 }, { "epoch": 2.750700280112045, "grad_norm": 3.579177694074346, "learning_rate": 1.5666587916462985e-07, "loss": 1.0389, "step": 93290 }, { "epoch": 2.7508477075040543, "grad_norm": 3.52419594208422, "learning_rate": 1.5648193936631584e-07, "loss": 1.0055, "step": 93295 }, { "epoch": 2.7509951348960637, "grad_norm": 3.37992708098679, "learning_rate": 1.5629810531137378e-07, "loss": 1.0533, "step": 93300 }, { "epoch": 2.751142562288073, "grad_norm": 3.336412538695273, "learning_rate": 1.5611437700521397e-07, "loss": 1.0372, "step": 93305 }, { "epoch": 2.7512899896800826, "grad_norm": 3.42754985261227, "learning_rate": 1.5593075445324207e-07, "loss": 1.0142, "step": 93310 }, { "epoch": 2.751437417072092, "grad_norm": 3.6656131225366404, "learning_rate": 1.5574723766086208e-07, "loss": 1.0185, "step": 93315 }, { "epoch": 2.7515848444641016, "grad_norm": 3.6587531943692158, "learning_rate": 1.5556382663347342e-07, "loss": 1.0428, "step": 93320 }, { "epoch": 2.751732271856111, "grad_norm": 3.5900346105367507, "learning_rate": 1.5538052137647432e-07, "loss": 1.0418, "step": 93325 }, { "epoch": 2.7518796992481205, "grad_norm": 3.4365423570690776, "learning_rate": 1.551973218952575e-07, "loss": 1.0315, "step": 93330 }, { "epoch": 2.7520271266401295, "grad_norm": 3.4013390862464594, "learning_rate": 1.550142281952145e-07, "loss": 1.0514, "step": 93335 }, { "epoch": 2.7521745540321394, "grad_norm": 3.3909017884774917, "learning_rate": 1.548312402817327e-07, "loss": 1.0378, "step": 93340 }, { "epoch": 2.7523219814241484, "grad_norm": 3.570895175985599, "learning_rate": 1.5464835816019693e-07, "loss": 1.0032, "step": 93345 }, { "epoch": 2.7524694088161583, "grad_norm": 3.50536420236681, "learning_rate": 1.544655818359883e-07, "loss": 0.9923, "step": 93350 }, { "epoch": 2.7526168362081673, "grad_norm": 3.5508015091321306, "learning_rate": 1.5428291131448668e-07, "loss": 1.0253, "step": 93355 }, { "epoch": 2.752764263600177, "grad_norm": 3.5997568963367894, "learning_rate": 1.5410034660106487e-07, "loss": 0.9918, "step": 93360 }, { "epoch": 2.7529116909921862, "grad_norm": 3.630143545022606, "learning_rate": 1.5391788770109772e-07, "loss": 1.0179, "step": 93365 }, { "epoch": 2.7530591183841957, "grad_norm": 3.49697184588656, "learning_rate": 1.5373553461995217e-07, "loss": 1.0412, "step": 93370 }, { "epoch": 2.753206545776205, "grad_norm": 3.6401214109726556, "learning_rate": 1.5355328736299524e-07, "loss": 1.0264, "step": 93375 }, { "epoch": 2.7533539731682146, "grad_norm": 3.3497774322576994, "learning_rate": 1.5337114593558966e-07, "loss": 1.0325, "step": 93380 }, { "epoch": 2.753501400560224, "grad_norm": 3.4829058272437874, "learning_rate": 1.5318911034309492e-07, "loss": 1.0493, "step": 93385 }, { "epoch": 2.7536488279522335, "grad_norm": 3.4264117462763153, "learning_rate": 1.5300718059086796e-07, "loss": 1.0431, "step": 93390 }, { "epoch": 2.753796255344243, "grad_norm": 3.453387258581825, "learning_rate": 1.5282535668426163e-07, "loss": 1.0115, "step": 93395 }, { "epoch": 2.7539436827362525, "grad_norm": 3.8041785760946154, "learning_rate": 1.5264363862862742e-07, "loss": 1.0765, "step": 93400 }, { "epoch": 2.754091110128262, "grad_norm": 3.523020136929309, "learning_rate": 1.5246202642931113e-07, "loss": 1.0321, "step": 93405 }, { "epoch": 2.7542385375202714, "grad_norm": 3.533386552347946, "learning_rate": 1.5228052009165798e-07, "loss": 1.0681, "step": 93410 }, { "epoch": 2.754385964912281, "grad_norm": 3.574168580713541, "learning_rate": 1.5209911962100877e-07, "loss": 0.9958, "step": 93415 }, { "epoch": 2.7545333923042903, "grad_norm": 3.5690261114824513, "learning_rate": 1.519178250227013e-07, "loss": 1.0091, "step": 93420 }, { "epoch": 2.7546808196962997, "grad_norm": 3.657406485357942, "learning_rate": 1.5173663630207e-07, "loss": 1.0545, "step": 93425 }, { "epoch": 2.7548282470883088, "grad_norm": 3.6471256505457634, "learning_rate": 1.5155555346444778e-07, "loss": 1.0611, "step": 93430 }, { "epoch": 2.7549756744803187, "grad_norm": 3.3682895515128113, "learning_rate": 1.5137457651516154e-07, "loss": 1.035, "step": 93435 }, { "epoch": 2.7551231018723277, "grad_norm": 3.796399075048437, "learning_rate": 1.511937054595383e-07, "loss": 1.0772, "step": 93440 }, { "epoch": 2.7552705292643376, "grad_norm": 3.6300604002753394, "learning_rate": 1.5101294030289882e-07, "loss": 1.0479, "step": 93445 }, { "epoch": 2.7554179566563466, "grad_norm": 3.519019264631119, "learning_rate": 1.5083228105056426e-07, "loss": 1.0535, "step": 93450 }, { "epoch": 2.755565384048356, "grad_norm": 3.499333641757158, "learning_rate": 1.506517277078491e-07, "loss": 1.0681, "step": 93455 }, { "epoch": 2.7557128114403655, "grad_norm": 3.6096171942453523, "learning_rate": 1.504712802800666e-07, "loss": 1.0615, "step": 93460 }, { "epoch": 2.755860238832375, "grad_norm": 3.522580091668315, "learning_rate": 1.5029093877252792e-07, "loss": 0.9991, "step": 93465 }, { "epoch": 2.7560076662243844, "grad_norm": 3.3886164491975284, "learning_rate": 1.5011070319053714e-07, "loss": 1.0278, "step": 93470 }, { "epoch": 2.756155093616394, "grad_norm": 3.514406822442316, "learning_rate": 1.499305735394013e-07, "loss": 1.0139, "step": 93475 }, { "epoch": 2.7563025210084033, "grad_norm": 3.7939305461179273, "learning_rate": 1.4975054982441823e-07, "loss": 1.0583, "step": 93480 }, { "epoch": 2.756449948400413, "grad_norm": 3.4972542692466235, "learning_rate": 1.495706320508862e-07, "loss": 1.0138, "step": 93485 }, { "epoch": 2.7565973757924223, "grad_norm": 3.567675000448873, "learning_rate": 1.493908202240997e-07, "loss": 1.0957, "step": 93490 }, { "epoch": 2.7567448031844317, "grad_norm": 3.493534669395583, "learning_rate": 1.492111143493499e-07, "loss": 0.9923, "step": 93495 }, { "epoch": 2.756892230576441, "grad_norm": 3.3591901542567486, "learning_rate": 1.4903151443192472e-07, "loss": 1.061, "step": 93500 }, { "epoch": 2.756892230576441, "eval_loss": 1.076792597770691, "eval_runtime": 4.1774, "eval_samples_per_second": 94.796, "eval_steps_per_second": 3.112, "step": 93500 }, { "epoch": 2.7570396579684506, "grad_norm": 3.515249879842881, "learning_rate": 1.4885202047710947e-07, "loss": 1.0401, "step": 93505 }, { "epoch": 2.75718708536046, "grad_norm": 3.4000764509976804, "learning_rate": 1.4867263249018491e-07, "loss": 1.0422, "step": 93510 }, { "epoch": 2.7573345127524695, "grad_norm": 3.478598713426413, "learning_rate": 1.4849335047643102e-07, "loss": 1.0429, "step": 93515 }, { "epoch": 2.757481940144479, "grad_norm": 3.479851424723053, "learning_rate": 1.4831417444112233e-07, "loss": 1.0428, "step": 93520 }, { "epoch": 2.757629367536488, "grad_norm": 3.536182353281019, "learning_rate": 1.4813510438953209e-07, "loss": 1.0122, "step": 93525 }, { "epoch": 2.757776794928498, "grad_norm": 3.374417543082539, "learning_rate": 1.4795614032692903e-07, "loss": 0.998, "step": 93530 }, { "epoch": 2.757924222320507, "grad_norm": 3.2714351305547744, "learning_rate": 1.4777728225857976e-07, "loss": 1.0071, "step": 93535 }, { "epoch": 2.758071649712517, "grad_norm": 3.5235229365122467, "learning_rate": 1.4759853018974757e-07, "loss": 1.0403, "step": 93540 }, { "epoch": 2.758219077104526, "grad_norm": 3.6406191193454065, "learning_rate": 1.4741988412569157e-07, "loss": 1.0534, "step": 93545 }, { "epoch": 2.7583665044965353, "grad_norm": 3.608370185781136, "learning_rate": 1.4724134407166967e-07, "loss": 1.0583, "step": 93550 }, { "epoch": 2.7585139318885448, "grad_norm": 3.5579406511763625, "learning_rate": 1.4706291003293472e-07, "loss": 1.0542, "step": 93555 }, { "epoch": 2.7586613592805542, "grad_norm": 3.4644088058144655, "learning_rate": 1.4688458201473798e-07, "loss": 1.0282, "step": 93560 }, { "epoch": 2.7588087866725637, "grad_norm": 3.3436520012336683, "learning_rate": 1.4670636002232686e-07, "loss": 1.0421, "step": 93565 }, { "epoch": 2.758956214064573, "grad_norm": 3.4920979787737307, "learning_rate": 1.4652824406094557e-07, "loss": 1.017, "step": 93570 }, { "epoch": 2.7591036414565826, "grad_norm": 3.578396539119189, "learning_rate": 1.4635023413583527e-07, "loss": 1.0667, "step": 93575 }, { "epoch": 2.759251068848592, "grad_norm": 3.647013242431879, "learning_rate": 1.461723302522343e-07, "loss": 1.0867, "step": 93580 }, { "epoch": 2.7593984962406015, "grad_norm": 3.5376260971875837, "learning_rate": 1.4599453241537802e-07, "loss": 1.0183, "step": 93585 }, { "epoch": 2.759545923632611, "grad_norm": 3.3800931668094347, "learning_rate": 1.4581684063049773e-07, "loss": 1.0195, "step": 93590 }, { "epoch": 2.7596933510246204, "grad_norm": 3.4242390423295728, "learning_rate": 1.456392549028221e-07, "loss": 1.0649, "step": 93595 }, { "epoch": 2.75984077841663, "grad_norm": 3.546433132808597, "learning_rate": 1.454617752375774e-07, "loss": 1.0798, "step": 93600 }, { "epoch": 2.7599882058086393, "grad_norm": 3.592610044633786, "learning_rate": 1.452844016399861e-07, "loss": 1.0544, "step": 93605 }, { "epoch": 2.760135633200649, "grad_norm": 3.474020583743702, "learning_rate": 1.4510713411526696e-07, "loss": 1.0453, "step": 93610 }, { "epoch": 2.7602830605926583, "grad_norm": 3.537839813313158, "learning_rate": 1.4492997266863748e-07, "loss": 1.0041, "step": 93615 }, { "epoch": 2.7604304879846673, "grad_norm": 3.4461459199477855, "learning_rate": 1.4475291730530928e-07, "loss": 1.0309, "step": 93620 }, { "epoch": 2.760577915376677, "grad_norm": 3.441104641158645, "learning_rate": 1.4457596803049409e-07, "loss": 1.0346, "step": 93625 }, { "epoch": 2.760725342768686, "grad_norm": 3.5230932746734753, "learning_rate": 1.4439912484939728e-07, "loss": 1.0296, "step": 93630 }, { "epoch": 2.760872770160696, "grad_norm": 3.59017844616813, "learning_rate": 1.4422238776722384e-07, "loss": 1.0505, "step": 93635 }, { "epoch": 2.761020197552705, "grad_norm": 3.5209278250763827, "learning_rate": 1.4404575678917383e-07, "loss": 1.0477, "step": 93640 }, { "epoch": 2.7611676249447146, "grad_norm": 3.549810488403268, "learning_rate": 1.4386923192044473e-07, "loss": 1.0168, "step": 93645 }, { "epoch": 2.761315052336724, "grad_norm": 3.5353934758231063, "learning_rate": 1.43692813166232e-07, "loss": 1.0436, "step": 93650 }, { "epoch": 2.7614624797287335, "grad_norm": 3.5134274610696714, "learning_rate": 1.4351650053172516e-07, "loss": 1.0582, "step": 93655 }, { "epoch": 2.761609907120743, "grad_norm": 3.7316604334640315, "learning_rate": 1.433402940221147e-07, "loss": 1.0377, "step": 93660 }, { "epoch": 2.7617573345127524, "grad_norm": 3.3778387693413743, "learning_rate": 1.4316419364258356e-07, "loss": 1.01, "step": 93665 }, { "epoch": 2.761904761904762, "grad_norm": 3.583539648403351, "learning_rate": 1.429881993983151e-07, "loss": 1.0535, "step": 93670 }, { "epoch": 2.7620521892967713, "grad_norm": 3.42983278780258, "learning_rate": 1.4281231129448722e-07, "loss": 1.0404, "step": 93675 }, { "epoch": 2.762199616688781, "grad_norm": 3.5905087837768437, "learning_rate": 1.4263652933627623e-07, "loss": 1.0013, "step": 93680 }, { "epoch": 2.7623470440807902, "grad_norm": 3.35868603876029, "learning_rate": 1.4246085352885422e-07, "loss": 1.0333, "step": 93685 }, { "epoch": 2.7624944714727997, "grad_norm": 3.4767852632000817, "learning_rate": 1.4228528387739206e-07, "loss": 1.0313, "step": 93690 }, { "epoch": 2.762641898864809, "grad_norm": 3.3952809769035293, "learning_rate": 1.4210982038705356e-07, "loss": 1.0014, "step": 93695 }, { "epoch": 2.7627893262568186, "grad_norm": 3.6068177357299054, "learning_rate": 1.4193446306300455e-07, "loss": 1.1011, "step": 93700 }, { "epoch": 2.762936753648828, "grad_norm": 3.5691168888289533, "learning_rate": 1.4175921191040302e-07, "loss": 1.0227, "step": 93705 }, { "epoch": 2.7630841810408375, "grad_norm": 3.67599081065219, "learning_rate": 1.4158406693440734e-07, "loss": 1.0637, "step": 93710 }, { "epoch": 2.763231608432847, "grad_norm": 3.572106569963471, "learning_rate": 1.4140902814017086e-07, "loss": 1.034, "step": 93715 }, { "epoch": 2.7633790358248564, "grad_norm": 3.4707243416301115, "learning_rate": 1.4123409553284408e-07, "loss": 1.0013, "step": 93720 }, { "epoch": 2.7635264632168655, "grad_norm": 3.7085182866943627, "learning_rate": 1.4105926911757579e-07, "loss": 1.0652, "step": 93725 }, { "epoch": 2.7636738906088754, "grad_norm": 3.6454008445761117, "learning_rate": 1.4088454889950808e-07, "loss": 1.0386, "step": 93730 }, { "epoch": 2.7638213180008844, "grad_norm": 3.475161136447749, "learning_rate": 1.407099348837844e-07, "loss": 1.0188, "step": 93735 }, { "epoch": 2.763968745392894, "grad_norm": 3.475840608207796, "learning_rate": 1.4053542707554226e-07, "loss": 1.0154, "step": 93740 }, { "epoch": 2.7641161727849033, "grad_norm": 3.6897136273611664, "learning_rate": 1.4036102547991675e-07, "loss": 1.0338, "step": 93745 }, { "epoch": 2.7642636001769127, "grad_norm": 3.4658227727405224, "learning_rate": 1.4018673010203958e-07, "loss": 1.0698, "step": 93750 }, { "epoch": 2.764411027568922, "grad_norm": 3.2828250460067823, "learning_rate": 1.4001254094704e-07, "loss": 1.0134, "step": 93755 }, { "epoch": 2.7645584549609317, "grad_norm": 3.5565117897467737, "learning_rate": 1.3983845802004348e-07, "loss": 1.0852, "step": 93760 }, { "epoch": 2.764705882352941, "grad_norm": 3.525715253860948, "learning_rate": 1.396644813261734e-07, "loss": 1.0167, "step": 93765 }, { "epoch": 2.7648533097449506, "grad_norm": 3.6367167606328255, "learning_rate": 1.3949061087054738e-07, "loss": 1.0457, "step": 93770 }, { "epoch": 2.76500073713696, "grad_norm": 3.537560527198942, "learning_rate": 1.3931684665828378e-07, "loss": 1.0656, "step": 93775 }, { "epoch": 2.7651481645289695, "grad_norm": 3.5908996979251016, "learning_rate": 1.391431886944948e-07, "loss": 1.045, "step": 93780 }, { "epoch": 2.765295591920979, "grad_norm": 3.53426582278725, "learning_rate": 1.3896963698429007e-07, "loss": 1.0298, "step": 93785 }, { "epoch": 2.7654430193129884, "grad_norm": 3.438691882314522, "learning_rate": 1.387961915327776e-07, "loss": 1.0741, "step": 93790 }, { "epoch": 2.765590446704998, "grad_norm": 3.6602751573408083, "learning_rate": 1.3862285234506084e-07, "loss": 1.0233, "step": 93795 }, { "epoch": 2.7657378740970073, "grad_norm": 3.557520343263557, "learning_rate": 1.3844961942624066e-07, "loss": 1.0534, "step": 93800 }, { "epoch": 2.765885301489017, "grad_norm": 3.459957389915352, "learning_rate": 1.382764927814134e-07, "loss": 1.0112, "step": 93805 }, { "epoch": 2.7660327288810262, "grad_norm": 3.4732664650187566, "learning_rate": 1.3810347241567546e-07, "loss": 1.0336, "step": 93810 }, { "epoch": 2.7661801562730357, "grad_norm": 3.404686782201433, "learning_rate": 1.3793055833411686e-07, "loss": 1.0317, "step": 93815 }, { "epoch": 2.7663275836650447, "grad_norm": 3.616642215098465, "learning_rate": 1.3775775054182607e-07, "loss": 1.0618, "step": 93820 }, { "epoch": 2.7664750110570546, "grad_norm": 3.512560552758406, "learning_rate": 1.3758504904388818e-07, "loss": 1.0803, "step": 93825 }, { "epoch": 2.7666224384490636, "grad_norm": 3.42511766442409, "learning_rate": 1.374124538453854e-07, "loss": 1.0556, "step": 93830 }, { "epoch": 2.7667698658410735, "grad_norm": 3.5780042706060127, "learning_rate": 1.3723996495139612e-07, "loss": 1.0557, "step": 93835 }, { "epoch": 2.7669172932330826, "grad_norm": 3.439479429418921, "learning_rate": 1.370675823669959e-07, "loss": 1.0314, "step": 93840 }, { "epoch": 2.767064720625092, "grad_norm": 3.5662173281819243, "learning_rate": 1.3689530609725858e-07, "loss": 1.0393, "step": 93845 }, { "epoch": 2.7672121480171015, "grad_norm": 3.4701315321654214, "learning_rate": 1.367231361472518e-07, "loss": 1.022, "step": 93850 }, { "epoch": 2.767359575409111, "grad_norm": 3.469392008516489, "learning_rate": 1.3655107252204272e-07, "loss": 1.0449, "step": 93855 }, { "epoch": 2.7675070028011204, "grad_norm": 3.517139356939242, "learning_rate": 1.363791152266948e-07, "loss": 1.0614, "step": 93860 }, { "epoch": 2.76765443019313, "grad_norm": 3.606272919973111, "learning_rate": 1.3620726426626735e-07, "loss": 1.0577, "step": 93865 }, { "epoch": 2.7678018575851393, "grad_norm": 3.5088432758870445, "learning_rate": 1.3603551964581753e-07, "loss": 1.0376, "step": 93870 }, { "epoch": 2.7679492849771488, "grad_norm": 3.438375546382211, "learning_rate": 1.358638813704001e-07, "loss": 1.0281, "step": 93875 }, { "epoch": 2.768096712369158, "grad_norm": 3.425110407913499, "learning_rate": 1.356923494450639e-07, "loss": 1.0326, "step": 93880 }, { "epoch": 2.7682441397611677, "grad_norm": 3.6714097010546127, "learning_rate": 1.3552092387485824e-07, "loss": 1.0692, "step": 93885 }, { "epoch": 2.768391567153177, "grad_norm": 3.4615280164875504, "learning_rate": 1.3534960466482615e-07, "loss": 1.0448, "step": 93890 }, { "epoch": 2.7685389945451866, "grad_norm": 3.48692688373152, "learning_rate": 1.3517839182000988e-07, "loss": 0.9845, "step": 93895 }, { "epoch": 2.768686421937196, "grad_norm": 3.392228435957883, "learning_rate": 1.3500728534544663e-07, "loss": 0.9879, "step": 93900 }, { "epoch": 2.7688338493292055, "grad_norm": 3.699127955478102, "learning_rate": 1.3483628524617195e-07, "loss": 1.0277, "step": 93905 }, { "epoch": 2.768981276721215, "grad_norm": 3.378865881261118, "learning_rate": 1.346653915272185e-07, "loss": 1.0533, "step": 93910 }, { "epoch": 2.769128704113224, "grad_norm": 3.6556901127470622, "learning_rate": 1.3449460419361305e-07, "loss": 1.0644, "step": 93915 }, { "epoch": 2.769276131505234, "grad_norm": 3.613286345316927, "learning_rate": 1.343239232503829e-07, "loss": 1.0905, "step": 93920 }, { "epoch": 2.769423558897243, "grad_norm": 3.457165304313267, "learning_rate": 1.3415334870254981e-07, "loss": 1.0514, "step": 93925 }, { "epoch": 2.769570986289253, "grad_norm": 3.572354155092636, "learning_rate": 1.339828805551335e-07, "loss": 1.0468, "step": 93930 }, { "epoch": 2.769718413681262, "grad_norm": 3.388409160996414, "learning_rate": 1.3381251881314962e-07, "loss": 1.0368, "step": 93935 }, { "epoch": 2.7698658410732713, "grad_norm": 3.732286962664087, "learning_rate": 1.33642263481612e-07, "loss": 1.0192, "step": 93940 }, { "epoch": 2.7700132684652807, "grad_norm": 3.484682405968129, "learning_rate": 1.3347211456553003e-07, "loss": 1.0972, "step": 93945 }, { "epoch": 2.77016069585729, "grad_norm": 3.5103856585215363, "learning_rate": 1.333020720699113e-07, "loss": 1.0244, "step": 93950 }, { "epoch": 2.7703081232492996, "grad_norm": 3.485853036485851, "learning_rate": 1.331321359997577e-07, "loss": 1.0462, "step": 93955 }, { "epoch": 2.770455550641309, "grad_norm": 3.8118311422249085, "learning_rate": 1.3296230636007224e-07, "loss": 1.0259, "step": 93960 }, { "epoch": 2.7706029780333186, "grad_norm": 3.5759138442761693, "learning_rate": 1.3279258315585018e-07, "loss": 1.0526, "step": 93965 }, { "epoch": 2.770750405425328, "grad_norm": 3.4420101809157573, "learning_rate": 1.326229663920879e-07, "loss": 1.0017, "step": 93970 }, { "epoch": 2.7708978328173375, "grad_norm": 3.462555258970908, "learning_rate": 1.3245345607377473e-07, "loss": 1.039, "step": 93975 }, { "epoch": 2.771045260209347, "grad_norm": 3.5877788743368333, "learning_rate": 1.322840522058992e-07, "loss": 1.0354, "step": 93980 }, { "epoch": 2.7711926876013564, "grad_norm": 3.4298858950246904, "learning_rate": 1.321147547934473e-07, "loss": 1.0199, "step": 93985 }, { "epoch": 2.771340114993366, "grad_norm": 3.7294640317595302, "learning_rate": 1.3194556384139884e-07, "loss": 1.075, "step": 93990 }, { "epoch": 2.7714875423853753, "grad_norm": 3.606669276122207, "learning_rate": 1.3177647935473435e-07, "loss": 1.0158, "step": 93995 }, { "epoch": 2.7716349697773848, "grad_norm": 3.4591577148436716, "learning_rate": 1.3160750133842824e-07, "loss": 1.0237, "step": 94000 }, { "epoch": 2.7716349697773848, "eval_loss": 1.0767321586608887, "eval_runtime": 5.1414, "eval_samples_per_second": 77.022, "eval_steps_per_second": 2.528, "step": 94000 }, { "epoch": 2.7717823971693942, "grad_norm": 3.5849892829956795, "learning_rate": 1.314386297974532e-07, "loss": 1.0716, "step": 94005 }, { "epoch": 2.7719298245614032, "grad_norm": 3.540542710045314, "learning_rate": 1.3126986473677813e-07, "loss": 1.0567, "step": 94010 }, { "epoch": 2.772077251953413, "grad_norm": 3.6099714830169702, "learning_rate": 1.3110120616136992e-07, "loss": 1.077, "step": 94015 }, { "epoch": 2.772224679345422, "grad_norm": 3.510731547464173, "learning_rate": 1.3093265407619084e-07, "loss": 1.0222, "step": 94020 }, { "epoch": 2.772372106737432, "grad_norm": 3.471514294467056, "learning_rate": 1.3076420848620152e-07, "loss": 1.0315, "step": 94025 }, { "epoch": 2.772519534129441, "grad_norm": 3.4288729337271486, "learning_rate": 1.3059586939635672e-07, "loss": 1.0481, "step": 94030 }, { "epoch": 2.7726669615214505, "grad_norm": 3.3713813588619828, "learning_rate": 1.3042763681161293e-07, "loss": 1.0218, "step": 94035 }, { "epoch": 2.77281438891346, "grad_norm": 3.544137474758694, "learning_rate": 1.3025951073691824e-07, "loss": 1.051, "step": 94040 }, { "epoch": 2.7729618163054695, "grad_norm": 3.515061816595191, "learning_rate": 1.300914911772208e-07, "loss": 1.0485, "step": 94045 }, { "epoch": 2.773109243697479, "grad_norm": 3.474511265705458, "learning_rate": 1.2992357813746455e-07, "loss": 1.0434, "step": 94050 }, { "epoch": 2.7732566710894884, "grad_norm": 3.630903436085025, "learning_rate": 1.2975577162259098e-07, "loss": 1.0299, "step": 94055 }, { "epoch": 2.773404098481498, "grad_norm": 3.494892167506892, "learning_rate": 1.295880716375382e-07, "loss": 1.1001, "step": 94060 }, { "epoch": 2.7735515258735073, "grad_norm": 3.550197141047496, "learning_rate": 1.2942047818723977e-07, "loss": 1.0287, "step": 94065 }, { "epoch": 2.7736989532655167, "grad_norm": 3.6593055791952502, "learning_rate": 1.292529912766284e-07, "loss": 1.0458, "step": 94070 }, { "epoch": 2.773846380657526, "grad_norm": 3.675805442134793, "learning_rate": 1.2908561091063227e-07, "loss": 1.0068, "step": 94075 }, { "epoch": 2.7739938080495357, "grad_norm": 3.5173810844897044, "learning_rate": 1.2891833709417614e-07, "loss": 1.0731, "step": 94080 }, { "epoch": 2.774141235441545, "grad_norm": 3.542542902659565, "learning_rate": 1.2875116983218318e-07, "loss": 1.0908, "step": 94085 }, { "epoch": 2.7742886628335546, "grad_norm": 3.535087558326168, "learning_rate": 1.2858410912957196e-07, "loss": 1.0318, "step": 94090 }, { "epoch": 2.774436090225564, "grad_norm": 3.5746801794106196, "learning_rate": 1.2841715499125855e-07, "loss": 1.0311, "step": 94095 }, { "epoch": 2.7745835176175735, "grad_norm": 3.696569020094077, "learning_rate": 1.282503074221561e-07, "loss": 1.0526, "step": 94100 }, { "epoch": 2.7747309450095825, "grad_norm": 3.3662841316718564, "learning_rate": 1.28083566427174e-07, "loss": 1.0444, "step": 94105 }, { "epoch": 2.7748783724015924, "grad_norm": 3.441901759386502, "learning_rate": 1.2791693201121835e-07, "loss": 1.0224, "step": 94110 }, { "epoch": 2.7750257997936014, "grad_norm": 3.613056708448894, "learning_rate": 1.2775040417919313e-07, "loss": 1.0409, "step": 94115 }, { "epoch": 2.7751732271856113, "grad_norm": 3.3810330920631566, "learning_rate": 1.2758398293599817e-07, "loss": 0.9952, "step": 94120 }, { "epoch": 2.7753206545776203, "grad_norm": 3.6285094340338144, "learning_rate": 1.274176682865312e-07, "loss": 1.0302, "step": 94125 }, { "epoch": 2.77546808196963, "grad_norm": 3.4785856022864716, "learning_rate": 1.2725146023568584e-07, "loss": 1.0155, "step": 94130 }, { "epoch": 2.7756155093616393, "grad_norm": 3.4916701522582034, "learning_rate": 1.2708535878835318e-07, "loss": 1.0639, "step": 94135 }, { "epoch": 2.7757629367536487, "grad_norm": 3.3854903188839, "learning_rate": 1.269193639494201e-07, "loss": 1.027, "step": 94140 }, { "epoch": 2.775910364145658, "grad_norm": 3.5565130716917204, "learning_rate": 1.2675347572377274e-07, "loss": 1.0336, "step": 94145 }, { "epoch": 2.7760577915376676, "grad_norm": 3.4599292064656217, "learning_rate": 1.2658769411629135e-07, "loss": 1.0369, "step": 94150 }, { "epoch": 2.776205218929677, "grad_norm": 3.4339698316162774, "learning_rate": 1.2642201913185408e-07, "loss": 1.0331, "step": 94155 }, { "epoch": 2.7763526463216865, "grad_norm": 3.5521041817449674, "learning_rate": 1.2625645077533707e-07, "loss": 1.0209, "step": 94160 }, { "epoch": 2.776500073713696, "grad_norm": 3.3909293206525106, "learning_rate": 1.260909890516118e-07, "loss": 1.0464, "step": 94165 }, { "epoch": 2.7766475011057055, "grad_norm": 3.4376968857896086, "learning_rate": 1.2592563396554734e-07, "loss": 1.035, "step": 94170 }, { "epoch": 2.776794928497715, "grad_norm": 3.4408802039375526, "learning_rate": 1.2576038552200894e-07, "loss": 1.0839, "step": 94175 }, { "epoch": 2.7769423558897244, "grad_norm": 3.539947847290783, "learning_rate": 1.2559524372586021e-07, "loss": 1.0455, "step": 94180 }, { "epoch": 2.777089783281734, "grad_norm": 3.472931851493925, "learning_rate": 1.2543020858195975e-07, "loss": 1.0661, "step": 94185 }, { "epoch": 2.7772372106737433, "grad_norm": 3.487715881716744, "learning_rate": 1.2526528009516454e-07, "loss": 0.9973, "step": 94190 }, { "epoch": 2.7773846380657528, "grad_norm": 3.344856329094235, "learning_rate": 1.251004582703269e-07, "loss": 1.0714, "step": 94195 }, { "epoch": 2.777532065457762, "grad_norm": 3.4320743121872517, "learning_rate": 1.2493574311229798e-07, "loss": 1.0087, "step": 94200 }, { "epoch": 2.7776794928497717, "grad_norm": 3.4517520543481024, "learning_rate": 1.2477113462592393e-07, "loss": 1.0356, "step": 94205 }, { "epoch": 2.7778269202417807, "grad_norm": 3.42565517137209, "learning_rate": 1.2460663281604918e-07, "loss": 1.0223, "step": 94210 }, { "epoch": 2.7779743476337906, "grad_norm": 3.426952502142694, "learning_rate": 1.2444223768751317e-07, "loss": 1.0786, "step": 94215 }, { "epoch": 2.7781217750257996, "grad_norm": 3.462658340818515, "learning_rate": 1.242779492451554e-07, "loss": 1.0131, "step": 94220 }, { "epoch": 2.7782692024178095, "grad_norm": 3.2587274770782955, "learning_rate": 1.2411376749380824e-07, "loss": 1.0143, "step": 94225 }, { "epoch": 2.7784166298098185, "grad_norm": 3.56020149359179, "learning_rate": 1.2394969243830447e-07, "loss": 1.0432, "step": 94230 }, { "epoch": 2.778564057201828, "grad_norm": 3.602256748725425, "learning_rate": 1.237857240834711e-07, "loss": 1.0284, "step": 94235 }, { "epoch": 2.7787114845938374, "grad_norm": 3.5388721639175844, "learning_rate": 1.2362186243413342e-07, "loss": 1.0291, "step": 94240 }, { "epoch": 2.778858911985847, "grad_norm": 3.4303749598168674, "learning_rate": 1.2345810749511382e-07, "loss": 1.0353, "step": 94245 }, { "epoch": 2.7790063393778563, "grad_norm": 3.368719428439307, "learning_rate": 1.232944592712297e-07, "loss": 0.9825, "step": 94250 }, { "epoch": 2.779153766769866, "grad_norm": 3.3763158268149813, "learning_rate": 1.2313091776729804e-07, "loss": 1.0113, "step": 94255 }, { "epoch": 2.7793011941618753, "grad_norm": 3.4451566463210304, "learning_rate": 1.229674829881304e-07, "loss": 1.0333, "step": 94260 }, { "epoch": 2.7794486215538847, "grad_norm": 3.6051344003918286, "learning_rate": 1.2280415493853627e-07, "loss": 1.062, "step": 94265 }, { "epoch": 2.779596048945894, "grad_norm": 3.42965023827595, "learning_rate": 1.226409336233214e-07, "loss": 1.0443, "step": 94270 }, { "epoch": 2.7797434763379036, "grad_norm": 3.4935584223167053, "learning_rate": 1.2247781904728945e-07, "loss": 1.0233, "step": 94275 }, { "epoch": 2.779890903729913, "grad_norm": 3.6423406215629797, "learning_rate": 1.2231481121523946e-07, "loss": 1.0716, "step": 94280 }, { "epoch": 2.7800383311219226, "grad_norm": 3.523397292971956, "learning_rate": 1.221519101319693e-07, "loss": 1.0576, "step": 94285 }, { "epoch": 2.780185758513932, "grad_norm": 3.3491145259468142, "learning_rate": 1.2198911580227055e-07, "loss": 1.0139, "step": 94290 }, { "epoch": 2.7803331859059415, "grad_norm": 3.5767752556169117, "learning_rate": 1.21826428230936e-07, "loss": 1.0609, "step": 94295 }, { "epoch": 2.780480613297951, "grad_norm": 3.6196711468427276, "learning_rate": 1.2166384742275104e-07, "loss": 1.0432, "step": 94300 }, { "epoch": 2.78062804068996, "grad_norm": 3.5860157487813287, "learning_rate": 1.215013733825006e-07, "loss": 1.03, "step": 94305 }, { "epoch": 2.78077546808197, "grad_norm": 3.5004017893927277, "learning_rate": 1.213390061149658e-07, "loss": 1.0363, "step": 94310 }, { "epoch": 2.780922895473979, "grad_norm": 3.5392883437481806, "learning_rate": 1.211767456249241e-07, "loss": 1.0303, "step": 94315 }, { "epoch": 2.7810703228659888, "grad_norm": 3.3810056758014007, "learning_rate": 1.2101459191715041e-07, "loss": 1.0153, "step": 94320 }, { "epoch": 2.781217750257998, "grad_norm": 3.4217631049127784, "learning_rate": 1.2085254499641596e-07, "loss": 1.0267, "step": 94325 }, { "epoch": 2.7813651776500072, "grad_norm": 3.4288000996517485, "learning_rate": 1.2069060486748982e-07, "loss": 1.016, "step": 94330 }, { "epoch": 2.7815126050420167, "grad_norm": 3.544028943898757, "learning_rate": 1.2052877153513651e-07, "loss": 1.034, "step": 94335 }, { "epoch": 2.781660032434026, "grad_norm": 3.5546985557201793, "learning_rate": 1.2036704500411847e-07, "loss": 1.078, "step": 94340 }, { "epoch": 2.7818074598260356, "grad_norm": 3.497608613786878, "learning_rate": 1.2020542527919523e-07, "loss": 1.0359, "step": 94345 }, { "epoch": 2.781954887218045, "grad_norm": 3.424729338851542, "learning_rate": 1.2004391236512174e-07, "loss": 1.0556, "step": 94350 }, { "epoch": 2.7821023146100545, "grad_norm": 3.44989821811829, "learning_rate": 1.1988250626665087e-07, "loss": 1.0472, "step": 94355 }, { "epoch": 2.782249742002064, "grad_norm": 3.5172571300347593, "learning_rate": 1.1972120698853297e-07, "loss": 1.0692, "step": 94360 }, { "epoch": 2.7823971693940734, "grad_norm": 3.522151361793528, "learning_rate": 1.1956001453551386e-07, "loss": 1.0406, "step": 94365 }, { "epoch": 2.782544596786083, "grad_norm": 3.6400510426602453, "learning_rate": 1.1939892891233678e-07, "loss": 1.0188, "step": 94370 }, { "epoch": 2.7826920241780924, "grad_norm": 3.459512161153557, "learning_rate": 1.1923795012374172e-07, "loss": 1.0531, "step": 94375 }, { "epoch": 2.782839451570102, "grad_norm": 3.565296699528274, "learning_rate": 1.1907707817446573e-07, "loss": 1.0461, "step": 94380 }, { "epoch": 2.7829868789621113, "grad_norm": 3.516887799516094, "learning_rate": 1.1891631306924292e-07, "loss": 1.0475, "step": 94385 }, { "epoch": 2.7831343063541207, "grad_norm": 3.578201880999852, "learning_rate": 1.1875565481280409e-07, "loss": 1.0428, "step": 94390 }, { "epoch": 2.78328173374613, "grad_norm": 3.6214168845192596, "learning_rate": 1.1859510340987673e-07, "loss": 1.0685, "step": 94395 }, { "epoch": 2.783429161138139, "grad_norm": 3.557074067903266, "learning_rate": 1.1843465886518454e-07, "loss": 1.0431, "step": 94400 }, { "epoch": 2.783576588530149, "grad_norm": 3.486490726564699, "learning_rate": 1.1827432118344958e-07, "loss": 1.0174, "step": 94405 }, { "epoch": 2.783724015922158, "grad_norm": 3.498761553870353, "learning_rate": 1.1811409036938975e-07, "loss": 1.0269, "step": 94410 }, { "epoch": 2.783871443314168, "grad_norm": 3.5555504877361894, "learning_rate": 1.1795396642772003e-07, "loss": 1.0358, "step": 94415 }, { "epoch": 2.784018870706177, "grad_norm": 3.6079999600282884, "learning_rate": 1.1779394936315205e-07, "loss": 1.0472, "step": 94420 }, { "epoch": 2.7841662980981865, "grad_norm": 3.6597850138876344, "learning_rate": 1.1763403918039455e-07, "loss": 1.0483, "step": 94425 }, { "epoch": 2.784313725490196, "grad_norm": 3.5613750651444684, "learning_rate": 1.1747423588415378e-07, "loss": 1.0364, "step": 94430 }, { "epoch": 2.7844611528822054, "grad_norm": 3.59691575742404, "learning_rate": 1.1731453947913056e-07, "loss": 1.0778, "step": 94435 }, { "epoch": 2.784608580274215, "grad_norm": 3.4325366912618067, "learning_rate": 1.1715494997002612e-07, "loss": 1.0735, "step": 94440 }, { "epoch": 2.7847560076662243, "grad_norm": 3.6632488422037777, "learning_rate": 1.1699546736153505e-07, "loss": 0.9973, "step": 94445 }, { "epoch": 2.784903435058234, "grad_norm": 3.519652438899417, "learning_rate": 1.1683609165835065e-07, "loss": 1.046, "step": 94450 }, { "epoch": 2.7850508624502432, "grad_norm": 3.519902329897751, "learning_rate": 1.1667682286516338e-07, "loss": 1.0309, "step": 94455 }, { "epoch": 2.7851982898422527, "grad_norm": 3.5366450141077634, "learning_rate": 1.1651766098665903e-07, "loss": 1.0589, "step": 94460 }, { "epoch": 2.785345717234262, "grad_norm": 3.510623953844382, "learning_rate": 1.1635860602752137e-07, "loss": 1.0536, "step": 94465 }, { "epoch": 2.7854931446262716, "grad_norm": 3.401070556037861, "learning_rate": 1.1619965799243168e-07, "loss": 1.0196, "step": 94470 }, { "epoch": 2.785640572018281, "grad_norm": 3.4649613699722, "learning_rate": 1.1604081688606536e-07, "loss": 1.0282, "step": 94475 }, { "epoch": 2.7857879994102905, "grad_norm": 3.606143416600989, "learning_rate": 1.1588208271309824e-07, "loss": 1.0721, "step": 94480 }, { "epoch": 2.7859354268023, "grad_norm": 3.4057909603485252, "learning_rate": 1.1572345547819996e-07, "loss": 1.0539, "step": 94485 }, { "epoch": 2.7860828541943095, "grad_norm": 3.609908986447855, "learning_rate": 1.1556493518603966e-07, "loss": 0.9961, "step": 94490 }, { "epoch": 2.7862302815863185, "grad_norm": 3.4767024434408094, "learning_rate": 1.1540652184128115e-07, "loss": 1.0167, "step": 94495 }, { "epoch": 2.7863777089783284, "grad_norm": 3.5746310018431346, "learning_rate": 1.1524821544858566e-07, "loss": 1.0262, "step": 94500 }, { "epoch": 2.7863777089783284, "eval_loss": 1.0767216682434082, "eval_runtime": 4.2556, "eval_samples_per_second": 93.054, "eval_steps_per_second": 3.055, "step": 94500 }, { "epoch": 2.7865251363703374, "grad_norm": 3.4906385209941027, "learning_rate": 1.150900160126124e-07, "loss": 1.0412, "step": 94505 }, { "epoch": 2.7866725637623473, "grad_norm": 3.430039873887064, "learning_rate": 1.1493192353801516e-07, "loss": 1.0479, "step": 94510 }, { "epoch": 2.7868199911543563, "grad_norm": 3.458346526478067, "learning_rate": 1.1477393802944769e-07, "loss": 1.0148, "step": 94515 }, { "epoch": 2.7869674185463658, "grad_norm": 3.5181616027310025, "learning_rate": 1.1461605949155755e-07, "loss": 1.0661, "step": 94520 }, { "epoch": 2.787114845938375, "grad_norm": 3.478633743367127, "learning_rate": 1.144582879289914e-07, "loss": 0.9866, "step": 94525 }, { "epoch": 2.7872622733303847, "grad_norm": 3.6258215774683453, "learning_rate": 1.14300623346391e-07, "loss": 1.0507, "step": 94530 }, { "epoch": 2.787409700722394, "grad_norm": 3.646176727213472, "learning_rate": 1.1414306574839676e-07, "loss": 1.0455, "step": 94535 }, { "epoch": 2.7875571281144036, "grad_norm": 3.4170837833060337, "learning_rate": 1.1398561513964415e-07, "loss": 1.0489, "step": 94540 }, { "epoch": 2.787704555506413, "grad_norm": 3.3363961859467, "learning_rate": 1.1382827152476739e-07, "loss": 1.011, "step": 94545 }, { "epoch": 2.7878519828984225, "grad_norm": 3.4780424460692836, "learning_rate": 1.1367103490839484e-07, "loss": 1.0434, "step": 94550 }, { "epoch": 2.787999410290432, "grad_norm": 3.5055250481651092, "learning_rate": 1.1351390529515532e-07, "loss": 1.0604, "step": 94555 }, { "epoch": 2.7881468376824414, "grad_norm": 3.477530791991064, "learning_rate": 1.1335688268967054e-07, "loss": 1.0527, "step": 94560 }, { "epoch": 2.788294265074451, "grad_norm": 3.3780241614973163, "learning_rate": 1.1319996709656305e-07, "loss": 1.0468, "step": 94565 }, { "epoch": 2.7884416924664603, "grad_norm": 3.5603131983762353, "learning_rate": 1.1304315852044875e-07, "loss": 1.0862, "step": 94570 }, { "epoch": 2.78858911985847, "grad_norm": 3.7821256218429613, "learning_rate": 1.1288645696594268e-07, "loss": 1.0732, "step": 94575 }, { "epoch": 2.7887365472504793, "grad_norm": 3.5120412277561606, "learning_rate": 1.1272986243765617e-07, "loss": 1.0466, "step": 94580 }, { "epoch": 2.7888839746424887, "grad_norm": 3.4958245514534134, "learning_rate": 1.1257337494019596e-07, "loss": 1.0573, "step": 94585 }, { "epoch": 2.789031402034498, "grad_norm": 3.548230098876014, "learning_rate": 1.1241699447816833e-07, "loss": 1.0555, "step": 94590 }, { "epoch": 2.7891788294265076, "grad_norm": 3.4710739043139816, "learning_rate": 1.1226072105617421e-07, "loss": 1.031, "step": 94595 }, { "epoch": 2.7893262568185166, "grad_norm": 3.5057496846719793, "learning_rate": 1.12104554678812e-07, "loss": 1.0196, "step": 94600 }, { "epoch": 2.7894736842105265, "grad_norm": 3.515921530894497, "learning_rate": 1.119484953506772e-07, "loss": 1.0515, "step": 94605 }, { "epoch": 2.7896211116025356, "grad_norm": 3.6223264760108824, "learning_rate": 1.1179254307636236e-07, "loss": 1.0545, "step": 94610 }, { "epoch": 2.789768538994545, "grad_norm": 3.532529126829767, "learning_rate": 1.1163669786045632e-07, "loss": 1.0624, "step": 94615 }, { "epoch": 2.7899159663865545, "grad_norm": 3.291406947490734, "learning_rate": 1.1148095970754502e-07, "loss": 1.0058, "step": 94620 }, { "epoch": 2.790063393778564, "grad_norm": 3.6092446020271023, "learning_rate": 1.1132532862221142e-07, "loss": 1.0563, "step": 94625 }, { "epoch": 2.7902108211705734, "grad_norm": 3.488204094629832, "learning_rate": 1.1116980460903523e-07, "loss": 1.0333, "step": 94630 }, { "epoch": 2.790358248562583, "grad_norm": 3.5539029350551314, "learning_rate": 1.1101438767259192e-07, "loss": 0.9966, "step": 94635 }, { "epoch": 2.7905056759545923, "grad_norm": 3.4641760695887682, "learning_rate": 1.1085907781745578e-07, "loss": 1.0572, "step": 94640 }, { "epoch": 2.7906531033466018, "grad_norm": 3.5949575358991646, "learning_rate": 1.1070387504819687e-07, "loss": 1.0298, "step": 94645 }, { "epoch": 2.7908005307386112, "grad_norm": 3.6804162343985753, "learning_rate": 1.1054877936938199e-07, "loss": 1.0656, "step": 94650 }, { "epoch": 2.7909479581306207, "grad_norm": 3.38495357841213, "learning_rate": 1.1039379078557538e-07, "loss": 1.0504, "step": 94655 }, { "epoch": 2.79109538552263, "grad_norm": 3.545822477931055, "learning_rate": 1.1023890930133674e-07, "loss": 1.0446, "step": 94660 }, { "epoch": 2.7912428129146396, "grad_norm": 3.5190701720251116, "learning_rate": 1.100841349212249e-07, "loss": 1.0238, "step": 94665 }, { "epoch": 2.791390240306649, "grad_norm": 3.446562892501878, "learning_rate": 1.0992946764979375e-07, "loss": 1.0332, "step": 94670 }, { "epoch": 2.7915376676986585, "grad_norm": 3.6320766001679794, "learning_rate": 1.0977490749159464e-07, "loss": 1.0509, "step": 94675 }, { "epoch": 2.791685095090668, "grad_norm": 3.304572362387764, "learning_rate": 1.0962045445117516e-07, "loss": 1.0176, "step": 94680 }, { "epoch": 2.7918325224826774, "grad_norm": 3.6170541630083077, "learning_rate": 1.0946610853308086e-07, "loss": 1.0036, "step": 94685 }, { "epoch": 2.791979949874687, "grad_norm": 3.510773413169027, "learning_rate": 1.0931186974185312e-07, "loss": 1.0374, "step": 94690 }, { "epoch": 2.792127377266696, "grad_norm": 3.559997206273237, "learning_rate": 1.0915773808203119e-07, "loss": 1.0112, "step": 94695 }, { "epoch": 2.792274804658706, "grad_norm": 3.4244037557351668, "learning_rate": 1.0900371355815065e-07, "loss": 1.0633, "step": 94700 }, { "epoch": 2.792422232050715, "grad_norm": 3.6035898872191603, "learning_rate": 1.0884979617474283e-07, "loss": 1.0544, "step": 94705 }, { "epoch": 2.7925696594427247, "grad_norm": 3.5828514968821326, "learning_rate": 1.0869598593633748e-07, "loss": 1.0179, "step": 94710 }, { "epoch": 2.7927170868347337, "grad_norm": 3.505020594540873, "learning_rate": 1.0854228284746096e-07, "loss": 1.1002, "step": 94715 }, { "epoch": 2.792864514226743, "grad_norm": 3.6265373028234005, "learning_rate": 1.0838868691263548e-07, "loss": 1.0541, "step": 94720 }, { "epoch": 2.7930119416187527, "grad_norm": 3.423159939980973, "learning_rate": 1.082351981363816e-07, "loss": 1.0123, "step": 94725 }, { "epoch": 2.793159369010762, "grad_norm": 3.559755977204344, "learning_rate": 1.0808181652321571e-07, "loss": 1.034, "step": 94730 }, { "epoch": 2.7933067964027716, "grad_norm": 3.5422105066106333, "learning_rate": 1.0792854207765004e-07, "loss": 1.0715, "step": 94735 }, { "epoch": 2.793454223794781, "grad_norm": 3.445543755402057, "learning_rate": 1.077753748041968e-07, "loss": 1.0388, "step": 94740 }, { "epoch": 2.7936016511867905, "grad_norm": 3.850986723886594, "learning_rate": 1.0762231470736114e-07, "loss": 1.0123, "step": 94745 }, { "epoch": 2.7937490785788, "grad_norm": 3.4502540596784486, "learning_rate": 1.0746936179164904e-07, "loss": 1.051, "step": 94750 }, { "epoch": 2.7938965059708094, "grad_norm": 3.698962197893603, "learning_rate": 1.0731651606155981e-07, "loss": 1.1023, "step": 94755 }, { "epoch": 2.794043933362819, "grad_norm": 3.5128000462587026, "learning_rate": 1.0716377752159154e-07, "loss": 1.0348, "step": 94760 }, { "epoch": 2.7941913607548283, "grad_norm": 3.4813063045940136, "learning_rate": 1.0701114617623936e-07, "loss": 1.0436, "step": 94765 }, { "epoch": 2.794338788146838, "grad_norm": 3.473266528323306, "learning_rate": 1.0685862202999302e-07, "loss": 1.0063, "step": 94770 }, { "epoch": 2.7944862155388472, "grad_norm": 3.506954721094263, "learning_rate": 1.067062050873427e-07, "loss": 1.0537, "step": 94775 }, { "epoch": 2.7946336429308567, "grad_norm": 3.513766158912939, "learning_rate": 1.0655389535277227e-07, "loss": 1.0553, "step": 94780 }, { "epoch": 2.794781070322866, "grad_norm": 3.6910990635997374, "learning_rate": 1.064016928307636e-07, "loss": 1.0248, "step": 94785 }, { "epoch": 2.794928497714875, "grad_norm": 3.6206881497074, "learning_rate": 1.0624959752579599e-07, "loss": 1.0913, "step": 94790 }, { "epoch": 2.795075925106885, "grad_norm": 3.763913826101822, "learning_rate": 1.0609760944234423e-07, "loss": 1.0001, "step": 94795 }, { "epoch": 2.795223352498894, "grad_norm": 3.3438803575487177, "learning_rate": 1.0594572858488138e-07, "loss": 1.0209, "step": 94800 }, { "epoch": 2.795370779890904, "grad_norm": 3.4373308041970176, "learning_rate": 1.0579395495787722e-07, "loss": 1.0448, "step": 94805 }, { "epoch": 2.795518207282913, "grad_norm": 3.5691246176496443, "learning_rate": 1.0564228856579608e-07, "loss": 1.0718, "step": 94810 }, { "epoch": 2.7956656346749225, "grad_norm": 3.4419860494360184, "learning_rate": 1.0549072941310314e-07, "loss": 0.9963, "step": 94815 }, { "epoch": 2.795813062066932, "grad_norm": 3.613210437070006, "learning_rate": 1.0533927750425609e-07, "loss": 1.0563, "step": 94820 }, { "epoch": 2.7959604894589414, "grad_norm": 3.4796530710592, "learning_rate": 1.0518793284371303e-07, "loss": 1.0201, "step": 94825 }, { "epoch": 2.796107916850951, "grad_norm": 3.566902067189937, "learning_rate": 1.0503669543592706e-07, "loss": 1.0145, "step": 94830 }, { "epoch": 2.7962553442429603, "grad_norm": 3.5897333938378284, "learning_rate": 1.0488556528534837e-07, "loss": 1.0264, "step": 94835 }, { "epoch": 2.7964027716349698, "grad_norm": 3.433268693049603, "learning_rate": 1.0473454239642424e-07, "loss": 1.0005, "step": 94840 }, { "epoch": 2.796550199026979, "grad_norm": 3.6242831071745982, "learning_rate": 1.0458362677359821e-07, "loss": 1.0714, "step": 94845 }, { "epoch": 2.7966976264189887, "grad_norm": 3.550648361053368, "learning_rate": 1.0443281842131253e-07, "loss": 1.0319, "step": 94850 }, { "epoch": 2.796845053810998, "grad_norm": 3.5847675863949306, "learning_rate": 1.0428211734400325e-07, "loss": 1.0558, "step": 94855 }, { "epoch": 2.7969924812030076, "grad_norm": 3.6178784678782603, "learning_rate": 1.0413152354610558e-07, "loss": 1.0532, "step": 94860 }, { "epoch": 2.797139908595017, "grad_norm": 3.4545839378980134, "learning_rate": 1.039810370320514e-07, "loss": 1.0218, "step": 94865 }, { "epoch": 2.7972873359870265, "grad_norm": 3.586096125672562, "learning_rate": 1.0383065780626796e-07, "loss": 1.0104, "step": 94870 }, { "epoch": 2.797434763379036, "grad_norm": 3.5918150584667834, "learning_rate": 1.0368038587318135e-07, "loss": 1.0539, "step": 94875 }, { "epoch": 2.7975821907710454, "grad_norm": 3.499597026706447, "learning_rate": 1.03530221237213e-07, "loss": 1.0501, "step": 94880 }, { "epoch": 2.7977296181630544, "grad_norm": 3.3416717661779485, "learning_rate": 1.0338016390278149e-07, "loss": 1.0239, "step": 94885 }, { "epoch": 2.7978770455550643, "grad_norm": 3.4813996657894872, "learning_rate": 1.0323021387430326e-07, "loss": 1.0279, "step": 94890 }, { "epoch": 2.7980244729470733, "grad_norm": 3.6369071500635926, "learning_rate": 1.0308037115618937e-07, "loss": 1.0582, "step": 94895 }, { "epoch": 2.7981719003390833, "grad_norm": 3.6837092559421594, "learning_rate": 1.0293063575285048e-07, "loss": 1.0169, "step": 94900 }, { "epoch": 2.7983193277310923, "grad_norm": 3.4478079107868416, "learning_rate": 1.0278100766869177e-07, "loss": 1.0505, "step": 94905 }, { "epoch": 2.7984667551231017, "grad_norm": 3.5929685238116873, "learning_rate": 1.0263148690811685e-07, "loss": 1.0864, "step": 94910 }, { "epoch": 2.798614182515111, "grad_norm": 3.621056860150576, "learning_rate": 1.0248207347552552e-07, "loss": 1.0719, "step": 94915 }, { "epoch": 2.7987616099071206, "grad_norm": 3.6322317343669765, "learning_rate": 1.0233276737531341e-07, "loss": 1.024, "step": 94920 }, { "epoch": 2.79890903729913, "grad_norm": 3.4671773564023027, "learning_rate": 1.0218356861187536e-07, "loss": 1.0408, "step": 94925 }, { "epoch": 2.7990564646911396, "grad_norm": 3.5291212416903757, "learning_rate": 1.0203447718960119e-07, "loss": 1.0539, "step": 94930 }, { "epoch": 2.799203892083149, "grad_norm": 3.656747967162137, "learning_rate": 1.0188549311287778e-07, "loss": 1.0584, "step": 94935 }, { "epoch": 2.7993513194751585, "grad_norm": 3.497473434330956, "learning_rate": 1.0173661638609e-07, "loss": 0.9985, "step": 94940 }, { "epoch": 2.799498746867168, "grad_norm": 3.6124620679512653, "learning_rate": 1.0158784701361764e-07, "loss": 1.0683, "step": 94945 }, { "epoch": 2.7996461742591774, "grad_norm": 3.598217028814494, "learning_rate": 1.014391849998389e-07, "loss": 1.0411, "step": 94950 }, { "epoch": 2.799793601651187, "grad_norm": 3.5284181058905366, "learning_rate": 1.0129063034912858e-07, "loss": 1.0871, "step": 94955 }, { "epoch": 2.7999410290431963, "grad_norm": 3.6518145378354503, "learning_rate": 1.0114218306585821e-07, "loss": 1.0845, "step": 94960 }, { "epoch": 2.8000884564352058, "grad_norm": 3.5806867130729834, "learning_rate": 1.0099384315439511e-07, "loss": 1.0425, "step": 94965 }, { "epoch": 2.800235883827215, "grad_norm": 3.5134647735153828, "learning_rate": 1.0084561061910496e-07, "loss": 1.0294, "step": 94970 }, { "epoch": 2.8003833112192247, "grad_norm": 3.4821739985694617, "learning_rate": 1.006974854643497e-07, "loss": 1.0023, "step": 94975 }, { "epoch": 2.8005307386112337, "grad_norm": 3.3152515892822976, "learning_rate": 1.0054946769448792e-07, "loss": 1.0583, "step": 94980 }, { "epoch": 2.8006781660032436, "grad_norm": 3.532611046741085, "learning_rate": 1.004015573138757e-07, "loss": 1.0596, "step": 94985 }, { "epoch": 2.8008255933952526, "grad_norm": 3.4335405987446883, "learning_rate": 1.0025375432686501e-07, "loss": 0.9817, "step": 94990 }, { "epoch": 2.8009730207872625, "grad_norm": 3.4668509945066885, "learning_rate": 1.0010605873780443e-07, "loss": 1.0753, "step": 94995 }, { "epoch": 2.8011204481792715, "grad_norm": 3.514420698952657, "learning_rate": 9.995847055104174e-08, "loss": 1.0558, "step": 95000 }, { "epoch": 2.8011204481792715, "eval_loss": 1.0767158269882202, "eval_runtime": 4.158, "eval_samples_per_second": 95.237, "eval_steps_per_second": 3.126, "step": 95000 }, { "epoch": 2.801267875571281, "grad_norm": 3.5319959269171326, "learning_rate": 9.981098977091804e-08, "loss": 1.0686, "step": 95005 }, { "epoch": 2.8014153029632904, "grad_norm": 3.5102164249624246, "learning_rate": 9.966361640177488e-08, "loss": 1.0519, "step": 95010 }, { "epoch": 2.8015627303553, "grad_norm": 3.457464170626284, "learning_rate": 9.951635044794751e-08, "loss": 1.0252, "step": 95015 }, { "epoch": 2.8017101577473094, "grad_norm": 3.4988943228999725, "learning_rate": 9.936919191377e-08, "loss": 0.9908, "step": 95020 }, { "epoch": 2.801857585139319, "grad_norm": 3.5511332099826807, "learning_rate": 9.922214080357345e-08, "loss": 1.0518, "step": 95025 }, { "epoch": 2.8020050125313283, "grad_norm": 3.423064449026778, "learning_rate": 9.907519712168272e-08, "loss": 0.9757, "step": 95030 }, { "epoch": 2.8021524399233377, "grad_norm": 3.7749809183495353, "learning_rate": 9.892836087242438e-08, "loss": 1.1146, "step": 95035 }, { "epoch": 2.802299867315347, "grad_norm": 3.659010025263164, "learning_rate": 9.878163206011745e-08, "loss": 1.0563, "step": 95040 }, { "epoch": 2.8024472947073567, "grad_norm": 3.526607793043345, "learning_rate": 9.863501068908057e-08, "loss": 1.0306, "step": 95045 }, { "epoch": 2.802594722099366, "grad_norm": 3.4692598972279938, "learning_rate": 9.848849676362778e-08, "loss": 1.0545, "step": 95050 }, { "epoch": 2.8027421494913756, "grad_norm": 3.5781669054555096, "learning_rate": 9.834209028807064e-08, "loss": 1.0653, "step": 95055 }, { "epoch": 2.802889576883385, "grad_norm": 3.3960195672569067, "learning_rate": 9.819579126671737e-08, "loss": 1.0408, "step": 95060 }, { "epoch": 2.8030370042753945, "grad_norm": 3.5637011006851202, "learning_rate": 9.804959970387286e-08, "loss": 1.0538, "step": 95065 }, { "epoch": 2.803184431667404, "grad_norm": 3.5853064524352507, "learning_rate": 9.790351560383826e-08, "loss": 1.0619, "step": 95070 }, { "epoch": 2.8033318590594134, "grad_norm": 3.4959756896896694, "learning_rate": 9.775753897091386e-08, "loss": 1.0764, "step": 95075 }, { "epoch": 2.803479286451423, "grad_norm": 3.5805181786410114, "learning_rate": 9.761166980939373e-08, "loss": 1.024, "step": 95080 }, { "epoch": 2.803626713843432, "grad_norm": 3.574781269858836, "learning_rate": 9.746590812357154e-08, "loss": 1.0678, "step": 95085 }, { "epoch": 2.8037741412354418, "grad_norm": 3.4965250221627806, "learning_rate": 9.732025391773508e-08, "loss": 0.9926, "step": 95090 }, { "epoch": 2.803921568627451, "grad_norm": 3.427461304922183, "learning_rate": 9.717470719617135e-08, "loss": 1.0407, "step": 95095 }, { "epoch": 2.8040689960194607, "grad_norm": 3.3611735320139697, "learning_rate": 9.702926796316319e-08, "loss": 1.0308, "step": 95100 }, { "epoch": 2.8042164234114697, "grad_norm": 3.5183483763655485, "learning_rate": 9.688393622298925e-08, "loss": 1.0429, "step": 95105 }, { "epoch": 2.804363850803479, "grad_norm": 3.5461587723297994, "learning_rate": 9.673871197992776e-08, "loss": 1.0494, "step": 95110 }, { "epoch": 2.8045112781954886, "grad_norm": 3.4703047788521646, "learning_rate": 9.659359523825073e-08, "loss": 1.0404, "step": 95115 }, { "epoch": 2.804658705587498, "grad_norm": 3.5511216757140027, "learning_rate": 9.644858600222934e-08, "loss": 0.9957, "step": 95120 }, { "epoch": 2.8048061329795075, "grad_norm": 3.5308814785848, "learning_rate": 9.630368427612973e-08, "loss": 1.009, "step": 95125 }, { "epoch": 2.804953560371517, "grad_norm": 3.61892179889372, "learning_rate": 9.615889006421643e-08, "loss": 1.0522, "step": 95130 }, { "epoch": 2.8051009877635265, "grad_norm": 3.4835839246031672, "learning_rate": 9.601420337075021e-08, "loss": 1.0347, "step": 95135 }, { "epoch": 2.805248415155536, "grad_norm": 3.6018482895114126, "learning_rate": 9.586962419998846e-08, "loss": 1.0748, "step": 95140 }, { "epoch": 2.8053958425475454, "grad_norm": 3.543189971174574, "learning_rate": 9.57251525561853e-08, "loss": 1.0602, "step": 95145 }, { "epoch": 2.805543269939555, "grad_norm": 3.573317882435169, "learning_rate": 9.558078844359274e-08, "loss": 1.0747, "step": 95150 }, { "epoch": 2.8056906973315643, "grad_norm": 3.5913056633780784, "learning_rate": 9.543653186645823e-08, "loss": 1.0273, "step": 95155 }, { "epoch": 2.8058381247235737, "grad_norm": 3.569562177001649, "learning_rate": 9.52923828290267e-08, "loss": 1.0211, "step": 95160 }, { "epoch": 2.805985552115583, "grad_norm": 3.6180191003559465, "learning_rate": 9.514834133554016e-08, "loss": 1.0737, "step": 95165 }, { "epoch": 2.8061329795075927, "grad_norm": 3.5681706183936375, "learning_rate": 9.500440739023688e-08, "loss": 1.0226, "step": 95170 }, { "epoch": 2.806280406899602, "grad_norm": 3.313452948743647, "learning_rate": 9.48605809973531e-08, "loss": 1.0128, "step": 95175 }, { "epoch": 2.806427834291611, "grad_norm": 3.613957076735304, "learning_rate": 9.471686216111955e-08, "loss": 1.0355, "step": 95180 }, { "epoch": 2.806575261683621, "grad_norm": 3.530855432581932, "learning_rate": 9.457325088576704e-08, "loss": 1.0053, "step": 95185 }, { "epoch": 2.80672268907563, "grad_norm": 3.5497736583175454, "learning_rate": 9.44297471755201e-08, "loss": 1.0655, "step": 95190 }, { "epoch": 2.80687011646764, "grad_norm": 3.5856248872315826, "learning_rate": 9.428635103460245e-08, "loss": 1.075, "step": 95195 }, { "epoch": 2.807017543859649, "grad_norm": 3.5713151201763598, "learning_rate": 9.414306246723319e-08, "loss": 1.0236, "step": 95200 }, { "epoch": 2.8071649712516584, "grad_norm": 3.519313662333397, "learning_rate": 9.399988147762855e-08, "loss": 1.0292, "step": 95205 }, { "epoch": 2.807312398643668, "grad_norm": 3.5910443974384116, "learning_rate": 9.385680807000266e-08, "loss": 1.0585, "step": 95210 }, { "epoch": 2.8074598260356773, "grad_norm": 3.352282893489991, "learning_rate": 9.371384224856463e-08, "loss": 1.0414, "step": 95215 }, { "epoch": 2.807607253427687, "grad_norm": 3.524298790626263, "learning_rate": 9.357098401752236e-08, "loss": 1.0872, "step": 95220 }, { "epoch": 2.8077546808196963, "grad_norm": 3.597122733041107, "learning_rate": 9.342823338107875e-08, "loss": 1.0611, "step": 95225 }, { "epoch": 2.8079021082117057, "grad_norm": 3.4738806254189822, "learning_rate": 9.328559034343459e-08, "loss": 1.0202, "step": 95230 }, { "epoch": 2.808049535603715, "grad_norm": 3.627278505713025, "learning_rate": 9.314305490878735e-08, "loss": 1.0566, "step": 95235 }, { "epoch": 2.8081969629957246, "grad_norm": 3.5724673968443534, "learning_rate": 9.300062708133161e-08, "loss": 1.0567, "step": 95240 }, { "epoch": 2.808344390387734, "grad_norm": 3.423120857227098, "learning_rate": 9.285830686525818e-08, "loss": 1.0468, "step": 95245 }, { "epoch": 2.8084918177797435, "grad_norm": 3.3462717389230656, "learning_rate": 9.271609426475577e-08, "loss": 1.0183, "step": 95250 }, { "epoch": 2.808639245171753, "grad_norm": 3.7624528043906125, "learning_rate": 9.257398928400732e-08, "loss": 1.0226, "step": 95255 }, { "epoch": 2.8087866725637625, "grad_norm": 3.540846799697533, "learning_rate": 9.243199192719654e-08, "loss": 1.0512, "step": 95260 }, { "epoch": 2.808934099955772, "grad_norm": 3.2433267933154863, "learning_rate": 9.22901021985001e-08, "loss": 1.0374, "step": 95265 }, { "epoch": 2.8090815273477814, "grad_norm": 3.3634164791993832, "learning_rate": 9.214832010209506e-08, "loss": 1.0923, "step": 95270 }, { "epoch": 2.8092289547397904, "grad_norm": 3.600156840229558, "learning_rate": 9.200664564215186e-08, "loss": 1.0422, "step": 95275 }, { "epoch": 2.8093763821318003, "grad_norm": 3.416303006589537, "learning_rate": 9.186507882284006e-08, "loss": 1.0216, "step": 95280 }, { "epoch": 2.8095238095238093, "grad_norm": 3.5124334592278554, "learning_rate": 9.172361964832593e-08, "loss": 1.0243, "step": 95285 }, { "epoch": 2.809671236915819, "grad_norm": 3.588680013562104, "learning_rate": 9.158226812277112e-08, "loss": 1.0722, "step": 95290 }, { "epoch": 2.8098186643078282, "grad_norm": 3.5922369057294454, "learning_rate": 9.144102425033607e-08, "loss": 1.0312, "step": 95295 }, { "epoch": 2.8099660916998377, "grad_norm": 3.4742503115650565, "learning_rate": 9.129988803517619e-08, "loss": 1.0701, "step": 95300 }, { "epoch": 2.810113519091847, "grad_norm": 3.574268388543759, "learning_rate": 9.115885948144484e-08, "loss": 1.0365, "step": 95305 }, { "epoch": 2.8102609464838566, "grad_norm": 3.4639195148145077, "learning_rate": 9.101793859329203e-08, "loss": 0.9922, "step": 95310 }, { "epoch": 2.810408373875866, "grad_norm": 3.686925568133672, "learning_rate": 9.087712537486443e-08, "loss": 1.062, "step": 95315 }, { "epoch": 2.8105558012678755, "grad_norm": 3.498231842720268, "learning_rate": 9.073641983030582e-08, "loss": 1.0081, "step": 95320 }, { "epoch": 2.810703228659885, "grad_norm": 3.2839810604076556, "learning_rate": 9.059582196375663e-08, "loss": 0.9897, "step": 95325 }, { "epoch": 2.8108506560518944, "grad_norm": 3.5731422823378725, "learning_rate": 9.045533177935355e-08, "loss": 0.9763, "step": 95330 }, { "epoch": 2.810998083443904, "grad_norm": 3.367646826618832, "learning_rate": 9.031494928123163e-08, "loss": 1.0321, "step": 95335 }, { "epoch": 2.8111455108359134, "grad_norm": 3.355881433254922, "learning_rate": 9.017467447352046e-08, "loss": 1.0426, "step": 95340 }, { "epoch": 2.811292938227923, "grad_norm": 3.3512607316102314, "learning_rate": 9.003450736034924e-08, "loss": 1.0208, "step": 95345 }, { "epoch": 2.8114403656199323, "grad_norm": 3.562443502861363, "learning_rate": 8.989444794584175e-08, "loss": 1.052, "step": 95350 }, { "epoch": 2.8115877930119417, "grad_norm": 3.4135165568072474, "learning_rate": 8.975449623411972e-08, "loss": 1.0273, "step": 95355 }, { "epoch": 2.811735220403951, "grad_norm": 3.3515805652126276, "learning_rate": 8.961465222930106e-08, "loss": 1.0316, "step": 95360 }, { "epoch": 2.8118826477959606, "grad_norm": 3.5285110684007357, "learning_rate": 8.947491593550045e-08, "loss": 1.0658, "step": 95365 }, { "epoch": 2.8120300751879697, "grad_norm": 3.538466687516892, "learning_rate": 8.933528735683122e-08, "loss": 1.0508, "step": 95370 }, { "epoch": 2.8121775025799796, "grad_norm": 3.3721134305271043, "learning_rate": 8.919576649740093e-08, "loss": 1.0315, "step": 95375 }, { "epoch": 2.8123249299719886, "grad_norm": 3.5512695109391386, "learning_rate": 8.905635336131507e-08, "loss": 1.0561, "step": 95380 }, { "epoch": 2.8124723573639985, "grad_norm": 3.3703486697610234, "learning_rate": 8.891704795267658e-08, "loss": 0.9897, "step": 95385 }, { "epoch": 2.8126197847560075, "grad_norm": 3.513495553088851, "learning_rate": 8.877785027558427e-08, "loss": 1.0276, "step": 95390 }, { "epoch": 2.812767212148017, "grad_norm": 3.4830753518497537, "learning_rate": 8.863876033413445e-08, "loss": 1.0476, "step": 95395 }, { "epoch": 2.8129146395400264, "grad_norm": 3.5295934146619055, "learning_rate": 8.849977813242011e-08, "loss": 1.0223, "step": 95400 }, { "epoch": 2.813062066932036, "grad_norm": 3.4163465447090404, "learning_rate": 8.836090367453087e-08, "loss": 1.045, "step": 95405 }, { "epoch": 2.8132094943240453, "grad_norm": 3.6258467234072755, "learning_rate": 8.822213696455306e-08, "loss": 1.0419, "step": 95410 }, { "epoch": 2.813356921716055, "grad_norm": 3.7251805246921035, "learning_rate": 8.808347800657048e-08, "loss": 1.0947, "step": 95415 }, { "epoch": 2.8135043491080642, "grad_norm": 3.5739457345525154, "learning_rate": 8.794492680466241e-08, "loss": 1.0458, "step": 95420 }, { "epoch": 2.8136517765000737, "grad_norm": 3.5322859395075987, "learning_rate": 8.780648336290678e-08, "loss": 1.0294, "step": 95425 }, { "epoch": 2.813799203892083, "grad_norm": 3.5473751049349973, "learning_rate": 8.766814768537746e-08, "loss": 1.0548, "step": 95430 }, { "epoch": 2.8139466312840926, "grad_norm": 3.60563905372271, "learning_rate": 8.75299197761449e-08, "loss": 1.0402, "step": 95435 }, { "epoch": 2.814094058676102, "grad_norm": 3.556502423409089, "learning_rate": 8.739179963927549e-08, "loss": 1.0563, "step": 95440 }, { "epoch": 2.8142414860681115, "grad_norm": 3.6524194202864897, "learning_rate": 8.725378727883593e-08, "loss": 1.0261, "step": 95445 }, { "epoch": 2.814388913460121, "grad_norm": 3.556951045241821, "learning_rate": 8.711588269888549e-08, "loss": 1.0483, "step": 95450 }, { "epoch": 2.8145363408521304, "grad_norm": 3.390812555047053, "learning_rate": 8.697808590348257e-08, "loss": 1.0574, "step": 95455 }, { "epoch": 2.81468376824414, "grad_norm": 3.5224057139106537, "learning_rate": 8.684039689668272e-08, "loss": 1.0355, "step": 95460 }, { "epoch": 2.814831195636149, "grad_norm": 3.662202827835509, "learning_rate": 8.670281568253683e-08, "loss": 1.0313, "step": 95465 }, { "epoch": 2.814978623028159, "grad_norm": 3.8233131966723373, "learning_rate": 8.656534226509333e-08, "loss": 1.0381, "step": 95470 }, { "epoch": 2.815126050420168, "grad_norm": 3.5106190347098067, "learning_rate": 8.642797664839817e-08, "loss": 1.0247, "step": 95475 }, { "epoch": 2.8152734778121777, "grad_norm": 3.6497718919448565, "learning_rate": 8.62907188364935e-08, "loss": 1.0462, "step": 95480 }, { "epoch": 2.8154209052041868, "grad_norm": 3.632022776522423, "learning_rate": 8.615356883341738e-08, "loss": 1.0548, "step": 95485 }, { "epoch": 2.815568332596196, "grad_norm": 3.47064610855095, "learning_rate": 8.601652664320656e-08, "loss": 0.9878, "step": 95490 }, { "epoch": 2.8157157599882057, "grad_norm": 3.5474827196364864, "learning_rate": 8.587959226989281e-08, "loss": 1.0617, "step": 95495 }, { "epoch": 2.815863187380215, "grad_norm": 3.6010915804860373, "learning_rate": 8.574276571750625e-08, "loss": 1.0223, "step": 95500 }, { "epoch": 2.815863187380215, "eval_loss": 1.0767959356307983, "eval_runtime": 4.2431, "eval_samples_per_second": 93.328, "eval_steps_per_second": 3.064, "step": 95500 }, { "epoch": 2.8160106147722246, "grad_norm": 3.589160849285533, "learning_rate": 8.560604699007324e-08, "loss": 1.0576, "step": 95505 }, { "epoch": 2.816158042164234, "grad_norm": 3.627707092897406, "learning_rate": 8.54694360916164e-08, "loss": 1.0429, "step": 95510 }, { "epoch": 2.8163054695562435, "grad_norm": 3.4972018837732555, "learning_rate": 8.533293302615541e-08, "loss": 1.0168, "step": 95515 }, { "epoch": 2.816452896948253, "grad_norm": 3.490671018424075, "learning_rate": 8.519653779770833e-08, "loss": 1.0215, "step": 95520 }, { "epoch": 2.8166003243402624, "grad_norm": 3.5604431025851175, "learning_rate": 8.506025041028695e-08, "loss": 1.0528, "step": 95525 }, { "epoch": 2.816747751732272, "grad_norm": 3.4184549975364575, "learning_rate": 8.492407086790346e-08, "loss": 0.9893, "step": 95530 }, { "epoch": 2.8168951791242813, "grad_norm": 3.3994894948154477, "learning_rate": 8.478799917456425e-08, "loss": 0.9946, "step": 95535 }, { "epoch": 2.817042606516291, "grad_norm": 3.4480888869480575, "learning_rate": 8.46520353342728e-08, "loss": 1.0616, "step": 95540 }, { "epoch": 2.8171900339083003, "grad_norm": 3.53133407061246, "learning_rate": 8.451617935103129e-08, "loss": 1.1177, "step": 95545 }, { "epoch": 2.8173374613003097, "grad_norm": 3.5033307832183023, "learning_rate": 8.438043122883571e-08, "loss": 1.0785, "step": 95550 }, { "epoch": 2.817484888692319, "grad_norm": 3.386534625493698, "learning_rate": 8.424479097168286e-08, "loss": 1.051, "step": 95555 }, { "epoch": 2.8176323160843286, "grad_norm": 3.508024733834633, "learning_rate": 8.410925858356205e-08, "loss": 1.0417, "step": 95560 }, { "epoch": 2.817779743476338, "grad_norm": 3.348210140637747, "learning_rate": 8.397383406846259e-08, "loss": 0.9888, "step": 95565 }, { "epoch": 2.817927170868347, "grad_norm": 3.5425282964115046, "learning_rate": 8.383851743036922e-08, "loss": 1.0515, "step": 95570 }, { "epoch": 2.818074598260357, "grad_norm": 3.6097538855021014, "learning_rate": 8.370330867326373e-08, "loss": 1.0355, "step": 95575 }, { "epoch": 2.818222025652366, "grad_norm": 3.4281402004531416, "learning_rate": 8.356820780112504e-08, "loss": 1.04, "step": 95580 }, { "epoch": 2.818369453044376, "grad_norm": 3.3062297049236697, "learning_rate": 8.343321481792829e-08, "loss": 1.0448, "step": 95585 }, { "epoch": 2.818516880436385, "grad_norm": 3.331441828718834, "learning_rate": 8.329832972764573e-08, "loss": 1.0297, "step": 95590 }, { "epoch": 2.8186643078283944, "grad_norm": 3.454785713130393, "learning_rate": 8.316355253424751e-08, "loss": 1.0082, "step": 95595 }, { "epoch": 2.818811735220404, "grad_norm": 3.7321390259592473, "learning_rate": 8.302888324169796e-08, "loss": 1.0312, "step": 95600 }, { "epoch": 2.8189591626124133, "grad_norm": 3.3394659040618486, "learning_rate": 8.289432185396143e-08, "loss": 1.0099, "step": 95605 }, { "epoch": 2.8191065900044228, "grad_norm": 3.4761671468069544, "learning_rate": 8.275986837499638e-08, "loss": 1.0563, "step": 95610 }, { "epoch": 2.819254017396432, "grad_norm": 3.4520850616779, "learning_rate": 8.26255228087601e-08, "loss": 1.0518, "step": 95615 }, { "epoch": 2.8194014447884417, "grad_norm": 3.427314881132456, "learning_rate": 8.249128515920565e-08, "loss": 1.0719, "step": 95620 }, { "epoch": 2.819548872180451, "grad_norm": 3.53421710594091, "learning_rate": 8.235715543028238e-08, "loss": 1.0287, "step": 95625 }, { "epoch": 2.8196962995724606, "grad_norm": 3.6153009784587367, "learning_rate": 8.22231336259384e-08, "loss": 1.0708, "step": 95630 }, { "epoch": 2.81984372696447, "grad_norm": 3.523399638707139, "learning_rate": 8.208921975011635e-08, "loss": 1.0292, "step": 95635 }, { "epoch": 2.8199911543564795, "grad_norm": 3.537768278892073, "learning_rate": 8.195541380675769e-08, "loss": 1.0238, "step": 95640 }, { "epoch": 2.820138581748489, "grad_norm": 3.533752084595045, "learning_rate": 8.182171579979927e-08, "loss": 1.0226, "step": 95645 }, { "epoch": 2.8202860091404984, "grad_norm": 3.5626606331381288, "learning_rate": 8.168812573317585e-08, "loss": 1.0336, "step": 95650 }, { "epoch": 2.820433436532508, "grad_norm": 3.3707110393046884, "learning_rate": 8.155464361081763e-08, "loss": 1.0305, "step": 95655 }, { "epoch": 2.8205808639245173, "grad_norm": 3.438555803875736, "learning_rate": 8.142126943665313e-08, "loss": 1.0055, "step": 95660 }, { "epoch": 2.8207282913165264, "grad_norm": 3.574926140621202, "learning_rate": 8.12880032146067e-08, "loss": 1.0898, "step": 95665 }, { "epoch": 2.8208757187085363, "grad_norm": 3.4988130665273425, "learning_rate": 8.115484494860062e-08, "loss": 1.0042, "step": 95670 }, { "epoch": 2.8210231461005453, "grad_norm": 3.4894180331096125, "learning_rate": 8.102179464255218e-08, "loss": 1.0197, "step": 95675 }, { "epoch": 2.821170573492555, "grad_norm": 3.482869188270352, "learning_rate": 8.088885230037702e-08, "loss": 1.0219, "step": 95680 }, { "epoch": 2.821318000884564, "grad_norm": 3.410714456520631, "learning_rate": 8.075601792598697e-08, "loss": 1.0543, "step": 95685 }, { "epoch": 2.8214654282765737, "grad_norm": 3.58919345828368, "learning_rate": 8.062329152329059e-08, "loss": 1.0508, "step": 95690 }, { "epoch": 2.821612855668583, "grad_norm": 3.4695449362704167, "learning_rate": 8.049067309619476e-08, "loss": 1.0496, "step": 95695 }, { "epoch": 2.8217602830605926, "grad_norm": 3.741850648610623, "learning_rate": 8.03581626486001e-08, "loss": 1.0441, "step": 95700 }, { "epoch": 2.821907710452602, "grad_norm": 3.346934361907017, "learning_rate": 8.022576018440725e-08, "loss": 1.0168, "step": 95705 }, { "epoch": 2.8220551378446115, "grad_norm": 3.695507668439266, "learning_rate": 8.009346570751183e-08, "loss": 1.0265, "step": 95710 }, { "epoch": 2.822202565236621, "grad_norm": 3.4527028751008237, "learning_rate": 7.996127922180657e-08, "loss": 1.0112, "step": 95715 }, { "epoch": 2.8223499926286304, "grad_norm": 3.6654549422133833, "learning_rate": 7.982920073118169e-08, "loss": 1.0473, "step": 95720 }, { "epoch": 2.82249742002064, "grad_norm": 3.362403250844321, "learning_rate": 7.969723023952324e-08, "loss": 1.0417, "step": 95725 }, { "epoch": 2.8226448474126493, "grad_norm": 3.570204956946604, "learning_rate": 7.95653677507148e-08, "loss": 1.0917, "step": 95730 }, { "epoch": 2.8227922748046588, "grad_norm": 3.620683001225833, "learning_rate": 7.9433613268637e-08, "loss": 1.0197, "step": 95735 }, { "epoch": 2.8229397021966682, "grad_norm": 3.495744918352408, "learning_rate": 7.93019667971663e-08, "loss": 1.0496, "step": 95740 }, { "epoch": 2.8230871295886777, "grad_norm": 3.483974970801257, "learning_rate": 7.917042834017673e-08, "loss": 1.0133, "step": 95745 }, { "epoch": 2.823234556980687, "grad_norm": 3.4502190327488287, "learning_rate": 7.903899790153893e-08, "loss": 1.0449, "step": 95750 }, { "epoch": 2.8233819843726966, "grad_norm": 3.319200389083887, "learning_rate": 7.890767548512021e-08, "loss": 1.0213, "step": 95755 }, { "epoch": 2.8235294117647056, "grad_norm": 3.5925895487534025, "learning_rate": 7.877646109478498e-08, "loss": 1.0331, "step": 95760 }, { "epoch": 2.8236768391567155, "grad_norm": 3.670377084119756, "learning_rate": 7.864535473439474e-08, "loss": 1.0337, "step": 95765 }, { "epoch": 2.8238242665487245, "grad_norm": 3.5553760213265915, "learning_rate": 7.851435640780766e-08, "loss": 1.026, "step": 95770 }, { "epoch": 2.8239716939407344, "grad_norm": 3.4413780357817436, "learning_rate": 7.83834661188773e-08, "loss": 1.0182, "step": 95775 }, { "epoch": 2.8241191213327435, "grad_norm": 3.576260164501972, "learning_rate": 7.825268387145645e-08, "loss": 1.0304, "step": 95780 }, { "epoch": 2.824266548724753, "grad_norm": 3.5670017430351817, "learning_rate": 7.812200966939243e-08, "loss": 1.0556, "step": 95785 }, { "epoch": 2.8244139761167624, "grad_norm": 3.499814360387195, "learning_rate": 7.799144351653173e-08, "loss": 1.0575, "step": 95790 }, { "epoch": 2.824561403508772, "grad_norm": 3.417860301798474, "learning_rate": 7.786098541671588e-08, "loss": 1.037, "step": 95795 }, { "epoch": 2.8247088309007813, "grad_norm": 3.64236891073415, "learning_rate": 7.773063537378305e-08, "loss": 1.0054, "step": 95800 }, { "epoch": 2.8248562582927907, "grad_norm": 3.4269362407144275, "learning_rate": 7.76003933915706e-08, "loss": 1.0404, "step": 95805 }, { "epoch": 2.8250036856848, "grad_norm": 3.418326900849637, "learning_rate": 7.747025947390878e-08, "loss": 1.008, "step": 95810 }, { "epoch": 2.8251511130768097, "grad_norm": 3.6197033272701913, "learning_rate": 7.734023362462914e-08, "loss": 1.0368, "step": 95815 }, { "epoch": 2.825298540468819, "grad_norm": 3.5003927066523333, "learning_rate": 7.72103158475565e-08, "loss": 1.0058, "step": 95820 }, { "epoch": 2.8254459678608286, "grad_norm": 3.480033435189355, "learning_rate": 7.708050614651408e-08, "loss": 1.0259, "step": 95825 }, { "epoch": 2.825593395252838, "grad_norm": 3.583358162337453, "learning_rate": 7.695080452532171e-08, "loss": 1.0472, "step": 95830 }, { "epoch": 2.8257408226448475, "grad_norm": 3.4464990849437824, "learning_rate": 7.682121098779638e-08, "loss": 1.0693, "step": 95835 }, { "epoch": 2.825888250036857, "grad_norm": 3.4392394044763885, "learning_rate": 7.669172553775125e-08, "loss": 1.0174, "step": 95840 }, { "epoch": 2.8260356774288664, "grad_norm": 3.5538776953172606, "learning_rate": 7.656234817899663e-08, "loss": 1.0617, "step": 95845 }, { "epoch": 2.826183104820876, "grad_norm": 3.510859378619939, "learning_rate": 7.643307891533905e-08, "loss": 1.0369, "step": 95850 }, { "epoch": 2.826330532212885, "grad_norm": 3.4127173843820673, "learning_rate": 7.630391775058338e-08, "loss": 1.0266, "step": 95855 }, { "epoch": 2.826477959604895, "grad_norm": 3.6318701565148706, "learning_rate": 7.61748646885295e-08, "loss": 1.0484, "step": 95860 }, { "epoch": 2.826625386996904, "grad_norm": 3.6026656437776072, "learning_rate": 7.604591973297562e-08, "loss": 1.0488, "step": 95865 }, { "epoch": 2.8267728143889137, "grad_norm": 3.60489236706595, "learning_rate": 7.591708288771579e-08, "loss": 1.0396, "step": 95870 }, { "epoch": 2.8269202417809227, "grad_norm": 3.5219418055117107, "learning_rate": 7.578835415654073e-08, "loss": 1.0487, "step": 95875 }, { "epoch": 2.827067669172932, "grad_norm": 3.533320374088594, "learning_rate": 7.565973354323949e-08, "loss": 1.0451, "step": 95880 }, { "epoch": 2.8272150965649416, "grad_norm": 3.501975561237074, "learning_rate": 7.553122105159571e-08, "loss": 1.0942, "step": 95885 }, { "epoch": 2.827362523956951, "grad_norm": 3.6785819336386836, "learning_rate": 7.540281668539178e-08, "loss": 1.062, "step": 95890 }, { "epoch": 2.8275099513489605, "grad_norm": 3.4343729837107606, "learning_rate": 7.527452044840594e-08, "loss": 1.0602, "step": 95895 }, { "epoch": 2.82765737874097, "grad_norm": 3.5463556140413863, "learning_rate": 7.514633234441349e-08, "loss": 1.0566, "step": 95900 }, { "epoch": 2.8278048061329795, "grad_norm": 3.4744979952944286, "learning_rate": 7.501825237718599e-08, "loss": 1.0259, "step": 95905 }, { "epoch": 2.827952233524989, "grad_norm": 3.4805498851432555, "learning_rate": 7.489028055049336e-08, "loss": 1.0592, "step": 95910 }, { "epoch": 2.8280996609169984, "grad_norm": 3.461946863321138, "learning_rate": 7.476241686810047e-08, "loss": 1.0194, "step": 95915 }, { "epoch": 2.828247088309008, "grad_norm": 3.6000982562715307, "learning_rate": 7.46346613337702e-08, "loss": 1.0221, "step": 95920 }, { "epoch": 2.8283945157010173, "grad_norm": 3.3832343089635475, "learning_rate": 7.450701395126158e-08, "loss": 1.0173, "step": 95925 }, { "epoch": 2.8285419430930268, "grad_norm": 3.5987293051289453, "learning_rate": 7.437947472433163e-08, "loss": 0.9927, "step": 95930 }, { "epoch": 2.828689370485036, "grad_norm": 3.583955522423343, "learning_rate": 7.425204365673275e-08, "loss": 1.0336, "step": 95935 }, { "epoch": 2.8288367978770457, "grad_norm": 3.503514975509639, "learning_rate": 7.412472075221444e-08, "loss": 1.0296, "step": 95940 }, { "epoch": 2.828984225269055, "grad_norm": 3.391795836983237, "learning_rate": 7.399750601452329e-08, "loss": 1.0258, "step": 95945 }, { "epoch": 2.8291316526610646, "grad_norm": 3.4393200235337447, "learning_rate": 7.38703994474034e-08, "loss": 1.0267, "step": 95950 }, { "epoch": 2.829279080053074, "grad_norm": 3.415354867492811, "learning_rate": 7.37434010545951e-08, "loss": 1.0063, "step": 95955 }, { "epoch": 2.829426507445083, "grad_norm": 3.5385041242416637, "learning_rate": 7.361651083983456e-08, "loss": 1.0422, "step": 95960 }, { "epoch": 2.829573934837093, "grad_norm": 3.545468267600317, "learning_rate": 7.34897288068563e-08, "loss": 1.044, "step": 95965 }, { "epoch": 2.829721362229102, "grad_norm": 3.4589086666404714, "learning_rate": 7.33630549593911e-08, "loss": 1.0637, "step": 95970 }, { "epoch": 2.829868789621112, "grad_norm": 3.606955402187958, "learning_rate": 7.323648930116597e-08, "loss": 1.0457, "step": 95975 }, { "epoch": 2.830016217013121, "grad_norm": 3.5883722656114205, "learning_rate": 7.311003183590584e-08, "loss": 1.0377, "step": 95980 }, { "epoch": 2.8301636444051304, "grad_norm": 3.5132370362828356, "learning_rate": 7.298368256733148e-08, "loss": 0.9896, "step": 95985 }, { "epoch": 2.83031107179714, "grad_norm": 3.3361478800989466, "learning_rate": 7.285744149916076e-08, "loss": 1.0461, "step": 95990 }, { "epoch": 2.8304584991891493, "grad_norm": 3.6735407020469455, "learning_rate": 7.273130863510905e-08, "loss": 1.0294, "step": 95995 }, { "epoch": 2.8306059265811587, "grad_norm": 3.373773791104643, "learning_rate": 7.260528397888752e-08, "loss": 1.0122, "step": 96000 }, { "epoch": 2.8306059265811587, "eval_loss": 1.0769128799438477, "eval_runtime": 4.1851, "eval_samples_per_second": 94.621, "eval_steps_per_second": 3.106, "step": 96000 }, { "epoch": 2.830753353973168, "grad_norm": 3.571201849958141, "learning_rate": 7.247936753420448e-08, "loss": 1.0329, "step": 96005 }, { "epoch": 2.8309007813651776, "grad_norm": 3.585699477646499, "learning_rate": 7.235355930476528e-08, "loss": 1.0311, "step": 96010 }, { "epoch": 2.831048208757187, "grad_norm": 3.4863842474664577, "learning_rate": 7.22278592942724e-08, "loss": 1.025, "step": 96015 }, { "epoch": 2.8311956361491966, "grad_norm": 3.6076698842309187, "learning_rate": 7.210226750642409e-08, "loss": 1.0375, "step": 96020 }, { "epoch": 2.831343063541206, "grad_norm": 3.4595088900185083, "learning_rate": 7.19767839449162e-08, "loss": 1.0396, "step": 96025 }, { "epoch": 2.8314904909332155, "grad_norm": 3.5870563570945557, "learning_rate": 7.185140861344156e-08, "loss": 1.0504, "step": 96030 }, { "epoch": 2.831637918325225, "grad_norm": 3.6271321244338153, "learning_rate": 7.17261415156889e-08, "loss": 1.0339, "step": 96035 }, { "epoch": 2.8317853457172344, "grad_norm": 3.5343862266051307, "learning_rate": 7.160098265534487e-08, "loss": 1.0167, "step": 96040 }, { "epoch": 2.831932773109244, "grad_norm": 3.402265600531631, "learning_rate": 7.147593203609193e-08, "loss": 0.9771, "step": 96045 }, { "epoch": 2.8320802005012533, "grad_norm": 3.406256424103292, "learning_rate": 7.135098966161046e-08, "loss": 1.0226, "step": 96050 }, { "epoch": 2.8322276278932623, "grad_norm": 3.665105135623187, "learning_rate": 7.122615553557671e-08, "loss": 1.0257, "step": 96055 }, { "epoch": 2.8323750552852722, "grad_norm": 3.431600596518038, "learning_rate": 7.110142966166397e-08, "loss": 1.0064, "step": 96060 }, { "epoch": 2.8325224826772812, "grad_norm": 3.277961566605383, "learning_rate": 7.097681204354265e-08, "loss": 0.9883, "step": 96065 }, { "epoch": 2.832669910069291, "grad_norm": 3.4274701541906016, "learning_rate": 7.085230268487938e-08, "loss": 1.0223, "step": 96070 }, { "epoch": 2.8328173374613, "grad_norm": 3.5621269086389185, "learning_rate": 7.072790158933833e-08, "loss": 1.0223, "step": 96075 }, { "epoch": 2.8329647648533096, "grad_norm": 3.4818457452893146, "learning_rate": 7.060360876058034e-08, "loss": 1.0159, "step": 96080 }, { "epoch": 2.833112192245319, "grad_norm": 3.678233640518735, "learning_rate": 7.047942420226203e-08, "loss": 1.0445, "step": 96085 }, { "epoch": 2.8332596196373285, "grad_norm": 3.715959384476998, "learning_rate": 7.035534791803883e-08, "loss": 1.0544, "step": 96090 }, { "epoch": 2.833407047029338, "grad_norm": 3.4996659422035745, "learning_rate": 7.023137991156072e-08, "loss": 1.0489, "step": 96095 }, { "epoch": 2.8335544744213474, "grad_norm": 3.6195560793090302, "learning_rate": 7.010752018647649e-08, "loss": 1.0486, "step": 96100 }, { "epoch": 2.833701901813357, "grad_norm": 3.3680855237326024, "learning_rate": 6.998376874643025e-08, "loss": 1.0281, "step": 96105 }, { "epoch": 2.8338493292053664, "grad_norm": 3.5072199834090503, "learning_rate": 6.986012559506372e-08, "loss": 1.04, "step": 96110 }, { "epoch": 2.833996756597376, "grad_norm": 3.4325354985234684, "learning_rate": 6.973659073601563e-08, "loss": 1.0339, "step": 96115 }, { "epoch": 2.8341441839893853, "grad_norm": 3.4657129027363687, "learning_rate": 6.961316417292016e-08, "loss": 1.0122, "step": 96120 }, { "epoch": 2.8342916113813947, "grad_norm": 3.558548623779902, "learning_rate": 6.948984590941065e-08, "loss": 1.0385, "step": 96125 }, { "epoch": 2.834439038773404, "grad_norm": 3.3872961379458175, "learning_rate": 6.936663594911463e-08, "loss": 1.0367, "step": 96130 }, { "epoch": 2.8345864661654137, "grad_norm": 3.553640674964726, "learning_rate": 6.924353429565836e-08, "loss": 1.0503, "step": 96135 }, { "epoch": 2.834733893557423, "grad_norm": 3.4672590205769653, "learning_rate": 6.912054095266435e-08, "loss": 1.0273, "step": 96140 }, { "epoch": 2.8348813209494326, "grad_norm": 3.4783285445340266, "learning_rate": 6.899765592375055e-08, "loss": 1.0055, "step": 96145 }, { "epoch": 2.8350287483414416, "grad_norm": 3.6371585512880387, "learning_rate": 6.887487921253488e-08, "loss": 1.0522, "step": 96150 }, { "epoch": 2.8351761757334515, "grad_norm": 3.485055485995929, "learning_rate": 6.875221082262905e-08, "loss": 1.0251, "step": 96155 }, { "epoch": 2.8353236031254605, "grad_norm": 3.5807162359517455, "learning_rate": 6.86296507576431e-08, "loss": 1.0635, "step": 96160 }, { "epoch": 2.8354710305174704, "grad_norm": 3.6542422628936015, "learning_rate": 6.85071990211833e-08, "loss": 1.1167, "step": 96165 }, { "epoch": 2.8356184579094794, "grad_norm": 3.5112101623541334, "learning_rate": 6.83848556168526e-08, "loss": 1.0152, "step": 96170 }, { "epoch": 2.835765885301489, "grad_norm": 3.35275862424043, "learning_rate": 6.826262054825186e-08, "loss": 1.0076, "step": 96175 }, { "epoch": 2.8359133126934983, "grad_norm": 3.4696483150283335, "learning_rate": 6.81404938189778e-08, "loss": 1.0257, "step": 96180 }, { "epoch": 2.836060740085508, "grad_norm": 3.501299094122497, "learning_rate": 6.801847543262338e-08, "loss": 1.0354, "step": 96185 }, { "epoch": 2.8362081674775173, "grad_norm": 3.3521754204280088, "learning_rate": 6.789656539278031e-08, "loss": 1.0359, "step": 96190 }, { "epoch": 2.8363555948695267, "grad_norm": 3.386859155067094, "learning_rate": 6.777476370303529e-08, "loss": 1.0558, "step": 96195 }, { "epoch": 2.836503022261536, "grad_norm": 3.4852812316954185, "learning_rate": 6.765307036697213e-08, "loss": 1.0543, "step": 96200 }, { "epoch": 2.8366504496535456, "grad_norm": 3.375321101804765, "learning_rate": 6.753148538817214e-08, "loss": 1.0462, "step": 96205 }, { "epoch": 2.836797877045555, "grad_norm": 3.5643486512807314, "learning_rate": 6.741000877021327e-08, "loss": 1.0134, "step": 96210 }, { "epoch": 2.8369453044375645, "grad_norm": 3.492886207285228, "learning_rate": 6.728864051667018e-08, "loss": 1.017, "step": 96215 }, { "epoch": 2.837092731829574, "grad_norm": 3.576900751204835, "learning_rate": 6.716738063111378e-08, "loss": 1.0528, "step": 96220 }, { "epoch": 2.8372401592215835, "grad_norm": 3.6777957951219835, "learning_rate": 6.704622911711284e-08, "loss": 1.0441, "step": 96225 }, { "epoch": 2.837387586613593, "grad_norm": 3.3398404571650846, "learning_rate": 6.692518597823202e-08, "loss": 0.9945, "step": 96230 }, { "epoch": 2.8375350140056024, "grad_norm": 3.6679394905496405, "learning_rate": 6.680425121803307e-08, "loss": 1.0395, "step": 96235 }, { "epoch": 2.837682441397612, "grad_norm": 3.5109628753412094, "learning_rate": 6.66834248400748e-08, "loss": 1.0473, "step": 96240 }, { "epoch": 2.837829868789621, "grad_norm": 3.6216190311552148, "learning_rate": 6.656270684791271e-08, "loss": 1.0314, "step": 96245 }, { "epoch": 2.8379772961816307, "grad_norm": 3.752906388561625, "learning_rate": 6.644209724509895e-08, "loss": 1.0715, "step": 96250 }, { "epoch": 2.8381247235736398, "grad_norm": 3.405207351903073, "learning_rate": 6.632159603518276e-08, "loss": 1.0103, "step": 96255 }, { "epoch": 2.8382721509656497, "grad_norm": 3.6690771409120106, "learning_rate": 6.620120322171006e-08, "loss": 1.0466, "step": 96260 }, { "epoch": 2.8384195783576587, "grad_norm": 3.686122329331373, "learning_rate": 6.608091880822342e-08, "loss": 1.0421, "step": 96265 }, { "epoch": 2.838567005749668, "grad_norm": 3.5754740954591506, "learning_rate": 6.596074279826211e-08, "loss": 1.0258, "step": 96270 }, { "epoch": 2.8387144331416776, "grad_norm": 3.431901671816764, "learning_rate": 6.584067519536247e-08, "loss": 1.0374, "step": 96275 }, { "epoch": 2.838861860533687, "grad_norm": 3.6167312973960124, "learning_rate": 6.572071600305832e-08, "loss": 1.064, "step": 96280 }, { "epoch": 2.8390092879256965, "grad_norm": 3.4899665156800173, "learning_rate": 6.560086522487852e-08, "loss": 1.0491, "step": 96285 }, { "epoch": 2.839156715317706, "grad_norm": 3.5789526534337908, "learning_rate": 6.548112286435107e-08, "loss": 1.0439, "step": 96290 }, { "epoch": 2.8393041427097154, "grad_norm": 3.4472936545168054, "learning_rate": 6.536148892499816e-08, "loss": 0.977, "step": 96295 }, { "epoch": 2.839451570101725, "grad_norm": 3.694727011584174, "learning_rate": 6.524196341034156e-08, "loss": 1.0836, "step": 96300 }, { "epoch": 2.8395989974937343, "grad_norm": 3.571797928147751, "learning_rate": 6.51225463238972e-08, "loss": 1.0584, "step": 96305 }, { "epoch": 2.839746424885744, "grad_norm": 3.5919433290020018, "learning_rate": 6.500323766918017e-08, "loss": 1.0379, "step": 96310 }, { "epoch": 2.8398938522777533, "grad_norm": 3.6027795480620086, "learning_rate": 6.48840374497002e-08, "loss": 1.0458, "step": 96315 }, { "epoch": 2.8400412796697627, "grad_norm": 3.4484096054093354, "learning_rate": 6.476494566896571e-08, "loss": 1.0428, "step": 96320 }, { "epoch": 2.840188707061772, "grad_norm": 3.589841705162372, "learning_rate": 6.464596233048099e-08, "loss": 1.0811, "step": 96325 }, { "epoch": 2.8403361344537816, "grad_norm": 3.5197013632120706, "learning_rate": 6.452708743774613e-08, "loss": 1.031, "step": 96330 }, { "epoch": 2.840483561845791, "grad_norm": 3.59373709996322, "learning_rate": 6.440832099426087e-08, "loss": 1.0668, "step": 96335 }, { "epoch": 2.8406309892378, "grad_norm": 3.5270943756548294, "learning_rate": 6.428966300351905e-08, "loss": 1.0593, "step": 96340 }, { "epoch": 2.84077841662981, "grad_norm": 3.4261894755104185, "learning_rate": 6.417111346901247e-08, "loss": 0.99, "step": 96345 }, { "epoch": 2.840925844021819, "grad_norm": 3.6673614407706103, "learning_rate": 6.405267239423002e-08, "loss": 1.0239, "step": 96350 }, { "epoch": 2.841073271413829, "grad_norm": 3.5801517521248685, "learning_rate": 6.393433978265598e-08, "loss": 1.0239, "step": 96355 }, { "epoch": 2.841220698805838, "grad_norm": 3.4883945644968617, "learning_rate": 6.381611563777339e-08, "loss": 1.0332, "step": 96360 }, { "epoch": 2.8413681261978474, "grad_norm": 3.572691306489597, "learning_rate": 6.369799996306075e-08, "loss": 1.0158, "step": 96365 }, { "epoch": 2.841515553589857, "grad_norm": 3.4487896325768714, "learning_rate": 6.357999276199358e-08, "loss": 1.0271, "step": 96370 }, { "epoch": 2.8416629809818663, "grad_norm": 3.722300696343258, "learning_rate": 6.346209403804537e-08, "loss": 1.0283, "step": 96375 }, { "epoch": 2.8418104083738758, "grad_norm": 3.5534540546524185, "learning_rate": 6.334430379468334e-08, "loss": 1.0627, "step": 96380 }, { "epoch": 2.8419578357658852, "grad_norm": 3.6330332011948454, "learning_rate": 6.322662203537597e-08, "loss": 1.0553, "step": 96385 }, { "epoch": 2.8421052631578947, "grad_norm": 3.773782308046887, "learning_rate": 6.310904876358464e-08, "loss": 1.0336, "step": 96390 }, { "epoch": 2.842252690549904, "grad_norm": 3.46614851643883, "learning_rate": 6.299158398276952e-08, "loss": 1.0232, "step": 96395 }, { "epoch": 2.8424001179419136, "grad_norm": 3.373561509899613, "learning_rate": 6.287422769638785e-08, "loss": 1.063, "step": 96400 }, { "epoch": 2.842547545333923, "grad_norm": 3.405134354948257, "learning_rate": 6.275697990789101e-08, "loss": 1.033, "step": 96405 }, { "epoch": 2.8426949727259325, "grad_norm": 3.501836311745997, "learning_rate": 6.263984062073167e-08, "loss": 1.0483, "step": 96410 }, { "epoch": 2.842842400117942, "grad_norm": 3.452446407217829, "learning_rate": 6.252280983835498e-08, "loss": 1.0267, "step": 96415 }, { "epoch": 2.8429898275099514, "grad_norm": 3.650284549246293, "learning_rate": 6.240588756420487e-08, "loss": 1.0474, "step": 96420 }, { "epoch": 2.843137254901961, "grad_norm": 3.4947425305261377, "learning_rate": 6.228907380172274e-08, "loss": 1.0481, "step": 96425 }, { "epoch": 2.8432846822939704, "grad_norm": 3.351050738227177, "learning_rate": 6.217236855434544e-08, "loss": 1.0568, "step": 96430 }, { "epoch": 2.84343210968598, "grad_norm": 3.5041251353077345, "learning_rate": 6.205577182550729e-08, "loss": 1.0049, "step": 96435 }, { "epoch": 2.8435795370779893, "grad_norm": 3.6159451870689585, "learning_rate": 6.193928361863929e-08, "loss": 1.026, "step": 96440 }, { "epoch": 2.8437269644699983, "grad_norm": 3.630663099603646, "learning_rate": 6.182290393716914e-08, "loss": 1.0455, "step": 96445 }, { "epoch": 2.843874391862008, "grad_norm": 3.360250695030687, "learning_rate": 6.170663278452198e-08, "loss": 1.0178, "step": 96450 }, { "epoch": 2.844021819254017, "grad_norm": 3.398522449894402, "learning_rate": 6.159047016411803e-08, "loss": 1.0475, "step": 96455 }, { "epoch": 2.844169246646027, "grad_norm": 3.507448082905939, "learning_rate": 6.147441607937662e-08, "loss": 1.0151, "step": 96460 }, { "epoch": 2.844316674038036, "grad_norm": 3.6297136422034297, "learning_rate": 6.135847053371211e-08, "loss": 1.0549, "step": 96465 }, { "epoch": 2.8444641014300456, "grad_norm": 3.5183544466944308, "learning_rate": 6.124263353053675e-08, "loss": 1.0311, "step": 96470 }, { "epoch": 2.844611528822055, "grad_norm": 3.5131255852921432, "learning_rate": 6.112690507325907e-08, "loss": 1.0018, "step": 96475 }, { "epoch": 2.8447589562140645, "grad_norm": 3.3785242095857573, "learning_rate": 6.101128516528428e-08, "loss": 0.9873, "step": 96480 }, { "epoch": 2.844906383606074, "grad_norm": 3.5174617360495604, "learning_rate": 6.089577381001507e-08, "loss": 1.0554, "step": 96485 }, { "epoch": 2.8450538109980834, "grad_norm": 3.3788962115407757, "learning_rate": 6.078037101084954e-08, "loss": 0.9939, "step": 96490 }, { "epoch": 2.845201238390093, "grad_norm": 3.562216414495048, "learning_rate": 6.066507677118499e-08, "loss": 1.0569, "step": 96495 }, { "epoch": 2.8453486657821023, "grad_norm": 3.4307452829149216, "learning_rate": 6.054989109441328e-08, "loss": 1.0324, "step": 96500 }, { "epoch": 2.8453486657821023, "eval_loss": 1.0764929056167603, "eval_runtime": 4.3396, "eval_samples_per_second": 91.252, "eval_steps_per_second": 2.996, "step": 96500 }, { "epoch": 2.845496093174112, "grad_norm": 3.2945007525079766, "learning_rate": 6.043481398392378e-08, "loss": 0.9874, "step": 96505 }, { "epoch": 2.8456435205661212, "grad_norm": 3.452712713976529, "learning_rate": 6.031984544310254e-08, "loss": 1.0525, "step": 96510 }, { "epoch": 2.8457909479581307, "grad_norm": 3.326374707509519, "learning_rate": 6.020498547533352e-08, "loss": 1.0184, "step": 96515 }, { "epoch": 2.84593837535014, "grad_norm": 3.3845596856775475, "learning_rate": 6.009023408399608e-08, "loss": 1.0332, "step": 96520 }, { "epoch": 2.8460858027421496, "grad_norm": 3.4829901339322213, "learning_rate": 5.997559127246671e-08, "loss": 1.0554, "step": 96525 }, { "epoch": 2.846233230134159, "grad_norm": 3.558507315181004, "learning_rate": 5.986105704411937e-08, "loss": 1.0351, "step": 96530 }, { "epoch": 2.8463806575261685, "grad_norm": 3.427328477022467, "learning_rate": 5.974663140232387e-08, "loss": 0.9907, "step": 96535 }, { "epoch": 2.8465280849181775, "grad_norm": 3.5744976036893386, "learning_rate": 5.963231435044792e-08, "loss": 1.0028, "step": 96540 }, { "epoch": 2.8466755123101875, "grad_norm": 3.629664148450359, "learning_rate": 5.951810589185466e-08, "loss": 1.0317, "step": 96545 }, { "epoch": 2.8468229397021965, "grad_norm": 3.6338525009114955, "learning_rate": 5.9404006029905586e-08, "loss": 1.0313, "step": 96550 }, { "epoch": 2.8469703670942064, "grad_norm": 3.460327892864296, "learning_rate": 5.929001476795759e-08, "loss": 0.9993, "step": 96555 }, { "epoch": 2.8471177944862154, "grad_norm": 3.5053553467921206, "learning_rate": 5.917613210936548e-08, "loss": 1.0485, "step": 96560 }, { "epoch": 2.847265221878225, "grad_norm": 3.359332473701779, "learning_rate": 5.90623580574795e-08, "loss": 1.0218, "step": 96565 }, { "epoch": 2.8474126492702343, "grad_norm": 3.547023544794101, "learning_rate": 5.894869261564906e-08, "loss": 1.0362, "step": 96570 }, { "epoch": 2.8475600766622438, "grad_norm": 3.579457084624208, "learning_rate": 5.883513578721772e-08, "loss": 1.0279, "step": 96575 }, { "epoch": 2.847707504054253, "grad_norm": 3.5777892651301637, "learning_rate": 5.87216875755274e-08, "loss": 1.01, "step": 96580 }, { "epoch": 2.8478549314462627, "grad_norm": 3.676343632094121, "learning_rate": 5.860834798391668e-08, "loss": 1.0211, "step": 96585 }, { "epoch": 2.848002358838272, "grad_norm": 3.530732287366023, "learning_rate": 5.849511701571997e-08, "loss": 1.0353, "step": 96590 }, { "epoch": 2.8481497862302816, "grad_norm": 3.559361968517728, "learning_rate": 5.838199467427044e-08, "loss": 0.9951, "step": 96595 }, { "epoch": 2.848297213622291, "grad_norm": 3.695885380976367, "learning_rate": 5.826898096289543e-08, "loss": 1.0537, "step": 96600 }, { "epoch": 2.8484446410143005, "grad_norm": 3.5925729266122977, "learning_rate": 5.815607588492145e-08, "loss": 1.077, "step": 96605 }, { "epoch": 2.84859206840631, "grad_norm": 3.5361218311211697, "learning_rate": 5.804327944367041e-08, "loss": 1.0305, "step": 96610 }, { "epoch": 2.8487394957983194, "grad_norm": 3.511861828067366, "learning_rate": 5.7930591642461755e-08, "loss": 1.0203, "step": 96615 }, { "epoch": 2.848886923190329, "grad_norm": 3.529722628596026, "learning_rate": 5.781801248461116e-08, "loss": 1.0361, "step": 96620 }, { "epoch": 2.8490343505823383, "grad_norm": 3.673044798227877, "learning_rate": 5.7705541973431806e-08, "loss": 1.0099, "step": 96625 }, { "epoch": 2.849181777974348, "grad_norm": 3.7220913634245507, "learning_rate": 5.7593180112232715e-08, "loss": 1.0593, "step": 96630 }, { "epoch": 2.849329205366357, "grad_norm": 3.458161703476527, "learning_rate": 5.7480926904321246e-08, "loss": 1.0465, "step": 96635 }, { "epoch": 2.8494766327583667, "grad_norm": 3.5576971141950766, "learning_rate": 5.736878235299933e-08, "loss": 1.0432, "step": 96640 }, { "epoch": 2.8496240601503757, "grad_norm": 3.429579061041929, "learning_rate": 5.7256746461567675e-08, "loss": 1.0244, "step": 96645 }, { "epoch": 2.8497714875423856, "grad_norm": 3.422849511767143, "learning_rate": 5.714481923332321e-08, "loss": 1.0504, "step": 96650 }, { "epoch": 2.8499189149343946, "grad_norm": 3.5634709022144384, "learning_rate": 5.703300067155873e-08, "loss": 1.0411, "step": 96655 }, { "epoch": 2.850066342326404, "grad_norm": 3.460935870123944, "learning_rate": 5.692129077956576e-08, "loss": 1.051, "step": 96660 }, { "epoch": 2.8502137697184136, "grad_norm": 3.685691808573154, "learning_rate": 5.680968956063001e-08, "loss": 1.0587, "step": 96665 }, { "epoch": 2.850361197110423, "grad_norm": 3.465529655766597, "learning_rate": 5.669819701803719e-08, "loss": 0.9798, "step": 96670 }, { "epoch": 2.8505086245024325, "grad_norm": 3.4801894823779533, "learning_rate": 5.658681315506675e-08, "loss": 1.0367, "step": 96675 }, { "epoch": 2.850656051894442, "grad_norm": 3.433555094198359, "learning_rate": 5.647553797499691e-08, "loss": 1.0064, "step": 96680 }, { "epoch": 2.8508034792864514, "grad_norm": 3.317035861016389, "learning_rate": 5.6364371481101714e-08, "loss": 1.0326, "step": 96685 }, { "epoch": 2.850950906678461, "grad_norm": 3.4962995274109128, "learning_rate": 5.6253313676652705e-08, "loss": 1.0232, "step": 96690 }, { "epoch": 2.8510983340704703, "grad_norm": 3.4859144221270966, "learning_rate": 5.6142364564918113e-08, "loss": 1.0437, "step": 96695 }, { "epoch": 2.8512457614624798, "grad_norm": 3.4488442310095184, "learning_rate": 5.603152414916199e-08, "loss": 1.0105, "step": 96700 }, { "epoch": 2.8513931888544892, "grad_norm": 3.4435183543525345, "learning_rate": 5.5920792432646306e-08, "loss": 1.0387, "step": 96705 }, { "epoch": 2.8515406162464987, "grad_norm": 3.476218823082793, "learning_rate": 5.581016941862971e-08, "loss": 1.0295, "step": 96710 }, { "epoch": 2.851688043638508, "grad_norm": 3.393799566478287, "learning_rate": 5.5699655110366674e-08, "loss": 1.077, "step": 96715 }, { "epoch": 2.8518354710305176, "grad_norm": 3.346802977555856, "learning_rate": 5.558924951111044e-08, "loss": 1.0029, "step": 96720 }, { "epoch": 2.851982898422527, "grad_norm": 3.697437582343782, "learning_rate": 5.547895262410882e-08, "loss": 1.076, "step": 96725 }, { "epoch": 2.852130325814536, "grad_norm": 3.592849101275311, "learning_rate": 5.536876445260797e-08, "loss": 1.0526, "step": 96730 }, { "epoch": 2.852277753206546, "grad_norm": 3.494558118907851, "learning_rate": 5.525868499984987e-08, "loss": 1.0614, "step": 96735 }, { "epoch": 2.852425180598555, "grad_norm": 3.6354645711876663, "learning_rate": 5.5148714269073616e-08, "loss": 1.0403, "step": 96740 }, { "epoch": 2.852572607990565, "grad_norm": 3.5974502572372917, "learning_rate": 5.5038852263516185e-08, "loss": 1.0588, "step": 96745 }, { "epoch": 2.852720035382574, "grad_norm": 3.298565551691983, "learning_rate": 5.4929098986408744e-08, "loss": 1.0413, "step": 96750 }, { "epoch": 2.8528674627745834, "grad_norm": 3.711905318934391, "learning_rate": 5.481945444098288e-08, "loss": 1.0638, "step": 96755 }, { "epoch": 2.853014890166593, "grad_norm": 3.616406596753286, "learning_rate": 5.4709918630463516e-08, "loss": 1.0367, "step": 96760 }, { "epoch": 2.8531623175586023, "grad_norm": 3.3845919861005944, "learning_rate": 5.460049155807431e-08, "loss": 1.0455, "step": 96765 }, { "epoch": 2.8533097449506117, "grad_norm": 3.5698217132391687, "learning_rate": 5.4491173227035614e-08, "loss": 1.0128, "step": 96770 }, { "epoch": 2.853457172342621, "grad_norm": 3.513361182523601, "learning_rate": 5.438196364056402e-08, "loss": 1.0484, "step": 96775 }, { "epoch": 2.8536045997346307, "grad_norm": 3.63094205032173, "learning_rate": 5.42728628018728e-08, "loss": 1.0491, "step": 96780 }, { "epoch": 2.85375202712664, "grad_norm": 3.5586236885197007, "learning_rate": 5.4163870714173116e-08, "loss": 1.043, "step": 96785 }, { "epoch": 2.8538994545186496, "grad_norm": 3.512652244090197, "learning_rate": 5.405498738067116e-08, "loss": 1.0376, "step": 96790 }, { "epoch": 2.854046881910659, "grad_norm": 3.452022151279345, "learning_rate": 5.3946212804571874e-08, "loss": 1.0331, "step": 96795 }, { "epoch": 2.8541943093026685, "grad_norm": 3.5443484942488483, "learning_rate": 5.383754698907562e-08, "loss": 1.0733, "step": 96800 }, { "epoch": 2.854341736694678, "grad_norm": 3.597613870062695, "learning_rate": 5.372898993738024e-08, "loss": 1.046, "step": 96805 }, { "epoch": 2.8544891640866874, "grad_norm": 3.510726789291282, "learning_rate": 5.362054165268027e-08, "loss": 1.0624, "step": 96810 }, { "epoch": 2.854636591478697, "grad_norm": 3.5347439072069813, "learning_rate": 5.351220213816607e-08, "loss": 1.0603, "step": 96815 }, { "epoch": 2.8547840188707063, "grad_norm": 3.4849024445822643, "learning_rate": 5.340397139702677e-08, "loss": 1.0166, "step": 96820 }, { "epoch": 2.8549314462627158, "grad_norm": 3.393307020617451, "learning_rate": 5.329584943244606e-08, "loss": 1.0385, "step": 96825 }, { "epoch": 2.8550788736547252, "grad_norm": 3.510696874011955, "learning_rate": 5.3187836247606815e-08, "loss": 1.0227, "step": 96830 }, { "epoch": 2.8552263010467342, "grad_norm": 3.52243587497794, "learning_rate": 5.307993184568649e-08, "loss": 1.0314, "step": 96835 }, { "epoch": 2.855373728438744, "grad_norm": 3.6567991099972326, "learning_rate": 5.297213622986088e-08, "loss": 1.0275, "step": 96840 }, { "epoch": 2.855521155830753, "grad_norm": 3.586420900265071, "learning_rate": 5.286444940330121e-08, "loss": 1.1085, "step": 96845 }, { "epoch": 2.8556685832227626, "grad_norm": 3.7103316494519256, "learning_rate": 5.275687136917742e-08, "loss": 1.0818, "step": 96850 }, { "epoch": 2.855816010614772, "grad_norm": 3.5263531402038364, "learning_rate": 5.2649402130654084e-08, "loss": 1.045, "step": 96855 }, { "epoch": 2.8559634380067815, "grad_norm": 3.604998768056343, "learning_rate": 5.254204169089449e-08, "loss": 1.0547, "step": 96860 }, { "epoch": 2.856110865398791, "grad_norm": 3.5392068960581122, "learning_rate": 5.243479005305696e-08, "loss": 0.9557, "step": 96865 }, { "epoch": 2.8562582927908005, "grad_norm": 3.5508178264797765, "learning_rate": 5.2327647220297706e-08, "loss": 1.0222, "step": 96870 }, { "epoch": 2.85640572018281, "grad_norm": 3.50552433762563, "learning_rate": 5.222061319577004e-08, "loss": 1.0292, "step": 96875 }, { "epoch": 2.8565531475748194, "grad_norm": 3.593678255016418, "learning_rate": 5.211368798262312e-08, "loss": 1.0359, "step": 96880 }, { "epoch": 2.856700574966829, "grad_norm": 3.542013730682103, "learning_rate": 5.200687158400358e-08, "loss": 1.0458, "step": 96885 }, { "epoch": 2.8568480023588383, "grad_norm": 3.5589312095353463, "learning_rate": 5.1900164003054335e-08, "loss": 1.0391, "step": 96890 }, { "epoch": 2.8569954297508477, "grad_norm": 3.5411375603348643, "learning_rate": 5.17935652429162e-08, "loss": 1.032, "step": 96895 }, { "epoch": 2.857142857142857, "grad_norm": 3.601240744091345, "learning_rate": 5.168707530672459e-08, "loss": 1.0249, "step": 96900 }, { "epoch": 2.8572902845348667, "grad_norm": 3.560766490251561, "learning_rate": 5.1580694197614076e-08, "loss": 1.0521, "step": 96905 }, { "epoch": 2.857437711926876, "grad_norm": 3.635421388949629, "learning_rate": 5.147442191871507e-08, "loss": 1.078, "step": 96910 }, { "epoch": 2.8575851393188856, "grad_norm": 3.612444943586526, "learning_rate": 5.1368258473153825e-08, "loss": 0.9775, "step": 96915 }, { "epoch": 2.857732566710895, "grad_norm": 3.4718526275353767, "learning_rate": 5.126220386405575e-08, "loss": 1.0209, "step": 96920 }, { "epoch": 2.8578799941029045, "grad_norm": 3.601718041834748, "learning_rate": 5.115625809454003e-08, "loss": 1.0358, "step": 96925 }, { "epoch": 2.8580274214949135, "grad_norm": 3.5112293028620103, "learning_rate": 5.105042116772582e-08, "loss": 1.0662, "step": 96930 }, { "epoch": 2.8581748488869234, "grad_norm": 3.5484584550934533, "learning_rate": 5.094469308672606e-08, "loss": 1.052, "step": 96935 }, { "epoch": 2.8583222762789324, "grad_norm": 3.524525548116382, "learning_rate": 5.083907385465283e-08, "loss": 1.0041, "step": 96940 }, { "epoch": 2.8584697036709423, "grad_norm": 3.49148552732965, "learning_rate": 5.073356347461408e-08, "loss": 1.05, "step": 96945 }, { "epoch": 2.8586171310629513, "grad_norm": 3.661963822492569, "learning_rate": 5.062816194971398e-08, "loss": 1.0355, "step": 96950 }, { "epoch": 2.858764558454961, "grad_norm": 3.4974025095806534, "learning_rate": 5.052286928305464e-08, "loss": 1.0377, "step": 96955 }, { "epoch": 2.8589119858469703, "grad_norm": 3.582208189420298, "learning_rate": 5.0417685477733985e-08, "loss": 1.0538, "step": 96960 }, { "epoch": 2.8590594132389797, "grad_norm": 3.36599178923487, "learning_rate": 5.0312610536847884e-08, "loss": 1.0115, "step": 96965 }, { "epoch": 2.859206840630989, "grad_norm": 3.5229065423167585, "learning_rate": 5.020764446348761e-08, "loss": 1.0649, "step": 96970 }, { "epoch": 2.8593542680229986, "grad_norm": 3.394245851337207, "learning_rate": 5.010278726074194e-08, "loss": 1.0214, "step": 96975 }, { "epoch": 2.859501695415008, "grad_norm": 3.6154401500632933, "learning_rate": 4.9998038931696743e-08, "loss": 1.066, "step": 96980 }, { "epoch": 2.8596491228070176, "grad_norm": 3.319709511000112, "learning_rate": 4.989339947943455e-08, "loss": 1.0079, "step": 96985 }, { "epoch": 2.859796550199027, "grad_norm": 3.6567680950388244, "learning_rate": 4.978886890703374e-08, "loss": 1.0422, "step": 96990 }, { "epoch": 2.8599439775910365, "grad_norm": 3.561388065858607, "learning_rate": 4.968444721757101e-08, "loss": 1.0794, "step": 96995 }, { "epoch": 2.860091404983046, "grad_norm": 3.6780421721096364, "learning_rate": 4.9580134414118075e-08, "loss": 1.0924, "step": 97000 }, { "epoch": 2.860091404983046, "eval_loss": 1.0765845775604248, "eval_runtime": 4.1553, "eval_samples_per_second": 95.299, "eval_steps_per_second": 3.129, "step": 97000 }, { "epoch": 2.8602388323750554, "grad_norm": 3.5542717121270817, "learning_rate": 4.94759304997458e-08, "loss": 1.0431, "step": 97005 }, { "epoch": 2.860386259767065, "grad_norm": 3.3824152547644744, "learning_rate": 4.937183547751925e-08, "loss": 1.0228, "step": 97010 }, { "epoch": 2.8605336871590743, "grad_norm": 3.632857919747569, "learning_rate": 4.9267849350502634e-08, "loss": 1.0326, "step": 97015 }, { "epoch": 2.8606811145510838, "grad_norm": 3.2913729509014433, "learning_rate": 4.9163972121754746e-08, "loss": 1.0184, "step": 97020 }, { "epoch": 2.8608285419430928, "grad_norm": 3.6399227886280343, "learning_rate": 4.906020379433315e-08, "loss": 1.0318, "step": 97025 }, { "epoch": 2.8609759693351027, "grad_norm": 3.2409255110775463, "learning_rate": 4.895654437129124e-08, "loss": 1.0125, "step": 97030 }, { "epoch": 2.8611233967271117, "grad_norm": 3.565032357486966, "learning_rate": 4.885299385567907e-08, "loss": 1.0472, "step": 97035 }, { "epoch": 2.8612708241191216, "grad_norm": 3.585637357985042, "learning_rate": 4.874955225054378e-08, "loss": 1.0594, "step": 97040 }, { "epoch": 2.8614182515111306, "grad_norm": 3.603095244643887, "learning_rate": 4.864621955892962e-08, "loss": 1.0439, "step": 97045 }, { "epoch": 2.86156567890314, "grad_norm": 3.4851477168370533, "learning_rate": 4.854299578387664e-08, "loss": 1.003, "step": 97050 }, { "epoch": 2.8617131062951495, "grad_norm": 3.515305136362916, "learning_rate": 4.843988092842283e-08, "loss": 1.0448, "step": 97055 }, { "epoch": 2.861860533687159, "grad_norm": 3.46381071116528, "learning_rate": 4.833687499560202e-08, "loss": 1.0255, "step": 97060 }, { "epoch": 2.8620079610791684, "grad_norm": 3.590576126587871, "learning_rate": 4.823397798844553e-08, "loss": 1.0323, "step": 97065 }, { "epoch": 2.862155388471178, "grad_norm": 3.496484178671587, "learning_rate": 4.8131189909981776e-08, "loss": 1.0676, "step": 97070 }, { "epoch": 2.8623028158631874, "grad_norm": 3.7226189953826725, "learning_rate": 4.802851076323417e-08, "loss": 1.0496, "step": 97075 }, { "epoch": 2.862450243255197, "grad_norm": 3.441065910200551, "learning_rate": 4.792594055122529e-08, "loss": 1.056, "step": 97080 }, { "epoch": 2.8625976706472063, "grad_norm": 3.4043447321074534, "learning_rate": 4.782347927697231e-08, "loss": 1.0167, "step": 97085 }, { "epoch": 2.8627450980392157, "grad_norm": 3.7281714251693963, "learning_rate": 4.7721126943491565e-08, "loss": 1.0377, "step": 97090 }, { "epoch": 2.862892525431225, "grad_norm": 3.2830886872978846, "learning_rate": 4.761888355379398e-08, "loss": 0.9902, "step": 97095 }, { "epoch": 2.8630399528232346, "grad_norm": 3.5066700105556112, "learning_rate": 4.751674911088841e-08, "loss": 1.0315, "step": 97100 }, { "epoch": 2.863187380215244, "grad_norm": 3.663984960610243, "learning_rate": 4.741472361777993e-08, "loss": 1.0421, "step": 97105 }, { "epoch": 2.8633348076072536, "grad_norm": 3.395703299411107, "learning_rate": 4.731280707747157e-08, "loss": 0.9874, "step": 97110 }, { "epoch": 2.863482234999263, "grad_norm": 3.543993511419737, "learning_rate": 4.721099949296176e-08, "loss": 1.0365, "step": 97115 }, { "epoch": 2.863629662391272, "grad_norm": 3.4793310578347754, "learning_rate": 4.7109300867246434e-08, "loss": 1.0271, "step": 97120 }, { "epoch": 2.863777089783282, "grad_norm": 3.4014188627379967, "learning_rate": 4.7007711203318206e-08, "loss": 1.0463, "step": 97125 }, { "epoch": 2.863924517175291, "grad_norm": 3.614147291348273, "learning_rate": 4.690623050416634e-08, "loss": 1.0406, "step": 97130 }, { "epoch": 2.864071944567301, "grad_norm": 3.5671097315757225, "learning_rate": 4.680485877277721e-08, "loss": 1.0355, "step": 97135 }, { "epoch": 2.86421937195931, "grad_norm": 3.454143267639449, "learning_rate": 4.6703596012133836e-08, "loss": 1.0387, "step": 97140 }, { "epoch": 2.8643667993513193, "grad_norm": 3.6774121129864636, "learning_rate": 4.660244222521593e-08, "loss": 1.046, "step": 97145 }, { "epoch": 2.864514226743329, "grad_norm": 3.567684010124084, "learning_rate": 4.650139741499942e-08, "loss": 1.0591, "step": 97150 }, { "epoch": 2.8646616541353382, "grad_norm": 3.5296824823265296, "learning_rate": 4.640046158445904e-08, "loss": 1.0435, "step": 97155 }, { "epoch": 2.8648090815273477, "grad_norm": 3.480045313890414, "learning_rate": 4.629963473656365e-08, "loss": 1.0397, "step": 97160 }, { "epoch": 2.864956508919357, "grad_norm": 3.761813010620093, "learning_rate": 4.619891687428088e-08, "loss": 1.0545, "step": 97165 }, { "epoch": 2.8651039363113666, "grad_norm": 3.468855710133204, "learning_rate": 4.609830800057419e-08, "loss": 1.0446, "step": 97170 }, { "epoch": 2.865251363703376, "grad_norm": 3.576875081551451, "learning_rate": 4.599780811840412e-08, "loss": 1.0462, "step": 97175 }, { "epoch": 2.8653987910953855, "grad_norm": 3.3995551385323752, "learning_rate": 4.589741723072832e-08, "loss": 1.0039, "step": 97180 }, { "epoch": 2.865546218487395, "grad_norm": 3.5500284218725464, "learning_rate": 4.579713534050026e-08, "loss": 1.0191, "step": 97185 }, { "epoch": 2.8656936458794044, "grad_norm": 3.62248004626002, "learning_rate": 4.569696245067173e-08, "loss": 1.0513, "step": 97190 }, { "epoch": 2.865841073271414, "grad_norm": 3.5157397417060583, "learning_rate": 4.5596898564189557e-08, "loss": 1.0518, "step": 97195 }, { "epoch": 2.8659885006634234, "grad_norm": 3.62655331678833, "learning_rate": 4.549694368399887e-08, "loss": 1.0385, "step": 97200 }, { "epoch": 2.866135928055433, "grad_norm": 3.459551873843438, "learning_rate": 4.539709781304066e-08, "loss": 1.0189, "step": 97205 }, { "epoch": 2.8662833554474423, "grad_norm": 3.393450771805869, "learning_rate": 4.529736095425299e-08, "loss": 0.995, "step": 97210 }, { "epoch": 2.8664307828394513, "grad_norm": 3.6003808828787265, "learning_rate": 4.5197733110571016e-08, "loss": 1.0716, "step": 97215 }, { "epoch": 2.866578210231461, "grad_norm": 3.393759278421118, "learning_rate": 4.5098214284925724e-08, "loss": 1.0086, "step": 97220 }, { "epoch": 2.86672563762347, "grad_norm": 3.535838748683288, "learning_rate": 4.499880448024643e-08, "loss": 0.9947, "step": 97225 }, { "epoch": 2.86687306501548, "grad_norm": 3.603026820711777, "learning_rate": 4.4899503699457886e-08, "loss": 1.0615, "step": 97230 }, { "epoch": 2.867020492407489, "grad_norm": 3.536681278638987, "learning_rate": 4.4800311945481926e-08, "loss": 1.0715, "step": 97235 }, { "epoch": 2.8671679197994986, "grad_norm": 3.545282093357031, "learning_rate": 4.470122922123829e-08, "loss": 1.0489, "step": 97240 }, { "epoch": 2.867315347191508, "grad_norm": 3.5524157296956775, "learning_rate": 4.460225552964173e-08, "loss": 1.0355, "step": 97245 }, { "epoch": 2.8674627745835175, "grad_norm": 3.6148010198398697, "learning_rate": 4.45033908736045e-08, "loss": 1.0389, "step": 97250 }, { "epoch": 2.867610201975527, "grad_norm": 3.502396003255564, "learning_rate": 4.440463525603719e-08, "loss": 1.0503, "step": 97255 }, { "epoch": 2.8677576293675364, "grad_norm": 3.7029859086181434, "learning_rate": 4.430598867984373e-08, "loss": 1.0582, "step": 97260 }, { "epoch": 2.867905056759546, "grad_norm": 3.4011883470026785, "learning_rate": 4.420745114792887e-08, "loss": 0.9934, "step": 97265 }, { "epoch": 2.8680524841515553, "grad_norm": 3.5419654094502695, "learning_rate": 4.410902266319072e-08, "loss": 1.036, "step": 97270 }, { "epoch": 2.868199911543565, "grad_norm": 3.5655126776634223, "learning_rate": 4.401070322852654e-08, "loss": 1.0157, "step": 97275 }, { "epoch": 2.8683473389355743, "grad_norm": 3.515598183529393, "learning_rate": 4.3912492846829025e-08, "loss": 1.0499, "step": 97280 }, { "epoch": 2.8684947663275837, "grad_norm": 3.5461552869272364, "learning_rate": 4.3814391520988764e-08, "loss": 1.0359, "step": 97285 }, { "epoch": 2.868642193719593, "grad_norm": 3.5364225764876194, "learning_rate": 4.3716399253891373e-08, "loss": 1.0477, "step": 97290 }, { "epoch": 2.8687896211116026, "grad_norm": 3.6866749364272535, "learning_rate": 4.3618516048421624e-08, "loss": 1.0795, "step": 97295 }, { "epoch": 2.868937048503612, "grad_norm": 3.5243988850764945, "learning_rate": 4.352074190745889e-08, "loss": 1.0667, "step": 97300 }, { "epoch": 2.8690844758956215, "grad_norm": 3.4645365004430384, "learning_rate": 4.3423076833881274e-08, "loss": 1.0351, "step": 97305 }, { "epoch": 2.869231903287631, "grad_norm": 3.728555497473986, "learning_rate": 4.33255208305619e-08, "loss": 1.0817, "step": 97310 }, { "epoch": 2.8693793306796405, "grad_norm": 3.4952404313887757, "learning_rate": 4.3228073900371395e-08, "loss": 1.0278, "step": 97315 }, { "epoch": 2.8695267580716495, "grad_norm": 3.5010985824393286, "learning_rate": 4.3130736046177456e-08, "loss": 1.066, "step": 97320 }, { "epoch": 2.8696741854636594, "grad_norm": 3.4863040387377637, "learning_rate": 4.3033507270844454e-08, "loss": 1.0374, "step": 97325 }, { "epoch": 2.8698216128556684, "grad_norm": 3.569516399002016, "learning_rate": 4.2936387577233865e-08, "loss": 1.0488, "step": 97330 }, { "epoch": 2.8699690402476783, "grad_norm": 3.4299983704167434, "learning_rate": 4.283937696820214e-08, "loss": 1.0508, "step": 97335 }, { "epoch": 2.8701164676396873, "grad_norm": 3.580726718472105, "learning_rate": 4.2742475446605754e-08, "loss": 1.0771, "step": 97340 }, { "epoch": 2.8702638950316968, "grad_norm": 3.6554257908786862, "learning_rate": 4.2645683015294925e-08, "loss": 1.0539, "step": 97345 }, { "epoch": 2.8704113224237062, "grad_norm": 3.4492025403108646, "learning_rate": 4.25489996771182e-08, "loss": 1.0183, "step": 97350 }, { "epoch": 2.8705587498157157, "grad_norm": 3.6471140498364796, "learning_rate": 4.245242543492081e-08, "loss": 1.0381, "step": 97355 }, { "epoch": 2.870706177207725, "grad_norm": 3.598572318496143, "learning_rate": 4.2355960291544233e-08, "loss": 1.0163, "step": 97360 }, { "epoch": 2.8708536045997346, "grad_norm": 3.4361156639290127, "learning_rate": 4.2259604249827446e-08, "loss": 1.0123, "step": 97365 }, { "epoch": 2.871001031991744, "grad_norm": 3.516726049766629, "learning_rate": 4.216335731260526e-08, "loss": 1.0149, "step": 97370 }, { "epoch": 2.8711484593837535, "grad_norm": 3.6040525560412924, "learning_rate": 4.2067219482710414e-08, "loss": 1.0291, "step": 97375 }, { "epoch": 2.871295886775763, "grad_norm": 3.749087845921328, "learning_rate": 4.1971190762971485e-08, "loss": 1.0397, "step": 97380 }, { "epoch": 2.8714433141677724, "grad_norm": 3.577847232353189, "learning_rate": 4.1875271156214534e-08, "loss": 1.0444, "step": 97385 }, { "epoch": 2.871590741559782, "grad_norm": 3.4115951516473144, "learning_rate": 4.1779460665261894e-08, "loss": 0.9936, "step": 97390 }, { "epoch": 2.8717381689517913, "grad_norm": 3.629424651372385, "learning_rate": 4.168375929293297e-08, "loss": 1.0639, "step": 97395 }, { "epoch": 2.871885596343801, "grad_norm": 3.4398430987580833, "learning_rate": 4.158816704204385e-08, "loss": 1.0678, "step": 97400 }, { "epoch": 2.8720330237358103, "grad_norm": 3.518190849412452, "learning_rate": 4.14926839154077e-08, "loss": 0.9789, "step": 97405 }, { "epoch": 2.8721804511278197, "grad_norm": 3.5034166759455325, "learning_rate": 4.1397309915833515e-08, "loss": 1.0705, "step": 97410 }, { "epoch": 2.8723278785198287, "grad_norm": 3.3919721984583138, "learning_rate": 4.1302045046129056e-08, "loss": 1.062, "step": 97415 }, { "epoch": 2.8724753059118386, "grad_norm": 3.617087634549347, "learning_rate": 4.120688930909583e-08, "loss": 1.0165, "step": 97420 }, { "epoch": 2.8726227333038477, "grad_norm": 3.5769871078276716, "learning_rate": 4.111184270753576e-08, "loss": 1.0418, "step": 97425 }, { "epoch": 2.8727701606958576, "grad_norm": 3.594849956505351, "learning_rate": 4.1016905244244116e-08, "loss": 1.0118, "step": 97430 }, { "epoch": 2.8729175880878666, "grad_norm": 3.521903808389763, "learning_rate": 4.0922076922015743e-08, "loss": 1.0543, "step": 97435 }, { "epoch": 2.873065015479876, "grad_norm": 3.634902106158661, "learning_rate": 4.082735774364049e-08, "loss": 1.0761, "step": 97440 }, { "epoch": 2.8732124428718855, "grad_norm": 3.5809047210907816, "learning_rate": 4.07327477119053e-08, "loss": 1.0237, "step": 97445 }, { "epoch": 2.873359870263895, "grad_norm": 3.559113116888776, "learning_rate": 4.063824682959502e-08, "loss": 1.0419, "step": 97450 }, { "epoch": 2.8735072976559044, "grad_norm": 3.5552719121898435, "learning_rate": 4.0543855099489935e-08, "loss": 1.062, "step": 97455 }, { "epoch": 2.873654725047914, "grad_norm": 3.5925376269352722, "learning_rate": 4.0449572524367403e-08, "loss": 1.0275, "step": 97460 }, { "epoch": 2.8738021524399233, "grad_norm": 3.6488433637570417, "learning_rate": 4.035539910700228e-08, "loss": 1.042, "step": 97465 }, { "epoch": 2.8739495798319328, "grad_norm": 3.3775747124455955, "learning_rate": 4.026133485016528e-08, "loss": 1.0207, "step": 97470 }, { "epoch": 2.8740970072239422, "grad_norm": 3.544222503895964, "learning_rate": 4.01673797566246e-08, "loss": 1.0047, "step": 97475 }, { "epoch": 2.8742444346159517, "grad_norm": 3.7815132096450603, "learning_rate": 4.007353382914511e-08, "loss": 1.0015, "step": 97480 }, { "epoch": 2.874391862007961, "grad_norm": 3.5723204712550993, "learning_rate": 3.997979707048835e-08, "loss": 1.0047, "step": 97485 }, { "epoch": 2.8745392893999706, "grad_norm": 3.5901271583595165, "learning_rate": 3.9886169483412114e-08, "loss": 1.0258, "step": 97490 }, { "epoch": 2.87468671679198, "grad_norm": 3.6193062005844134, "learning_rate": 3.9792651070671707e-08, "loss": 1.0471, "step": 97495 }, { "epoch": 2.8748341441839895, "grad_norm": 3.5706835980501452, "learning_rate": 3.9699241835019913e-08, "loss": 1.0757, "step": 97500 }, { "epoch": 2.8748341441839895, "eval_loss": 1.076522946357727, "eval_runtime": 4.2722, "eval_samples_per_second": 92.693, "eval_steps_per_second": 3.043, "step": 97500 }, { "epoch": 2.874981571575999, "grad_norm": 3.482474464264781, "learning_rate": 3.9605941779204126e-08, "loss": 1.0422, "step": 97505 }, { "epoch": 2.875128998968008, "grad_norm": 3.4728828523906183, "learning_rate": 3.9512750905970066e-08, "loss": 1.0222, "step": 97510 }, { "epoch": 2.875276426360018, "grad_norm": 3.4614694604583645, "learning_rate": 3.941966921806095e-08, "loss": 1.0162, "step": 97515 }, { "epoch": 2.875423853752027, "grad_norm": 3.616061505097192, "learning_rate": 3.932669671821418e-08, "loss": 1.0052, "step": 97520 }, { "epoch": 2.875571281144037, "grad_norm": 3.4936428913339355, "learning_rate": 3.923383340916756e-08, "loss": 1.0472, "step": 97525 }, { "epoch": 2.875718708536046, "grad_norm": 3.5065782695368277, "learning_rate": 3.914107929365182e-08, "loss": 1.043, "step": 97530 }, { "epoch": 2.8758661359280553, "grad_norm": 3.7308145740356395, "learning_rate": 3.90484343743977e-08, "loss": 1.0541, "step": 97535 }, { "epoch": 2.8760135633200647, "grad_norm": 3.425868423483316, "learning_rate": 3.895589865413052e-08, "loss": 1.0063, "step": 97540 }, { "epoch": 2.876160990712074, "grad_norm": 3.558695629929218, "learning_rate": 3.8863472135573506e-08, "loss": 1.0471, "step": 97545 }, { "epoch": 2.8763084181040837, "grad_norm": 3.4652172695272854, "learning_rate": 3.877115482144658e-08, "loss": 1.0095, "step": 97550 }, { "epoch": 2.876455845496093, "grad_norm": 3.5109018407563926, "learning_rate": 3.867894671446631e-08, "loss": 1.0009, "step": 97555 }, { "epoch": 2.8766032728881026, "grad_norm": 3.31084474440316, "learning_rate": 3.858684781734595e-08, "loss": 1.0025, "step": 97560 }, { "epoch": 2.876750700280112, "grad_norm": 3.4065443682984062, "learning_rate": 3.849485813279582e-08, "loss": 0.9982, "step": 97565 }, { "epoch": 2.8768981276721215, "grad_norm": 3.313424170729594, "learning_rate": 3.840297766352211e-08, "loss": 1.0173, "step": 97570 }, { "epoch": 2.877045555064131, "grad_norm": 3.6342994517058522, "learning_rate": 3.8311206412229294e-08, "loss": 1.0692, "step": 97575 }, { "epoch": 2.8771929824561404, "grad_norm": 3.4206817501087787, "learning_rate": 3.8219544381617315e-08, "loss": 1.0537, "step": 97580 }, { "epoch": 2.87734040984815, "grad_norm": 3.625669507912686, "learning_rate": 3.81279915743836e-08, "loss": 1.0562, "step": 97585 }, { "epoch": 2.8774878372401593, "grad_norm": 3.512986500399101, "learning_rate": 3.803654799322265e-08, "loss": 1.0345, "step": 97590 }, { "epoch": 2.877635264632169, "grad_norm": 3.5486698562055996, "learning_rate": 3.79452136408244e-08, "loss": 1.0062, "step": 97595 }, { "epoch": 2.8777826920241782, "grad_norm": 3.451730373390389, "learning_rate": 3.7853988519877535e-08, "loss": 1.03, "step": 97600 }, { "epoch": 2.8779301194161873, "grad_norm": 3.387324562047251, "learning_rate": 3.776287263306533e-08, "loss": 1.0399, "step": 97605 }, { "epoch": 2.878077546808197, "grad_norm": 3.5645441831142857, "learning_rate": 3.7671865983069795e-08, "loss": 1.0355, "step": 97610 }, { "epoch": 2.878224974200206, "grad_norm": 3.378893580797717, "learning_rate": 3.758096857256879e-08, "loss": 1.0476, "step": 97615 }, { "epoch": 2.878372401592216, "grad_norm": 3.51423635206079, "learning_rate": 3.7490180404236433e-08, "loss": 1.0344, "step": 97620 }, { "epoch": 2.878519828984225, "grad_norm": 3.5848427962314395, "learning_rate": 3.739950148074517e-08, "loss": 1.0752, "step": 97625 }, { "epoch": 2.8786672563762346, "grad_norm": 3.504797623042029, "learning_rate": 3.730893180476286e-08, "loss": 1.0683, "step": 97630 }, { "epoch": 2.878814683768244, "grad_norm": 3.5439448912117353, "learning_rate": 3.721847137895446e-08, "loss": 1.0411, "step": 97635 }, { "epoch": 2.8789621111602535, "grad_norm": 3.5581633923871117, "learning_rate": 3.712812020598241e-08, "loss": 1.0321, "step": 97640 }, { "epoch": 2.879109538552263, "grad_norm": 3.496901784680091, "learning_rate": 3.7037878288505025e-08, "loss": 1.0339, "step": 97645 }, { "epoch": 2.8792569659442724, "grad_norm": 3.549685559642552, "learning_rate": 3.694774562917766e-08, "loss": 1.0247, "step": 97650 }, { "epoch": 2.879404393336282, "grad_norm": 3.5004869177188502, "learning_rate": 3.6857722230652785e-08, "loss": 1.0772, "step": 97655 }, { "epoch": 2.8795518207282913, "grad_norm": 3.336526112046132, "learning_rate": 3.676780809557911e-08, "loss": 1.0496, "step": 97660 }, { "epoch": 2.8796992481203008, "grad_norm": 3.6441263678923352, "learning_rate": 3.667800322660328e-08, "loss": 1.0588, "step": 97665 }, { "epoch": 2.87984667551231, "grad_norm": 3.4769422367114737, "learning_rate": 3.658830762636692e-08, "loss": 1.0132, "step": 97670 }, { "epoch": 2.8799941029043197, "grad_norm": 3.6358043961945463, "learning_rate": 3.649872129751042e-08, "loss": 1.0192, "step": 97675 }, { "epoch": 2.880141530296329, "grad_norm": 3.5316148560759086, "learning_rate": 3.6409244242668756e-08, "loss": 1.058, "step": 97680 }, { "epoch": 2.8802889576883386, "grad_norm": 3.759955973492272, "learning_rate": 3.631987646447607e-08, "loss": 1.0507, "step": 97685 }, { "epoch": 2.880436385080348, "grad_norm": 3.5208224176277576, "learning_rate": 3.623061796556151e-08, "loss": 1.0074, "step": 97690 }, { "epoch": 2.8805838124723575, "grad_norm": 3.589327109658889, "learning_rate": 3.614146874855173e-08, "loss": 1.0371, "step": 97695 }, { "epoch": 2.8807312398643665, "grad_norm": 3.5424327089790433, "learning_rate": 3.605242881607004e-08, "loss": 1.0096, "step": 97700 }, { "epoch": 2.8808786672563764, "grad_norm": 3.492151231484006, "learning_rate": 3.5963498170736017e-08, "loss": 0.997, "step": 97705 }, { "epoch": 2.8810260946483854, "grad_norm": 3.5189286677800857, "learning_rate": 3.5874676815167566e-08, "loss": 1.0528, "step": 97710 }, { "epoch": 2.8811735220403953, "grad_norm": 3.5690307832311334, "learning_rate": 3.5785964751977595e-08, "loss": 1.0441, "step": 97715 }, { "epoch": 2.8813209494324044, "grad_norm": 3.6052938499868823, "learning_rate": 3.569736198377693e-08, "loss": 1.0486, "step": 97720 }, { "epoch": 2.881468376824414, "grad_norm": 3.5183079790654617, "learning_rate": 3.560886851317266e-08, "loss": 1.0841, "step": 97725 }, { "epoch": 2.8816158042164233, "grad_norm": 3.6021092285257326, "learning_rate": 3.552048434276895e-08, "loss": 1.0697, "step": 97730 }, { "epoch": 2.8817632316084327, "grad_norm": 3.4699106756549285, "learning_rate": 3.5432209475166206e-08, "loss": 1.0764, "step": 97735 }, { "epoch": 2.881910659000442, "grad_norm": 3.666152720041607, "learning_rate": 3.5344043912962365e-08, "loss": 1.0218, "step": 97740 }, { "epoch": 2.8820580863924516, "grad_norm": 3.501510288133519, "learning_rate": 3.52559876587516e-08, "loss": 0.9838, "step": 97745 }, { "epoch": 2.882205513784461, "grad_norm": 3.410792917956987, "learning_rate": 3.5168040715125586e-08, "loss": 1.0126, "step": 97750 }, { "epoch": 2.8823529411764706, "grad_norm": 3.5044547286954346, "learning_rate": 3.5080203084671414e-08, "loss": 1.0674, "step": 97755 }, { "epoch": 2.88250036856848, "grad_norm": 3.5787617779961116, "learning_rate": 3.499247476997453e-08, "loss": 1.0799, "step": 97760 }, { "epoch": 2.8826477959604895, "grad_norm": 3.569519008902345, "learning_rate": 3.4904855773616185e-08, "loss": 1.0426, "step": 97765 }, { "epoch": 2.882795223352499, "grad_norm": 3.5792668044484617, "learning_rate": 3.481734609817474e-08, "loss": 1.035, "step": 97770 }, { "epoch": 2.8829426507445084, "grad_norm": 3.580277787399672, "learning_rate": 3.472994574622523e-08, "loss": 1.0098, "step": 97775 }, { "epoch": 2.883090078136518, "grad_norm": 3.3918887919714735, "learning_rate": 3.4642654720338924e-08, "loss": 1.0225, "step": 97780 }, { "epoch": 2.8832375055285273, "grad_norm": 3.6291279019889124, "learning_rate": 3.455547302308543e-08, "loss": 1.0849, "step": 97785 }, { "epoch": 2.8833849329205368, "grad_norm": 3.4546905505143988, "learning_rate": 3.446840065702936e-08, "loss": 1.0382, "step": 97790 }, { "epoch": 2.8835323603125462, "grad_norm": 3.4184612147257987, "learning_rate": 3.438143762473367e-08, "loss": 1.0456, "step": 97795 }, { "epoch": 2.8836797877045557, "grad_norm": 3.392683288943115, "learning_rate": 3.429458392875715e-08, "loss": 0.9849, "step": 97800 }, { "epoch": 2.8838272150965647, "grad_norm": 3.4926992013168627, "learning_rate": 3.420783957165482e-08, "loss": 1.0595, "step": 97805 }, { "epoch": 2.8839746424885746, "grad_norm": 3.608465753524756, "learning_rate": 3.412120455598008e-08, "loss": 1.0392, "step": 97810 }, { "epoch": 2.8841220698805836, "grad_norm": 3.50419363447903, "learning_rate": 3.403467888428213e-08, "loss": 1.0716, "step": 97815 }, { "epoch": 2.8842694972725935, "grad_norm": 3.556880924837343, "learning_rate": 3.3948262559106435e-08, "loss": 1.0263, "step": 97820 }, { "epoch": 2.8844169246646025, "grad_norm": 3.4215646067154073, "learning_rate": 3.3861955582997205e-08, "loss": 1.0476, "step": 97825 }, { "epoch": 2.884564352056612, "grad_norm": 3.4719543943971836, "learning_rate": 3.377575795849283e-08, "loss": 1.0195, "step": 97830 }, { "epoch": 2.8847117794486214, "grad_norm": 3.3992491342044437, "learning_rate": 3.368966968813003e-08, "loss": 1.0483, "step": 97835 }, { "epoch": 2.884859206840631, "grad_norm": 3.5731387788945024, "learning_rate": 3.3603690774442616e-08, "loss": 1.0263, "step": 97840 }, { "epoch": 2.8850066342326404, "grad_norm": 3.584351370252624, "learning_rate": 3.351782121996022e-08, "loss": 1.0377, "step": 97845 }, { "epoch": 2.88515406162465, "grad_norm": 3.6396257822020788, "learning_rate": 3.343206102721e-08, "loss": 1.0466, "step": 97850 }, { "epoch": 2.8853014890166593, "grad_norm": 3.51602186957383, "learning_rate": 3.334641019871451e-08, "loss": 1.0193, "step": 97855 }, { "epoch": 2.8854489164086687, "grad_norm": 3.5079165164677657, "learning_rate": 3.326086873699591e-08, "loss": 0.9942, "step": 97860 }, { "epoch": 2.885596343800678, "grad_norm": 3.3487462572255002, "learning_rate": 3.317543664456926e-08, "loss": 1.044, "step": 97865 }, { "epoch": 2.8857437711926877, "grad_norm": 3.6575510526345614, "learning_rate": 3.3090113923950474e-08, "loss": 1.0443, "step": 97870 }, { "epoch": 2.885891198584697, "grad_norm": 3.493482465215264, "learning_rate": 3.3004900577648794e-08, "loss": 1.0564, "step": 97875 }, { "epoch": 2.8860386259767066, "grad_norm": 3.4083667471322032, "learning_rate": 3.291979660817221e-08, "loss": 1.0635, "step": 97880 }, { "epoch": 2.886186053368716, "grad_norm": 3.776881613988088, "learning_rate": 3.283480201802538e-08, "loss": 1.0718, "step": 97885 }, { "epoch": 2.8863334807607255, "grad_norm": 3.1926372979942044, "learning_rate": 3.2749916809708816e-08, "loss": 1.0037, "step": 97890 }, { "epoch": 2.886480908152735, "grad_norm": 3.429384738639117, "learning_rate": 3.2665140985720934e-08, "loss": 1.0499, "step": 97895 }, { "epoch": 2.886628335544744, "grad_norm": 3.4573457853885063, "learning_rate": 3.258047454855598e-08, "loss": 1.0255, "step": 97900 }, { "epoch": 2.886775762936754, "grad_norm": 3.5926594075627656, "learning_rate": 3.249591750070488e-08, "loss": 1.0446, "step": 97905 }, { "epoch": 2.886923190328763, "grad_norm": 3.5680624554176474, "learning_rate": 3.24114698446569e-08, "loss": 1.0278, "step": 97910 }, { "epoch": 2.887070617720773, "grad_norm": 3.2881557530601566, "learning_rate": 3.23271315828963e-08, "loss": 1.0329, "step": 97915 }, { "epoch": 2.887218045112782, "grad_norm": 3.733237499672491, "learning_rate": 3.224290271790484e-08, "loss": 1.04, "step": 97920 }, { "epoch": 2.8873654725047913, "grad_norm": 3.726200057055563, "learning_rate": 3.2158783252161375e-08, "loss": 1.0751, "step": 97925 }, { "epoch": 2.8875128998968007, "grad_norm": 3.5671990441671366, "learning_rate": 3.2074773188141004e-08, "loss": 1.0741, "step": 97930 }, { "epoch": 2.88766032728881, "grad_norm": 3.438959230593618, "learning_rate": 3.199087252831592e-08, "loss": 1.0195, "step": 97935 }, { "epoch": 2.8878077546808196, "grad_norm": 3.4914708523631504, "learning_rate": 3.190708127515457e-08, "loss": 1.043, "step": 97940 }, { "epoch": 2.887955182072829, "grad_norm": 3.61292409179849, "learning_rate": 3.182339943112372e-08, "loss": 0.983, "step": 97945 }, { "epoch": 2.8881026094648385, "grad_norm": 3.4193924805476827, "learning_rate": 3.173982699868433e-08, "loss": 1.0407, "step": 97950 }, { "epoch": 2.888250036856848, "grad_norm": 3.356076812659167, "learning_rate": 3.1656363980296504e-08, "loss": 1.0135, "step": 97955 }, { "epoch": 2.8883974642488575, "grad_norm": 3.5308902338843975, "learning_rate": 3.157301037841662e-08, "loss": 1.0168, "step": 97960 }, { "epoch": 2.888544891640867, "grad_norm": 3.6384874450131472, "learning_rate": 3.148976619549604e-08, "loss": 1.0195, "step": 97965 }, { "epoch": 2.8886923190328764, "grad_norm": 3.403056310711826, "learning_rate": 3.1406631433985724e-08, "loss": 0.9815, "step": 97970 }, { "epoch": 2.888839746424886, "grad_norm": 3.5757444477173235, "learning_rate": 3.1323606096331644e-08, "loss": 1.0475, "step": 97975 }, { "epoch": 2.8889871738168953, "grad_norm": 3.597382438557592, "learning_rate": 3.1240690184976414e-08, "loss": 1.0606, "step": 97980 }, { "epoch": 2.8891346012089048, "grad_norm": 3.5343894939577183, "learning_rate": 3.115788370236017e-08, "loss": 1.0192, "step": 97985 }, { "epoch": 2.889282028600914, "grad_norm": 3.549850846761757, "learning_rate": 3.107518665091971e-08, "loss": 1.06, "step": 97990 }, { "epoch": 2.8894294559929232, "grad_norm": 3.500516678126157, "learning_rate": 3.0992599033088514e-08, "loss": 1.0352, "step": 97995 }, { "epoch": 2.889576883384933, "grad_norm": 3.563259656496731, "learning_rate": 3.0910120851296715e-08, "loss": 1.0703, "step": 98000 }, { "epoch": 2.889576883384933, "eval_loss": 1.076637625694275, "eval_runtime": 4.2019, "eval_samples_per_second": 94.243, "eval_steps_per_second": 3.094, "step": 98000 }, { "epoch": 2.889724310776942, "grad_norm": 3.622944350199053, "learning_rate": 3.082775210797154e-08, "loss": 1.0466, "step": 98005 }, { "epoch": 2.889871738168952, "grad_norm": 3.6899603661207636, "learning_rate": 3.074549280553646e-08, "loss": 1.0697, "step": 98010 }, { "epoch": 2.890019165560961, "grad_norm": 3.4221120603853366, "learning_rate": 3.0663342946412045e-08, "loss": 1.0295, "step": 98015 }, { "epoch": 2.8901665929529705, "grad_norm": 3.51520829641303, "learning_rate": 3.058130253301594e-08, "loss": 1.0139, "step": 98020 }, { "epoch": 2.89031402034498, "grad_norm": 3.521479145324692, "learning_rate": 3.0499371567762475e-08, "loss": 1.037, "step": 98025 }, { "epoch": 2.8904614477369894, "grad_norm": 3.2346182982495146, "learning_rate": 3.0417550053061794e-08, "loss": 1.0262, "step": 98030 }, { "epoch": 2.890608875128999, "grad_norm": 3.446539408129569, "learning_rate": 3.033583799132239e-08, "loss": 1.0566, "step": 98035 }, { "epoch": 2.8907563025210083, "grad_norm": 3.6788353406450405, "learning_rate": 3.025423538494776e-08, "loss": 1.0447, "step": 98040 }, { "epoch": 2.890903729913018, "grad_norm": 3.7036178635162265, "learning_rate": 3.0172742236340144e-08, "loss": 1.0468, "step": 98045 }, { "epoch": 2.8910511573050273, "grad_norm": 3.510552462047157, "learning_rate": 3.009135854789721e-08, "loss": 0.9984, "step": 98050 }, { "epoch": 2.8911985846970367, "grad_norm": 3.5083679372840613, "learning_rate": 3.001008432201371e-08, "loss": 1.0192, "step": 98055 }, { "epoch": 2.891346012089046, "grad_norm": 3.4958375864150404, "learning_rate": 2.992891956108107e-08, "loss": 0.9822, "step": 98060 }, { "epoch": 2.8914934394810556, "grad_norm": 3.5482279808474773, "learning_rate": 2.98478642674882e-08, "loss": 1.0416, "step": 98065 }, { "epoch": 2.891640866873065, "grad_norm": 3.572857245899929, "learning_rate": 2.9766918443619866e-08, "loss": 1.0412, "step": 98070 }, { "epoch": 2.8917882942650746, "grad_norm": 3.3619611067087596, "learning_rate": 2.96860820918575e-08, "loss": 1.0311, "step": 98075 }, { "epoch": 2.891935721657084, "grad_norm": 3.534040885540675, "learning_rate": 2.9605355214580855e-08, "loss": 1.0288, "step": 98080 }, { "epoch": 2.8920831490490935, "grad_norm": 3.5706088354470635, "learning_rate": 2.9524737814164704e-08, "loss": 0.9862, "step": 98085 }, { "epoch": 2.8922305764411025, "grad_norm": 3.232161122238837, "learning_rate": 2.9444229892981734e-08, "loss": 0.9942, "step": 98090 }, { "epoch": 2.8923780038331124, "grad_norm": 3.5136425654962693, "learning_rate": 2.9363831453400464e-08, "loss": 1.0413, "step": 98095 }, { "epoch": 2.8925254312251214, "grad_norm": 3.396296677727133, "learning_rate": 2.928354249778692e-08, "loss": 1.0408, "step": 98100 }, { "epoch": 2.8926728586171313, "grad_norm": 3.5655239589652385, "learning_rate": 2.9203363028503794e-08, "loss": 1.064, "step": 98105 }, { "epoch": 2.8928202860091403, "grad_norm": 3.498385235592681, "learning_rate": 2.912329304791045e-08, "loss": 1.0458, "step": 98110 }, { "epoch": 2.8929677134011498, "grad_norm": 3.396195387686295, "learning_rate": 2.904333255836292e-08, "loss": 1.0002, "step": 98115 }, { "epoch": 2.8931151407931592, "grad_norm": 3.363715742986352, "learning_rate": 2.8963481562214317e-08, "loss": 1.0174, "step": 98120 }, { "epoch": 2.8932625681851687, "grad_norm": 3.6134099938618043, "learning_rate": 2.8883740061814018e-08, "loss": 1.0148, "step": 98125 }, { "epoch": 2.893409995577178, "grad_norm": 3.5665808291621293, "learning_rate": 2.880410805950931e-08, "loss": 1.0383, "step": 98130 }, { "epoch": 2.8935574229691876, "grad_norm": 3.497418891311571, "learning_rate": 2.8724585557642484e-08, "loss": 1.0324, "step": 98135 }, { "epoch": 2.893704850361197, "grad_norm": 3.613563140149556, "learning_rate": 2.8645172558553756e-08, "loss": 1.0484, "step": 98140 }, { "epoch": 2.8938522777532065, "grad_norm": 3.542241565278948, "learning_rate": 2.856586906458042e-08, "loss": 0.9934, "step": 98145 }, { "epoch": 2.893999705145216, "grad_norm": 3.4840294499764517, "learning_rate": 2.8486675078055606e-08, "loss": 1.0491, "step": 98150 }, { "epoch": 2.8941471325372254, "grad_norm": 3.535183155696757, "learning_rate": 2.8407590601310374e-08, "loss": 1.0081, "step": 98155 }, { "epoch": 2.894294559929235, "grad_norm": 3.560842274523809, "learning_rate": 2.832861563667119e-08, "loss": 1.0205, "step": 98160 }, { "epoch": 2.8944419873212444, "grad_norm": 3.596113151845254, "learning_rate": 2.8249750186462033e-08, "loss": 1.0314, "step": 98165 }, { "epoch": 2.894589414713254, "grad_norm": 3.5266315238503676, "learning_rate": 2.8170994253003956e-08, "loss": 1.0255, "step": 98170 }, { "epoch": 2.8947368421052633, "grad_norm": 3.518625014181879, "learning_rate": 2.809234783861428e-08, "loss": 1.0482, "step": 98175 }, { "epoch": 2.8948842694972727, "grad_norm": 3.4692435946257754, "learning_rate": 2.8013810945606983e-08, "loss": 1.0463, "step": 98180 }, { "epoch": 2.895031696889282, "grad_norm": 3.69793778728439, "learning_rate": 2.7935383576293548e-08, "loss": 1.0687, "step": 98185 }, { "epoch": 2.8951791242812916, "grad_norm": 3.547587495473158, "learning_rate": 2.78570657329813e-08, "loss": 1.0469, "step": 98190 }, { "epoch": 2.8953265516733007, "grad_norm": 3.4464204924360184, "learning_rate": 2.7778857417975896e-08, "loss": 1.03, "step": 98195 }, { "epoch": 2.8954739790653106, "grad_norm": 3.268754162408651, "learning_rate": 2.7700758633577157e-08, "loss": 1.0197, "step": 98200 }, { "epoch": 2.8956214064573196, "grad_norm": 3.594111074286689, "learning_rate": 2.7622769382084497e-08, "loss": 1.0623, "step": 98205 }, { "epoch": 2.8957688338493295, "grad_norm": 3.6073338171455234, "learning_rate": 2.7544889665791912e-08, "loss": 1.0813, "step": 98210 }, { "epoch": 2.8959162612413385, "grad_norm": 3.436131982819762, "learning_rate": 2.7467119486991738e-08, "loss": 1.0481, "step": 98215 }, { "epoch": 2.896063688633348, "grad_norm": 3.530649112327108, "learning_rate": 2.738945884797256e-08, "loss": 1.0137, "step": 98220 }, { "epoch": 2.8962111160253574, "grad_norm": 3.7032333537047397, "learning_rate": 2.7311907751018798e-08, "loss": 1.068, "step": 98225 }, { "epoch": 2.896358543417367, "grad_norm": 3.5085968751381724, "learning_rate": 2.7234466198413634e-08, "loss": 1.0417, "step": 98230 }, { "epoch": 2.8965059708093763, "grad_norm": 3.48356836425899, "learning_rate": 2.7157134192434817e-08, "loss": 1.0294, "step": 98235 }, { "epoch": 2.896653398201386, "grad_norm": 3.445877852298788, "learning_rate": 2.7079911735358452e-08, "loss": 1.0117, "step": 98240 }, { "epoch": 2.8968008255933952, "grad_norm": 3.527485936508387, "learning_rate": 2.7002798829456887e-08, "loss": 1.0545, "step": 98245 }, { "epoch": 2.8969482529854047, "grad_norm": 3.4485331943891255, "learning_rate": 2.6925795476999136e-08, "loss": 1.056, "step": 98250 }, { "epoch": 2.897095680377414, "grad_norm": 3.4626817325328996, "learning_rate": 2.684890168025131e-08, "loss": 1.0883, "step": 98255 }, { "epoch": 2.8972431077694236, "grad_norm": 3.491173926537932, "learning_rate": 2.6772117441475757e-08, "loss": 1.047, "step": 98260 }, { "epoch": 2.897390535161433, "grad_norm": 3.5061796353477535, "learning_rate": 2.6695442762931926e-08, "loss": 1.0157, "step": 98265 }, { "epoch": 2.8975379625534425, "grad_norm": 3.6075337042982145, "learning_rate": 2.661887764687676e-08, "loss": 1.0283, "step": 98270 }, { "epoch": 2.897685389945452, "grad_norm": 3.5102725066484, "learning_rate": 2.6542422095562207e-08, "loss": 1.0474, "step": 98275 }, { "epoch": 2.8978328173374615, "grad_norm": 3.3825667544853086, "learning_rate": 2.6466076111239386e-08, "loss": 1.0402, "step": 98280 }, { "epoch": 2.897980244729471, "grad_norm": 3.5364348201634055, "learning_rate": 2.6389839696153577e-08, "loss": 1.0229, "step": 98285 }, { "epoch": 2.89812767212148, "grad_norm": 3.6305476239157, "learning_rate": 2.631371285254841e-08, "loss": 1.0274, "step": 98290 }, { "epoch": 2.89827509951349, "grad_norm": 3.4588922785596496, "learning_rate": 2.6237695582665006e-08, "loss": 1.0598, "step": 98295 }, { "epoch": 2.898422526905499, "grad_norm": 3.460463488465586, "learning_rate": 2.6161787888738657e-08, "loss": 1.036, "step": 98300 }, { "epoch": 2.8985699542975087, "grad_norm": 3.5657572384097778, "learning_rate": 2.6085989773004666e-08, "loss": 1.0339, "step": 98305 }, { "epoch": 2.8987173816895178, "grad_norm": 3.6294053762929885, "learning_rate": 2.6010301237692075e-08, "loss": 1.0541, "step": 98310 }, { "epoch": 2.898864809081527, "grad_norm": 3.413428178413551, "learning_rate": 2.5934722285029106e-08, "loss": 1.0529, "step": 98315 }, { "epoch": 2.8990122364735367, "grad_norm": 3.6443902631106915, "learning_rate": 2.585925291723898e-08, "loss": 1.0412, "step": 98320 }, { "epoch": 2.899159663865546, "grad_norm": 3.6445455003632223, "learning_rate": 2.5783893136543253e-08, "loss": 1.0254, "step": 98325 }, { "epoch": 2.8993070912575556, "grad_norm": 3.4960273646493634, "learning_rate": 2.57086429451589e-08, "loss": 1.0505, "step": 98330 }, { "epoch": 2.899454518649565, "grad_norm": 3.318606575111685, "learning_rate": 2.5633502345300402e-08, "loss": 1.0124, "step": 98335 }, { "epoch": 2.8996019460415745, "grad_norm": 3.4731583872203466, "learning_rate": 2.5558471339178906e-08, "loss": 1.0364, "step": 98340 }, { "epoch": 2.899749373433584, "grad_norm": 3.458450967751681, "learning_rate": 2.548354992900223e-08, "loss": 1.0291, "step": 98345 }, { "epoch": 2.8998968008255934, "grad_norm": 3.535609797520178, "learning_rate": 2.540873811697528e-08, "loss": 1.041, "step": 98350 }, { "epoch": 2.900044228217603, "grad_norm": 3.528256560289681, "learning_rate": 2.5334035905298787e-08, "loss": 1.0236, "step": 98355 }, { "epoch": 2.9001916556096123, "grad_norm": 3.5337547640986298, "learning_rate": 2.525944329617183e-08, "loss": 1.0116, "step": 98360 }, { "epoch": 2.900339083001622, "grad_norm": 3.400648411962029, "learning_rate": 2.5184960291788905e-08, "loss": 1.0313, "step": 98365 }, { "epoch": 2.9004865103936313, "grad_norm": 3.4696757860057543, "learning_rate": 2.5110586894341593e-08, "loss": 1.0477, "step": 98370 }, { "epoch": 2.9006339377856407, "grad_norm": 3.75118381917849, "learning_rate": 2.5036323106018555e-08, "loss": 1.0412, "step": 98375 }, { "epoch": 2.90078136517765, "grad_norm": 3.515443365410548, "learning_rate": 2.4962168929005546e-08, "loss": 1.0526, "step": 98380 }, { "epoch": 2.900928792569659, "grad_norm": 3.4999385400301706, "learning_rate": 2.4888124365484156e-08, "loss": 1.0513, "step": 98385 }, { "epoch": 2.901076219961669, "grad_norm": 3.4423593592805912, "learning_rate": 2.481418941763347e-08, "loss": 1.0129, "step": 98390 }, { "epoch": 2.901223647353678, "grad_norm": 3.3636570290591354, "learning_rate": 2.4740364087628836e-08, "loss": 1.0495, "step": 98395 }, { "epoch": 2.901371074745688, "grad_norm": 3.5717560996450284, "learning_rate": 2.466664837764268e-08, "loss": 1.0543, "step": 98400 }, { "epoch": 2.901518502137697, "grad_norm": 3.348748172526545, "learning_rate": 2.4593042289844104e-08, "loss": 1.0254, "step": 98405 }, { "epoch": 2.9016659295297065, "grad_norm": 3.587703776281434, "learning_rate": 2.4519545826399702e-08, "loss": 1.0387, "step": 98410 }, { "epoch": 2.901813356921716, "grad_norm": 3.5016923907366473, "learning_rate": 2.44461589894715e-08, "loss": 1.1238, "step": 98415 }, { "epoch": 2.9019607843137254, "grad_norm": 3.5618073729860433, "learning_rate": 2.437288178121902e-08, "loss": 1.0494, "step": 98420 }, { "epoch": 2.902108211705735, "grad_norm": 3.476381778808815, "learning_rate": 2.4299714203798867e-08, "loss": 1.0622, "step": 98425 }, { "epoch": 2.9022556390977443, "grad_norm": 3.5363301435298307, "learning_rate": 2.4226656259363906e-08, "loss": 1.0781, "step": 98430 }, { "epoch": 2.9024030664897538, "grad_norm": 3.4402350576506384, "learning_rate": 2.415370795006408e-08, "loss": 0.9943, "step": 98435 }, { "epoch": 2.9025504938817632, "grad_norm": 3.437545387169623, "learning_rate": 2.4080869278045593e-08, "loss": 1.0323, "step": 98440 }, { "epoch": 2.9026979212737727, "grad_norm": 3.6265358685828115, "learning_rate": 2.400814024545256e-08, "loss": 1.029, "step": 98445 }, { "epoch": 2.902845348665782, "grad_norm": 3.5787086393379433, "learning_rate": 2.3935520854424103e-08, "loss": 1.0513, "step": 98450 }, { "epoch": 2.9029927760577916, "grad_norm": 3.5809054292392335, "learning_rate": 2.3863011107098098e-08, "loss": 1.0497, "step": 98455 }, { "epoch": 2.903140203449801, "grad_norm": 3.504119317773011, "learning_rate": 2.379061100560742e-08, "loss": 1.0572, "step": 98460 }, { "epoch": 2.9032876308418105, "grad_norm": 3.6020606394746855, "learning_rate": 2.3718320552083284e-08, "loss": 1.0641, "step": 98465 }, { "epoch": 2.90343505823382, "grad_norm": 3.4450745530718403, "learning_rate": 2.3646139748652317e-08, "loss": 1.0196, "step": 98470 }, { "epoch": 2.9035824856258294, "grad_norm": 3.4824405751765415, "learning_rate": 2.3574068597438657e-08, "loss": 1.0392, "step": 98475 }, { "epoch": 2.9037299130178384, "grad_norm": 3.442967532088426, "learning_rate": 2.3502107100563525e-08, "loss": 1.0515, "step": 98480 }, { "epoch": 2.9038773404098484, "grad_norm": 3.4974865370460173, "learning_rate": 2.343025526014356e-08, "loss": 1.0219, "step": 98485 }, { "epoch": 2.9040247678018574, "grad_norm": 3.601744073808024, "learning_rate": 2.3358513078294148e-08, "loss": 1.0078, "step": 98490 }, { "epoch": 2.9041721951938673, "grad_norm": 3.5275533414481357, "learning_rate": 2.328688055712569e-08, "loss": 1.0227, "step": 98495 }, { "epoch": 2.9043196225858763, "grad_norm": 3.5431075892645443, "learning_rate": 2.321535769874608e-08, "loss": 1.0424, "step": 98500 }, { "epoch": 2.9043196225858763, "eval_loss": 1.0765695571899414, "eval_runtime": 4.2731, "eval_samples_per_second": 92.672, "eval_steps_per_second": 3.042, "step": 98500 }, { "epoch": 2.9044670499778857, "grad_norm": 3.4835556220249444, "learning_rate": 2.3143944505260305e-08, "loss": 1.0185, "step": 98505 }, { "epoch": 2.904614477369895, "grad_norm": 3.514001670363818, "learning_rate": 2.3072640978769184e-08, "loss": 1.0107, "step": 98510 }, { "epoch": 2.9047619047619047, "grad_norm": 3.4721615972841415, "learning_rate": 2.3001447121371866e-08, "loss": 1.0053, "step": 98515 }, { "epoch": 2.904909332153914, "grad_norm": 3.5403900302961837, "learning_rate": 2.2930362935162514e-08, "loss": 1.0425, "step": 98520 }, { "epoch": 2.9050567595459236, "grad_norm": 3.472936053244967, "learning_rate": 2.2859388422232786e-08, "loss": 1.0428, "step": 98525 }, { "epoch": 2.905204186937933, "grad_norm": 3.5713701241286993, "learning_rate": 2.278852358467226e-08, "loss": 1.0432, "step": 98530 }, { "epoch": 2.9053516143299425, "grad_norm": 3.587392201618331, "learning_rate": 2.2717768424564688e-08, "loss": 1.0287, "step": 98535 }, { "epoch": 2.905499041721952, "grad_norm": 3.550345695392621, "learning_rate": 2.2647122943993404e-08, "loss": 1.0425, "step": 98540 }, { "epoch": 2.9056464691139614, "grad_norm": 3.470543268113498, "learning_rate": 2.2576587145036325e-08, "loss": 1.0458, "step": 98545 }, { "epoch": 2.905793896505971, "grad_norm": 3.370697413536164, "learning_rate": 2.2506161029769712e-08, "loss": 1.0494, "step": 98550 }, { "epoch": 2.9059413238979803, "grad_norm": 3.56207758880359, "learning_rate": 2.2435844600265654e-08, "loss": 1.0273, "step": 98555 }, { "epoch": 2.90608875128999, "grad_norm": 3.3502928160999206, "learning_rate": 2.2365637858592912e-08, "loss": 0.9846, "step": 98560 }, { "epoch": 2.9062361786819992, "grad_norm": 3.393295522830168, "learning_rate": 2.229554080681817e-08, "loss": 1.0352, "step": 98565 }, { "epoch": 2.9063836060740087, "grad_norm": 3.413036562323904, "learning_rate": 2.2225553447003526e-08, "loss": 1.0091, "step": 98570 }, { "epoch": 2.9065310334660177, "grad_norm": 3.479701609042248, "learning_rate": 2.2155675781209e-08, "loss": 0.9994, "step": 98575 }, { "epoch": 2.9066784608580276, "grad_norm": 3.43138698281334, "learning_rate": 2.2085907811490446e-08, "loss": 0.9865, "step": 98580 }, { "epoch": 2.9068258882500366, "grad_norm": 3.6038238102920612, "learning_rate": 2.2016249539900392e-08, "loss": 0.9939, "step": 98585 }, { "epoch": 2.9069733156420465, "grad_norm": 3.575401669138665, "learning_rate": 2.194670096848969e-08, "loss": 1.0666, "step": 98590 }, { "epoch": 2.9071207430340555, "grad_norm": 3.3979339914082156, "learning_rate": 2.1877262099303796e-08, "loss": 1.0297, "step": 98595 }, { "epoch": 2.907268170426065, "grad_norm": 3.400418102978444, "learning_rate": 2.1807932934386903e-08, "loss": 1.0446, "step": 98600 }, { "epoch": 2.9074155978180745, "grad_norm": 3.5701984215950695, "learning_rate": 2.1738713475778633e-08, "loss": 1.0853, "step": 98605 }, { "epoch": 2.907563025210084, "grad_norm": 3.3029242170711592, "learning_rate": 2.1669603725515686e-08, "loss": 0.9982, "step": 98610 }, { "epoch": 2.9077104526020934, "grad_norm": 3.707269395176585, "learning_rate": 2.160060368563227e-08, "loss": 1.0924, "step": 98615 }, { "epoch": 2.907857879994103, "grad_norm": 3.5248388132357165, "learning_rate": 2.1531713358158427e-08, "loss": 1.0041, "step": 98620 }, { "epoch": 2.9080053073861123, "grad_norm": 3.639230158622459, "learning_rate": 2.1462932745121288e-08, "loss": 1.0709, "step": 98625 }, { "epoch": 2.9081527347781218, "grad_norm": 3.520846132419784, "learning_rate": 2.1394261848545067e-08, "loss": 1.0475, "step": 98630 }, { "epoch": 2.908300162170131, "grad_norm": 3.494459244742601, "learning_rate": 2.132570067044981e-08, "loss": 1.0105, "step": 98635 }, { "epoch": 2.9084475895621407, "grad_norm": 3.596143436230729, "learning_rate": 2.1257249212854324e-08, "loss": 1.037, "step": 98640 }, { "epoch": 2.90859501695415, "grad_norm": 3.6529880910530528, "learning_rate": 2.1188907477771163e-08, "loss": 1.0688, "step": 98645 }, { "epoch": 2.9087424443461596, "grad_norm": 3.405394913685508, "learning_rate": 2.1120675467212885e-08, "loss": 1.0444, "step": 98650 }, { "epoch": 2.908889871738169, "grad_norm": 3.6430172386632975, "learning_rate": 2.1052553183186634e-08, "loss": 1.0597, "step": 98655 }, { "epoch": 2.9090372991301785, "grad_norm": 3.4223644157936906, "learning_rate": 2.0984540627696642e-08, "loss": 0.9872, "step": 98660 }, { "epoch": 2.909184726522188, "grad_norm": 3.4631144318341884, "learning_rate": 2.0916637802745053e-08, "loss": 1.0384, "step": 98665 }, { "epoch": 2.9093321539141974, "grad_norm": 3.3516114946519426, "learning_rate": 2.0848844710329023e-08, "loss": 0.9947, "step": 98670 }, { "epoch": 2.909479581306207, "grad_norm": 3.4597503814540493, "learning_rate": 2.0781161352444455e-08, "loss": 1.0238, "step": 98675 }, { "epoch": 2.909627008698216, "grad_norm": 3.4329174428144964, "learning_rate": 2.0713587731082673e-08, "loss": 1.0306, "step": 98680 }, { "epoch": 2.909774436090226, "grad_norm": 3.5544291311768457, "learning_rate": 2.064612384823167e-08, "loss": 1.0489, "step": 98685 }, { "epoch": 2.909921863482235, "grad_norm": 3.418185552232278, "learning_rate": 2.057876970587694e-08, "loss": 1.0579, "step": 98690 }, { "epoch": 2.9100692908742447, "grad_norm": 3.5409008015006624, "learning_rate": 2.0511525306000227e-08, "loss": 1.0206, "step": 98695 }, { "epoch": 2.9102167182662537, "grad_norm": 3.4246862651049113, "learning_rate": 2.044439065058079e-08, "loss": 1.0504, "step": 98700 }, { "epoch": 2.910364145658263, "grad_norm": 3.3832881173159155, "learning_rate": 2.037736574159413e-08, "loss": 1.0094, "step": 98705 }, { "epoch": 2.9105115730502726, "grad_norm": 3.330465889204288, "learning_rate": 2.0310450581011996e-08, "loss": 1.0389, "step": 98710 }, { "epoch": 2.910659000442282, "grad_norm": 3.577181932774766, "learning_rate": 2.0243645170803653e-08, "loss": 0.9981, "step": 98715 }, { "epoch": 2.9108064278342916, "grad_norm": 3.551566166850099, "learning_rate": 2.0176949512935028e-08, "loss": 1.0602, "step": 98720 }, { "epoch": 2.910953855226301, "grad_norm": 3.5422845044727493, "learning_rate": 2.0110363609368713e-08, "loss": 1.0259, "step": 98725 }, { "epoch": 2.9111012826183105, "grad_norm": 3.4519623674222175, "learning_rate": 2.004388746206398e-08, "loss": 1.0517, "step": 98730 }, { "epoch": 2.91124871001032, "grad_norm": 3.5744843680043483, "learning_rate": 1.9977521072977177e-08, "loss": 1.0713, "step": 98735 }, { "epoch": 2.9113961374023294, "grad_norm": 3.5140206679320634, "learning_rate": 1.991126444406091e-08, "loss": 1.0272, "step": 98740 }, { "epoch": 2.911543564794339, "grad_norm": 3.385352125300881, "learning_rate": 1.984511757726487e-08, "loss": 1.0208, "step": 98745 }, { "epoch": 2.9116909921863483, "grad_norm": 3.473268330290845, "learning_rate": 1.9779080474535833e-08, "loss": 1.0444, "step": 98750 }, { "epoch": 2.9118384195783578, "grad_norm": 3.6394421190917434, "learning_rate": 1.9713153137816825e-08, "loss": 1.0269, "step": 98755 }, { "epoch": 2.911985846970367, "grad_norm": 3.49591593951934, "learning_rate": 1.9647335569047965e-08, "loss": 1.0205, "step": 98760 }, { "epoch": 2.9121332743623767, "grad_norm": 3.750901893991006, "learning_rate": 1.9581627770165617e-08, "loss": 1.0972, "step": 98765 }, { "epoch": 2.912280701754386, "grad_norm": 3.612397562070851, "learning_rate": 1.9516029743103653e-08, "loss": 1.0414, "step": 98770 }, { "epoch": 2.912428129146395, "grad_norm": 3.4943228651697766, "learning_rate": 1.9450541489792195e-08, "loss": 1.0463, "step": 98775 }, { "epoch": 2.912575556538405, "grad_norm": 3.4173891126213394, "learning_rate": 1.9385163012158453e-08, "loss": 1.0042, "step": 98780 }, { "epoch": 2.912722983930414, "grad_norm": 3.6589445992518774, "learning_rate": 1.9319894312125887e-08, "loss": 1.0383, "step": 98785 }, { "epoch": 2.912870411322424, "grad_norm": 3.612609575550137, "learning_rate": 1.9254735391615874e-08, "loss": 1.0154, "step": 98790 }, { "epoch": 2.913017838714433, "grad_norm": 3.644092226521409, "learning_rate": 1.918968625254522e-08, "loss": 1.0992, "step": 98795 }, { "epoch": 2.9131652661064424, "grad_norm": 3.6961211985615243, "learning_rate": 1.912474689682822e-08, "loss": 1.0503, "step": 98800 }, { "epoch": 2.913312693498452, "grad_norm": 3.42571114074729, "learning_rate": 1.9059917326375853e-08, "loss": 1.0533, "step": 98805 }, { "epoch": 2.9134601208904614, "grad_norm": 3.467436938481875, "learning_rate": 1.899519754309534e-08, "loss": 1.0547, "step": 98810 }, { "epoch": 2.913607548282471, "grad_norm": 3.380822915375799, "learning_rate": 1.893058754889182e-08, "loss": 1.0006, "step": 98815 }, { "epoch": 2.9137549756744803, "grad_norm": 3.445951813027374, "learning_rate": 1.886608734566586e-08, "loss": 1.0216, "step": 98820 }, { "epoch": 2.9139024030664897, "grad_norm": 3.344044615283654, "learning_rate": 1.880169693531636e-08, "loss": 1.0326, "step": 98825 }, { "epoch": 2.914049830458499, "grad_norm": 3.4023854018468587, "learning_rate": 1.87374163197368e-08, "loss": 1.0159, "step": 98830 }, { "epoch": 2.9141972578505086, "grad_norm": 3.37993038610737, "learning_rate": 1.867324550082025e-08, "loss": 0.9849, "step": 98835 }, { "epoch": 2.914344685242518, "grad_norm": 3.613859414096055, "learning_rate": 1.860918448045354e-08, "loss": 1.0538, "step": 98840 }, { "epoch": 2.9144921126345276, "grad_norm": 3.394170669159055, "learning_rate": 1.8545233260522655e-08, "loss": 0.9711, "step": 98845 }, { "epoch": 2.914639540026537, "grad_norm": 3.509372646755016, "learning_rate": 1.8481391842909006e-08, "loss": 0.9848, "step": 98850 }, { "epoch": 2.9147869674185465, "grad_norm": 3.4962542846952105, "learning_rate": 1.8417660229491097e-08, "loss": 1.0342, "step": 98855 }, { "epoch": 2.914934394810556, "grad_norm": 3.584492914239713, "learning_rate": 1.835403842214492e-08, "loss": 1.033, "step": 98860 }, { "epoch": 2.9150818222025654, "grad_norm": 3.382792284359878, "learning_rate": 1.8290526422742315e-08, "loss": 1.0399, "step": 98865 }, { "epoch": 2.9152292495945744, "grad_norm": 3.423365300770828, "learning_rate": 1.8227124233151786e-08, "loss": 1.0271, "step": 98870 }, { "epoch": 2.9153766769865843, "grad_norm": 3.4684806960077417, "learning_rate": 1.816383185523976e-08, "loss": 1.0298, "step": 98875 }, { "epoch": 2.9155241043785933, "grad_norm": 3.5180186587795563, "learning_rate": 1.8100649290868075e-08, "loss": 1.0332, "step": 98880 }, { "epoch": 2.9156715317706032, "grad_norm": 3.640511692068742, "learning_rate": 1.80375765418965e-08, "loss": 1.0324, "step": 98885 }, { "epoch": 2.9158189591626122, "grad_norm": 3.467137414857885, "learning_rate": 1.7974613610180214e-08, "loss": 1.0311, "step": 98890 }, { "epoch": 2.9159663865546217, "grad_norm": 3.595877672541343, "learning_rate": 1.791176049757273e-08, "loss": 1.0488, "step": 98895 }, { "epoch": 2.916113813946631, "grad_norm": 3.4897225831037813, "learning_rate": 1.7849017205923413e-08, "loss": 1.0104, "step": 98900 }, { "epoch": 2.9162612413386406, "grad_norm": 3.3868139102097135, "learning_rate": 1.778638373707786e-08, "loss": 0.9991, "step": 98905 }, { "epoch": 2.91640866873065, "grad_norm": 3.4125172113573816, "learning_rate": 1.7723860092880434e-08, "loss": 1.0206, "step": 98910 }, { "epoch": 2.9165560961226595, "grad_norm": 3.6302827753128493, "learning_rate": 1.7661446275169663e-08, "loss": 0.9762, "step": 98915 }, { "epoch": 2.916703523514669, "grad_norm": 3.5817214551360608, "learning_rate": 1.7599142285782825e-08, "loss": 1.0786, "step": 98920 }, { "epoch": 2.9168509509066785, "grad_norm": 3.610634670040123, "learning_rate": 1.753694812655346e-08, "loss": 1.0728, "step": 98925 }, { "epoch": 2.916998378298688, "grad_norm": 3.695073484898001, "learning_rate": 1.747486379931093e-08, "loss": 1.0218, "step": 98930 }, { "epoch": 2.9171458056906974, "grad_norm": 3.3030414528531944, "learning_rate": 1.7412889305882946e-08, "loss": 1.0241, "step": 98935 }, { "epoch": 2.917293233082707, "grad_norm": 3.3956562203917953, "learning_rate": 1.7351024648093044e-08, "loss": 1.0296, "step": 98940 }, { "epoch": 2.9174406604747163, "grad_norm": 3.6844362534268296, "learning_rate": 1.728926982776144e-08, "loss": 1.0526, "step": 98945 }, { "epoch": 2.9175880878667257, "grad_norm": 3.5197091483434275, "learning_rate": 1.722762484670501e-08, "loss": 1.0943, "step": 98950 }, { "epoch": 2.917735515258735, "grad_norm": 3.4178840066415783, "learning_rate": 1.716608970673855e-08, "loss": 1.0041, "step": 98955 }, { "epoch": 2.9178829426507447, "grad_norm": 3.5042831826028, "learning_rate": 1.710466440967187e-08, "loss": 1.0459, "step": 98960 }, { "epoch": 2.9180303700427537, "grad_norm": 3.511643559048549, "learning_rate": 1.704334895731352e-08, "loss": 1.0639, "step": 98965 }, { "epoch": 2.9181777974347636, "grad_norm": 3.290005764402654, "learning_rate": 1.698214335146664e-08, "loss": 1.03, "step": 98970 }, { "epoch": 2.9183252248267726, "grad_norm": 3.497364438776281, "learning_rate": 1.6921047593933118e-08, "loss": 0.9956, "step": 98975 }, { "epoch": 2.9184726522187825, "grad_norm": 3.352863032082298, "learning_rate": 1.686006168651069e-08, "loss": 1.0547, "step": 98980 }, { "epoch": 2.9186200796107915, "grad_norm": 3.4594041988247173, "learning_rate": 1.6799185630993336e-08, "loss": 1.0288, "step": 98985 }, { "epoch": 2.918767507002801, "grad_norm": 3.4829455674808685, "learning_rate": 1.673841942917337e-08, "loss": 1.0181, "step": 98990 }, { "epoch": 2.9189149343948104, "grad_norm": 3.478130264807485, "learning_rate": 1.6677763082837694e-08, "loss": 1.0408, "step": 98995 }, { "epoch": 2.91906236178682, "grad_norm": 3.4460470986698906, "learning_rate": 1.6617216593772384e-08, "loss": 1.055, "step": 99000 }, { "epoch": 2.91906236178682, "eval_loss": 1.0765235424041748, "eval_runtime": 4.2253, "eval_samples_per_second": 93.721, "eval_steps_per_second": 3.077, "step": 99000 }, { "epoch": 2.9192097891788293, "grad_norm": 3.6749088000510812, "learning_rate": 1.6556779963758094e-08, "loss": 1.0632, "step": 99005 }, { "epoch": 2.919357216570839, "grad_norm": 3.4668256601990226, "learning_rate": 1.649645319457424e-08, "loss": 1.0764, "step": 99010 }, { "epoch": 2.9195046439628483, "grad_norm": 3.593385756735802, "learning_rate": 1.6436236287995234e-08, "loss": 1.0324, "step": 99015 }, { "epoch": 2.9196520713548577, "grad_norm": 3.563564401624335, "learning_rate": 1.6376129245792985e-08, "loss": 1.0449, "step": 99020 }, { "epoch": 2.919799498746867, "grad_norm": 3.371286233144171, "learning_rate": 1.6316132069736505e-08, "loss": 1.0314, "step": 99025 }, { "epoch": 2.9199469261388766, "grad_norm": 3.6701871010517624, "learning_rate": 1.625624476159146e-08, "loss": 1.0438, "step": 99030 }, { "epoch": 2.920094353530886, "grad_norm": 3.5847577146153777, "learning_rate": 1.619646732312019e-08, "loss": 1.0535, "step": 99035 }, { "epoch": 2.9202417809228955, "grad_norm": 3.6257407901277157, "learning_rate": 1.6136799756080872e-08, "loss": 1.0352, "step": 99040 }, { "epoch": 2.920389208314905, "grad_norm": 3.629758142733898, "learning_rate": 1.607724206223002e-08, "loss": 1.0332, "step": 99045 }, { "epoch": 2.9205366357069145, "grad_norm": 3.6461350770555088, "learning_rate": 1.6017794243320394e-08, "loss": 1.0763, "step": 99050 }, { "epoch": 2.920684063098924, "grad_norm": 3.693022555059179, "learning_rate": 1.595845630110018e-08, "loss": 1.035, "step": 99055 }, { "epoch": 2.9208314904909334, "grad_norm": 3.385506344583273, "learning_rate": 1.5899228237317154e-08, "loss": 1.0549, "step": 99060 }, { "epoch": 2.920978917882943, "grad_norm": 3.2903012968034195, "learning_rate": 1.5840110053712414e-08, "loss": 1.0372, "step": 99065 }, { "epoch": 2.921126345274952, "grad_norm": 3.4936365990494385, "learning_rate": 1.5781101752026655e-08, "loss": 1.0534, "step": 99070 }, { "epoch": 2.9212737726669618, "grad_norm": 3.3429299055420016, "learning_rate": 1.5722203333996406e-08, "loss": 0.9851, "step": 99075 }, { "epoch": 2.9214212000589708, "grad_norm": 3.4316221185852465, "learning_rate": 1.5663414801354024e-08, "loss": 1.046, "step": 99080 }, { "epoch": 2.9215686274509802, "grad_norm": 3.4441486450373513, "learning_rate": 1.56047361558298e-08, "loss": 1.058, "step": 99085 }, { "epoch": 2.9217160548429897, "grad_norm": 3.5450422784237157, "learning_rate": 1.554616739915027e-08, "loss": 1.0759, "step": 99090 }, { "epoch": 2.921863482234999, "grad_norm": 3.4496378887362265, "learning_rate": 1.5487708533039464e-08, "loss": 1.0251, "step": 99095 }, { "epoch": 2.9220109096270086, "grad_norm": 3.2832065904980374, "learning_rate": 1.542935955921726e-08, "loss": 1.028, "step": 99100 }, { "epoch": 2.922158337019018, "grad_norm": 3.5507907838366006, "learning_rate": 1.53711204794002e-08, "loss": 1.0383, "step": 99105 }, { "epoch": 2.9223057644110275, "grad_norm": 3.2917418246668384, "learning_rate": 1.5312991295302754e-08, "loss": 1.0342, "step": 99110 }, { "epoch": 2.922453191803037, "grad_norm": 3.5002351158142457, "learning_rate": 1.5254972008634794e-08, "loss": 1.0296, "step": 99115 }, { "epoch": 2.9226006191950464, "grad_norm": 3.3892216926760286, "learning_rate": 1.5197062621104124e-08, "loss": 1.0633, "step": 99120 }, { "epoch": 2.922748046587056, "grad_norm": 3.7304771544704627, "learning_rate": 1.51392631344148e-08, "loss": 1.0179, "step": 99125 }, { "epoch": 2.9228954739790654, "grad_norm": 3.6475986106965235, "learning_rate": 1.5081573550267124e-08, "loss": 1.0267, "step": 99130 }, { "epoch": 2.923042901371075, "grad_norm": 3.322236617961461, "learning_rate": 1.5023993870358908e-08, "loss": 1.0506, "step": 99135 }, { "epoch": 2.9231903287630843, "grad_norm": 3.4476947328612697, "learning_rate": 1.496652409638463e-08, "loss": 1.0373, "step": 99140 }, { "epoch": 2.9233377561550937, "grad_norm": 3.473311225522239, "learning_rate": 1.4909164230035438e-08, "loss": 1.0441, "step": 99145 }, { "epoch": 2.923485183547103, "grad_norm": 3.309571357006991, "learning_rate": 1.4851914272999146e-08, "loss": 0.9816, "step": 99150 }, { "epoch": 2.9236326109391126, "grad_norm": 3.4627969420995055, "learning_rate": 1.479477422696024e-08, "loss": 1.0407, "step": 99155 }, { "epoch": 2.923780038331122, "grad_norm": 3.5489568293490388, "learning_rate": 1.4737744093600291e-08, "loss": 1.0415, "step": 99160 }, { "epoch": 2.923927465723131, "grad_norm": 3.6057923575483257, "learning_rate": 1.4680823874597542e-08, "loss": 1.0143, "step": 99165 }, { "epoch": 2.924074893115141, "grad_norm": 3.5722107165489008, "learning_rate": 1.4624013571626488e-08, "loss": 1.0284, "step": 99170 }, { "epoch": 2.92422232050715, "grad_norm": 3.549708549149804, "learning_rate": 1.4567313186359538e-08, "loss": 1.0464, "step": 99175 }, { "epoch": 2.92436974789916, "grad_norm": 3.4592532143332955, "learning_rate": 1.4510722720464942e-08, "loss": 1.025, "step": 99180 }, { "epoch": 2.924517175291169, "grad_norm": 3.4459577317752865, "learning_rate": 1.4454242175607618e-08, "loss": 1.0305, "step": 99185 }, { "epoch": 2.9246646026831784, "grad_norm": 3.6541778403668483, "learning_rate": 1.439787155344957e-08, "loss": 1.0194, "step": 99190 }, { "epoch": 2.924812030075188, "grad_norm": 3.4378959640939115, "learning_rate": 1.434161085564989e-08, "loss": 1.0502, "step": 99195 }, { "epoch": 2.9249594574671973, "grad_norm": 3.547388397916149, "learning_rate": 1.4285460083864333e-08, "loss": 0.9935, "step": 99200 }, { "epoch": 2.925106884859207, "grad_norm": 3.486871522299659, "learning_rate": 1.4229419239744496e-08, "loss": 1.0563, "step": 99205 }, { "epoch": 2.9252543122512162, "grad_norm": 3.4437981891574463, "learning_rate": 1.4173488324939893e-08, "loss": 1.0148, "step": 99210 }, { "epoch": 2.9254017396432257, "grad_norm": 3.525512320648781, "learning_rate": 1.4117667341095874e-08, "loss": 1.0077, "step": 99215 }, { "epoch": 2.925549167035235, "grad_norm": 3.51621796810161, "learning_rate": 1.406195628985571e-08, "loss": 1.0464, "step": 99220 }, { "epoch": 2.9256965944272446, "grad_norm": 3.6198273858751246, "learning_rate": 1.4006355172858504e-08, "loss": 1.066, "step": 99225 }, { "epoch": 2.925844021819254, "grad_norm": 3.5564835662690335, "learning_rate": 1.3950863991740448e-08, "loss": 1.0527, "step": 99230 }, { "epoch": 2.9259914492112635, "grad_norm": 3.5331338221150324, "learning_rate": 1.3895482748134402e-08, "loss": 1.0986, "step": 99235 }, { "epoch": 2.926138876603273, "grad_norm": 3.4234076769594766, "learning_rate": 1.3840211443669898e-08, "loss": 1.0368, "step": 99240 }, { "epoch": 2.9262863039952824, "grad_norm": 3.563758421651889, "learning_rate": 1.3785050079973549e-08, "loss": 1.0234, "step": 99245 }, { "epoch": 2.926433731387292, "grad_norm": 3.4921386888695807, "learning_rate": 1.3729998658667809e-08, "loss": 1.0362, "step": 99250 }, { "epoch": 2.9265811587793014, "grad_norm": 3.473631995361901, "learning_rate": 1.367505718137388e-08, "loss": 1.0323, "step": 99255 }, { "epoch": 2.9267285861713104, "grad_norm": 3.5370543241764993, "learning_rate": 1.3620225649707552e-08, "loss": 1.0082, "step": 99260 }, { "epoch": 2.9268760135633203, "grad_norm": 3.6133348863486843, "learning_rate": 1.3565504065282536e-08, "loss": 1.0586, "step": 99265 }, { "epoch": 2.9270234409553293, "grad_norm": 3.522079964848031, "learning_rate": 1.3510892429709209e-08, "loss": 1.0942, "step": 99270 }, { "epoch": 2.927170868347339, "grad_norm": 3.5667485626962456, "learning_rate": 1.345639074459462e-08, "loss": 1.0708, "step": 99275 }, { "epoch": 2.927318295739348, "grad_norm": 3.711215041072243, "learning_rate": 1.3401999011542488e-08, "loss": 1.0318, "step": 99280 }, { "epoch": 2.9274657231313577, "grad_norm": 3.4843130487193354, "learning_rate": 1.3347717232152779e-08, "loss": 1.0259, "step": 99285 }, { "epoch": 2.927613150523367, "grad_norm": 3.6335373652404366, "learning_rate": 1.32935454080238e-08, "loss": 1.0645, "step": 99290 }, { "epoch": 2.9277605779153766, "grad_norm": 3.5668014086425326, "learning_rate": 1.323948354074886e-08, "loss": 1.0471, "step": 99295 }, { "epoch": 2.927908005307386, "grad_norm": 3.4316254169638696, "learning_rate": 1.3185531631919184e-08, "loss": 1.0567, "step": 99300 }, { "epoch": 2.9280554326993955, "grad_norm": 3.4189476095847366, "learning_rate": 1.313168968312184e-08, "loss": 1.0274, "step": 99305 }, { "epoch": 2.928202860091405, "grad_norm": 3.573586649407601, "learning_rate": 1.3077957695942222e-08, "loss": 1.0797, "step": 99310 }, { "epoch": 2.9283502874834144, "grad_norm": 3.6089916713615033, "learning_rate": 1.3024335671960318e-08, "loss": 1.0519, "step": 99315 }, { "epoch": 2.928497714875424, "grad_norm": 3.4883587693525193, "learning_rate": 1.2970823612754868e-08, "loss": 1.0748, "step": 99320 }, { "epoch": 2.9286451422674333, "grad_norm": 3.6603718717831235, "learning_rate": 1.2917421519900024e-08, "loss": 1.0648, "step": 99325 }, { "epoch": 2.928792569659443, "grad_norm": 3.57584169114834, "learning_rate": 1.2864129394967033e-08, "loss": 1.0346, "step": 99330 }, { "epoch": 2.9289399970514522, "grad_norm": 3.3926099561226937, "learning_rate": 1.2810947239525056e-08, "loss": 1.0329, "step": 99335 }, { "epoch": 2.9290874244434617, "grad_norm": 3.423295321438131, "learning_rate": 1.275787505513784e-08, "loss": 0.9797, "step": 99340 }, { "epoch": 2.929234851835471, "grad_norm": 3.6070032967342263, "learning_rate": 1.2704912843367889e-08, "loss": 1.0371, "step": 99345 }, { "epoch": 2.9293822792274806, "grad_norm": 3.6130151270646573, "learning_rate": 1.265206060577312e-08, "loss": 1.0677, "step": 99350 }, { "epoch": 2.9295297066194896, "grad_norm": 3.67279990758747, "learning_rate": 1.2599318343909372e-08, "loss": 1.0262, "step": 99355 }, { "epoch": 2.9296771340114995, "grad_norm": 3.4948397806274274, "learning_rate": 1.2546686059328322e-08, "loss": 1.0221, "step": 99360 }, { "epoch": 2.9298245614035086, "grad_norm": 3.822214672836274, "learning_rate": 1.249416375357873e-08, "loss": 0.9773, "step": 99365 }, { "epoch": 2.9299719887955185, "grad_norm": 3.7018066363964617, "learning_rate": 1.2441751428206027e-08, "loss": 1.0553, "step": 99370 }, { "epoch": 2.9301194161875275, "grad_norm": 3.327782037139593, "learning_rate": 1.238944908475273e-08, "loss": 1.0211, "step": 99375 }, { "epoch": 2.930266843579537, "grad_norm": 3.4255131539553476, "learning_rate": 1.2337256724758023e-08, "loss": 0.9804, "step": 99380 }, { "epoch": 2.9304142709715464, "grad_norm": 3.59649253808191, "learning_rate": 1.2285174349757344e-08, "loss": 1.0652, "step": 99385 }, { "epoch": 2.930561698363556, "grad_norm": 3.66460640457055, "learning_rate": 1.2233201961283218e-08, "loss": 1.0384, "step": 99390 }, { "epoch": 2.9307091257555653, "grad_norm": 3.5100559419846404, "learning_rate": 1.2181339560865257e-08, "loss": 1.0507, "step": 99395 }, { "epoch": 2.9308565531475748, "grad_norm": 3.5606334008365224, "learning_rate": 1.2129587150029735e-08, "loss": 1.0358, "step": 99400 }, { "epoch": 2.931003980539584, "grad_norm": 3.423875749431818, "learning_rate": 1.207794473029919e-08, "loss": 1.0469, "step": 99405 }, { "epoch": 2.9311514079315937, "grad_norm": 3.442606255494045, "learning_rate": 1.2026412303193652e-08, "loss": 1.0306, "step": 99410 }, { "epoch": 2.931298835323603, "grad_norm": 3.535275899198091, "learning_rate": 1.1974989870228575e-08, "loss": 1.037, "step": 99415 }, { "epoch": 2.9314462627156126, "grad_norm": 3.471520613969956, "learning_rate": 1.1923677432918583e-08, "loss": 1.0208, "step": 99420 }, { "epoch": 2.931593690107622, "grad_norm": 3.566589195975881, "learning_rate": 1.1872474992772054e-08, "loss": 1.0709, "step": 99425 }, { "epoch": 2.9317411174996315, "grad_norm": 3.473959463240393, "learning_rate": 1.1821382551296944e-08, "loss": 1.0703, "step": 99430 }, { "epoch": 2.931888544891641, "grad_norm": 3.3552711364449257, "learning_rate": 1.177040010999622e-08, "loss": 1.0427, "step": 99435 }, { "epoch": 2.9320359722836504, "grad_norm": 3.629239217112003, "learning_rate": 1.1719527670369934e-08, "loss": 1.0911, "step": 99440 }, { "epoch": 2.93218339967566, "grad_norm": 3.5594351332137384, "learning_rate": 1.1668765233915219e-08, "loss": 1.0235, "step": 99445 }, { "epoch": 2.932330827067669, "grad_norm": 3.5877179888285635, "learning_rate": 1.161811280212588e-08, "loss": 1.028, "step": 99450 }, { "epoch": 2.932478254459679, "grad_norm": 3.4479046795826993, "learning_rate": 1.1567570376492392e-08, "loss": 1.035, "step": 99455 }, { "epoch": 2.932625681851688, "grad_norm": 3.674154821442347, "learning_rate": 1.15171379585019e-08, "loss": 1.0508, "step": 99460 }, { "epoch": 2.9327731092436977, "grad_norm": 3.5362640031406496, "learning_rate": 1.1466815549638632e-08, "loss": 1.0362, "step": 99465 }, { "epoch": 2.9329205366357067, "grad_norm": 3.5246218412574217, "learning_rate": 1.141660315138307e-08, "loss": 1.0561, "step": 99470 }, { "epoch": 2.933067964027716, "grad_norm": 3.5763735139011685, "learning_rate": 1.1366500765212784e-08, "loss": 1.04, "step": 99475 }, { "epoch": 2.9332153914197256, "grad_norm": 3.539648393898585, "learning_rate": 1.1316508392602842e-08, "loss": 1.0115, "step": 99480 }, { "epoch": 2.933362818811735, "grad_norm": 3.3518884600921885, "learning_rate": 1.126662603502332e-08, "loss": 1.0571, "step": 99485 }, { "epoch": 2.9335102462037446, "grad_norm": 3.39428433213158, "learning_rate": 1.1216853693942624e-08, "loss": 1.0138, "step": 99490 }, { "epoch": 2.933657673595754, "grad_norm": 3.4423089617358795, "learning_rate": 1.1167191370825419e-08, "loss": 1.0167, "step": 99495 }, { "epoch": 2.9338051009877635, "grad_norm": 3.56719944100232, "learning_rate": 1.1117639067132618e-08, "loss": 1.0556, "step": 99500 }, { "epoch": 2.9338051009877635, "eval_loss": 1.0764729976654053, "eval_runtime": 4.2889, "eval_samples_per_second": 92.332, "eval_steps_per_second": 3.031, "step": 99500 }, { "epoch": 2.933952528379773, "grad_norm": 3.551409788768033, "learning_rate": 1.1068196784323053e-08, "loss": 1.0549, "step": 99505 }, { "epoch": 2.9340999557717824, "grad_norm": 3.67220927685757, "learning_rate": 1.101886452385098e-08, "loss": 1.0471, "step": 99510 }, { "epoch": 2.934247383163792, "grad_norm": 3.455654965190704, "learning_rate": 1.0969642287168152e-08, "loss": 1.0483, "step": 99515 }, { "epoch": 2.9343948105558013, "grad_norm": 3.5405749936834483, "learning_rate": 1.0920530075723412e-08, "loss": 1.0302, "step": 99520 }, { "epoch": 2.9345422379478108, "grad_norm": 3.542299761242554, "learning_rate": 1.0871527890961435e-08, "loss": 1.0485, "step": 99525 }, { "epoch": 2.9346896653398202, "grad_norm": 3.5456944858175885, "learning_rate": 1.082263573432482e-08, "loss": 1.0219, "step": 99530 }, { "epoch": 2.9348370927318297, "grad_norm": 3.5630706806456964, "learning_rate": 1.0773853607251167e-08, "loss": 0.9948, "step": 99535 }, { "epoch": 2.934984520123839, "grad_norm": 3.6806162558280944, "learning_rate": 1.0725181511176823e-08, "loss": 1.0167, "step": 99540 }, { "epoch": 2.9351319475158486, "grad_norm": 3.6928020296800432, "learning_rate": 1.067661944753398e-08, "loss": 1.0405, "step": 99545 }, { "epoch": 2.935279374907858, "grad_norm": 3.528192918350596, "learning_rate": 1.0628167417751494e-08, "loss": 1.0082, "step": 99550 }, { "epoch": 2.935426802299867, "grad_norm": 3.507609509013764, "learning_rate": 1.0579825423254891e-08, "loss": 1.0773, "step": 99555 }, { "epoch": 2.935574229691877, "grad_norm": 3.510576385420657, "learning_rate": 1.0531593465467199e-08, "loss": 1.0437, "step": 99560 }, { "epoch": 2.935721657083886, "grad_norm": 3.5053322564698037, "learning_rate": 1.0483471545807282e-08, "loss": 1.037, "step": 99565 }, { "epoch": 2.935869084475896, "grad_norm": 3.5190909784966964, "learning_rate": 1.0435459665691094e-08, "loss": 0.9964, "step": 99570 }, { "epoch": 2.936016511867905, "grad_norm": 3.51228631790265, "learning_rate": 1.0387557826531669e-08, "loss": 1.0457, "step": 99575 }, { "epoch": 2.9361639392599144, "grad_norm": 3.4930610806472924, "learning_rate": 1.0339766029738711e-08, "loss": 1.0298, "step": 99580 }, { "epoch": 2.936311366651924, "grad_norm": 3.6171439990117946, "learning_rate": 1.0292084276718599e-08, "loss": 1.0596, "step": 99585 }, { "epoch": 2.9364587940439333, "grad_norm": 3.430346272141245, "learning_rate": 1.024451256887396e-08, "loss": 1.0327, "step": 99590 }, { "epoch": 2.9366062214359427, "grad_norm": 3.4071783881910838, "learning_rate": 1.0197050907604921e-08, "loss": 1.0097, "step": 99595 }, { "epoch": 2.936753648827952, "grad_norm": 3.496311734289111, "learning_rate": 1.0149699294307867e-08, "loss": 1.0166, "step": 99600 }, { "epoch": 2.9369010762199617, "grad_norm": 3.5653628039035703, "learning_rate": 1.0102457730376685e-08, "loss": 1.0295, "step": 99605 }, { "epoch": 2.937048503611971, "grad_norm": 3.5583251270315714, "learning_rate": 1.0055326217200678e-08, "loss": 1.0727, "step": 99610 }, { "epoch": 2.9371959310039806, "grad_norm": 3.5526382176748217, "learning_rate": 1.0008304756167902e-08, "loss": 1.0565, "step": 99615 }, { "epoch": 2.93734335839599, "grad_norm": 3.6932332888374773, "learning_rate": 9.961393348661003e-09, "loss": 1.0346, "step": 99620 }, { "epoch": 2.9374907857879995, "grad_norm": 3.5644462211376626, "learning_rate": 9.914591996060956e-09, "loss": 1.0348, "step": 99625 }, { "epoch": 2.937638213180009, "grad_norm": 3.504208464255532, "learning_rate": 9.86790069974458e-09, "loss": 1.0895, "step": 99630 }, { "epoch": 2.9377856405720184, "grad_norm": 3.53131511092512, "learning_rate": 9.821319461086193e-09, "loss": 1.0215, "step": 99635 }, { "epoch": 2.937933067964028, "grad_norm": 3.6309518734776693, "learning_rate": 9.774848281455945e-09, "loss": 1.0726, "step": 99640 }, { "epoch": 2.9380804953560373, "grad_norm": 3.555221096280462, "learning_rate": 9.728487162222327e-09, "loss": 1.0592, "step": 99645 }, { "epoch": 2.9382279227480463, "grad_norm": 3.536067799372759, "learning_rate": 9.682236104748415e-09, "loss": 0.993, "step": 99650 }, { "epoch": 2.9383753501400562, "grad_norm": 3.496956177393752, "learning_rate": 9.636095110396037e-09, "loss": 1.014, "step": 99655 }, { "epoch": 2.9385227775320653, "grad_norm": 3.494607016679398, "learning_rate": 9.590064180522023e-09, "loss": 1.0453, "step": 99660 }, { "epoch": 2.938670204924075, "grad_norm": 3.5645725510439346, "learning_rate": 9.544143316481956e-09, "loss": 1.0507, "step": 99665 }, { "epoch": 2.938817632316084, "grad_norm": 3.6297306576418458, "learning_rate": 9.49833251962684e-09, "loss": 1.047, "step": 99670 }, { "epoch": 2.9389650597080936, "grad_norm": 3.6335225674660765, "learning_rate": 9.452631791303928e-09, "loss": 1.0259, "step": 99675 }, { "epoch": 2.939112487100103, "grad_norm": 3.3286781651549116, "learning_rate": 9.407041132858813e-09, "loss": 1.0299, "step": 99680 }, { "epoch": 2.9392599144921125, "grad_norm": 3.6463756560864735, "learning_rate": 9.36156054563292e-09, "loss": 1.0521, "step": 99685 }, { "epoch": 2.939407341884122, "grad_norm": 3.4640375581296032, "learning_rate": 9.316190030964345e-09, "loss": 1.0254, "step": 99690 }, { "epoch": 2.9395547692761315, "grad_norm": 3.3620201639433454, "learning_rate": 9.270929590188271e-09, "loss": 1.0497, "step": 99695 }, { "epoch": 2.939702196668141, "grad_norm": 3.5097999876833996, "learning_rate": 9.225779224636549e-09, "loss": 0.9496, "step": 99700 }, { "epoch": 2.9398496240601504, "grad_norm": 3.4878418184512845, "learning_rate": 9.180738935638116e-09, "loss": 1.0311, "step": 99705 }, { "epoch": 2.93999705145216, "grad_norm": 3.5681896132832738, "learning_rate": 9.135808724517748e-09, "loss": 1.0322, "step": 99710 }, { "epoch": 2.9401444788441693, "grad_norm": 3.375195086528033, "learning_rate": 9.090988592597716e-09, "loss": 1.0101, "step": 99715 }, { "epoch": 2.9402919062361788, "grad_norm": 3.522030237616419, "learning_rate": 9.046278541197383e-09, "loss": 1.0323, "step": 99720 }, { "epoch": 2.940439333628188, "grad_norm": 3.430035246901891, "learning_rate": 9.001678571631533e-09, "loss": 1.0699, "step": 99725 }, { "epoch": 2.9405867610201977, "grad_norm": 3.6250466723038888, "learning_rate": 8.95718868521328e-09, "loss": 1.0514, "step": 99730 }, { "epoch": 2.940734188412207, "grad_norm": 3.6091756198259684, "learning_rate": 8.912808883251578e-09, "loss": 1.0379, "step": 99735 }, { "epoch": 2.9408816158042166, "grad_norm": 3.43349525916536, "learning_rate": 8.868539167052463e-09, "loss": 0.9693, "step": 99740 }, { "epoch": 2.9410290431962256, "grad_norm": 3.437911176355915, "learning_rate": 8.824379537918648e-09, "loss": 1.0162, "step": 99745 }, { "epoch": 2.9411764705882355, "grad_norm": 3.531810039505988, "learning_rate": 8.78032999714909e-09, "loss": 1.0437, "step": 99750 }, { "epoch": 2.9413238979802445, "grad_norm": 3.5678979833766897, "learning_rate": 8.736390546040668e-09, "loss": 1.0258, "step": 99755 }, { "epoch": 2.9414713253722544, "grad_norm": 3.458618841235761, "learning_rate": 8.692561185885683e-09, "loss": 1.0109, "step": 99760 }, { "epoch": 2.9416187527642634, "grad_norm": 3.489652315340847, "learning_rate": 8.648841917974355e-09, "loss": 1.0383, "step": 99765 }, { "epoch": 2.941766180156273, "grad_norm": 3.5432169390386976, "learning_rate": 8.60523274359315e-09, "loss": 1.0351, "step": 99770 }, { "epoch": 2.9419136075482824, "grad_norm": 3.6769472866311457, "learning_rate": 8.561733664024795e-09, "loss": 1.0312, "step": 99775 }, { "epoch": 2.942061034940292, "grad_norm": 3.5807824384398708, "learning_rate": 8.518344680549933e-09, "loss": 1.0273, "step": 99780 }, { "epoch": 2.9422084623323013, "grad_norm": 3.433477637892989, "learning_rate": 8.475065794445458e-09, "loss": 1.02, "step": 99785 }, { "epoch": 2.9423558897243107, "grad_norm": 3.5454168313669117, "learning_rate": 8.431897006984102e-09, "loss": 1.093, "step": 99790 }, { "epoch": 2.94250331711632, "grad_norm": 3.5117357711038553, "learning_rate": 8.388838319436931e-09, "loss": 1.0326, "step": 99795 }, { "epoch": 2.9426507445083296, "grad_norm": 3.6982197210132286, "learning_rate": 8.345889733070433e-09, "loss": 1.0496, "step": 99800 }, { "epoch": 2.942798171900339, "grad_norm": 3.536783288816651, "learning_rate": 8.303051249148597e-09, "loss": 1.0858, "step": 99805 }, { "epoch": 2.9429455992923486, "grad_norm": 3.5987479883748192, "learning_rate": 8.26032286893208e-09, "loss": 1.0355, "step": 99810 }, { "epoch": 2.943093026684358, "grad_norm": 3.6171198962041013, "learning_rate": 8.21770459367821e-09, "loss": 1.0283, "step": 99815 }, { "epoch": 2.9432404540763675, "grad_norm": 3.5336775688153237, "learning_rate": 8.175196424640984e-09, "loss": 1.0548, "step": 99820 }, { "epoch": 2.943387881468377, "grad_norm": 3.5053232765056572, "learning_rate": 8.132798363071487e-09, "loss": 1.0236, "step": 99825 }, { "epoch": 2.9435353088603864, "grad_norm": 3.459987717857297, "learning_rate": 8.090510410217051e-09, "loss": 1.0541, "step": 99830 }, { "epoch": 2.943682736252396, "grad_norm": 3.4775712613116228, "learning_rate": 8.048332567322098e-09, "loss": 1.0354, "step": 99835 }, { "epoch": 2.943830163644405, "grad_norm": 3.483835153573259, "learning_rate": 8.006264835628135e-09, "loss": 1.0236, "step": 99840 }, { "epoch": 2.9439775910364148, "grad_norm": 3.5871881473846305, "learning_rate": 7.964307216372508e-09, "loss": 1.0121, "step": 99845 }, { "epoch": 2.944125018428424, "grad_norm": 3.618961012010801, "learning_rate": 7.922459710790059e-09, "loss": 0.993, "step": 99850 }, { "epoch": 2.9442724458204337, "grad_norm": 3.546618552278449, "learning_rate": 7.880722320112305e-09, "loss": 1.0728, "step": 99855 }, { "epoch": 2.9444198732124427, "grad_norm": 3.3820649352738745, "learning_rate": 7.83909504556743e-09, "loss": 1.0141, "step": 99860 }, { "epoch": 2.944567300604452, "grad_norm": 3.475920381823081, "learning_rate": 7.797577888380285e-09, "loss": 1.0478, "step": 99865 }, { "epoch": 2.9447147279964616, "grad_norm": 3.701546059598892, "learning_rate": 7.756170849772399e-09, "loss": 1.0409, "step": 99870 }, { "epoch": 2.944862155388471, "grad_norm": 3.3818206320352444, "learning_rate": 7.714873930962374e-09, "loss": 1.0364, "step": 99875 }, { "epoch": 2.9450095827804805, "grad_norm": 3.4646597959262486, "learning_rate": 7.673687133165492e-09, "loss": 1.0794, "step": 99880 }, { "epoch": 2.94515701017249, "grad_norm": 3.556985656583086, "learning_rate": 7.632610457593697e-09, "loss": 1.0548, "step": 99885 }, { "epoch": 2.9453044375644994, "grad_norm": 3.741938552797068, "learning_rate": 7.591643905455609e-09, "loss": 1.048, "step": 99890 }, { "epoch": 2.945451864956509, "grad_norm": 3.5620126783301465, "learning_rate": 7.550787477956511e-09, "loss": 1.0647, "step": 99895 }, { "epoch": 2.9455992923485184, "grad_norm": 3.5523470631235665, "learning_rate": 7.510041176299193e-09, "loss": 1.0791, "step": 99900 }, { "epoch": 2.945746719740528, "grad_norm": 3.4974922410700517, "learning_rate": 7.469405001682694e-09, "loss": 1.0586, "step": 99905 }, { "epoch": 2.9458941471325373, "grad_norm": 3.462113625219825, "learning_rate": 7.428878955301893e-09, "loss": 1.0398, "step": 99910 }, { "epoch": 2.9460415745245467, "grad_norm": 3.5245074736857047, "learning_rate": 7.3884630383504185e-09, "loss": 0.9972, "step": 99915 }, { "epoch": 2.946189001916556, "grad_norm": 3.5181201980755064, "learning_rate": 7.348157252016902e-09, "loss": 1.0111, "step": 99920 }, { "epoch": 2.9463364293085657, "grad_norm": 3.5394025200462274, "learning_rate": 7.307961597487478e-09, "loss": 1.0997, "step": 99925 }, { "epoch": 2.946483856700575, "grad_norm": 3.43143617219389, "learning_rate": 7.267876075944951e-09, "loss": 1.0446, "step": 99930 }, { "epoch": 2.9466312840925846, "grad_norm": 3.595618750268392, "learning_rate": 7.227900688568795e-09, "loss": 1.0297, "step": 99935 }, { "epoch": 2.946778711484594, "grad_norm": 3.5976177977725503, "learning_rate": 7.188035436535567e-09, "loss": 1.0493, "step": 99940 }, { "epoch": 2.946926138876603, "grad_norm": 3.570701157675143, "learning_rate": 7.14828032101808e-09, "loss": 1.0371, "step": 99945 }, { "epoch": 2.947073566268613, "grad_norm": 3.613340451893353, "learning_rate": 7.108635343186648e-09, "loss": 1.0715, "step": 99950 }, { "epoch": 2.947220993660622, "grad_norm": 3.4036537366634674, "learning_rate": 7.069100504207421e-09, "loss": 1.0101, "step": 99955 }, { "epoch": 2.9473684210526314, "grad_norm": 3.789425446482867, "learning_rate": 7.029675805243635e-09, "loss": 1.073, "step": 99960 }, { "epoch": 2.947515848444641, "grad_norm": 3.4719420992167445, "learning_rate": 6.990361247456029e-09, "loss": 1.0415, "step": 99965 }, { "epoch": 2.9476632758366503, "grad_norm": 3.4909364045721656, "learning_rate": 6.951156832001176e-09, "loss": 1.078, "step": 99970 }, { "epoch": 2.94781070322866, "grad_norm": 3.296231798268392, "learning_rate": 6.912062560032323e-09, "loss": 1.0481, "step": 99975 }, { "epoch": 2.9479581306206692, "grad_norm": 3.6287578498299764, "learning_rate": 6.873078432700214e-09, "loss": 1.0564, "step": 99980 }, { "epoch": 2.9481055580126787, "grad_norm": 3.3789789490563638, "learning_rate": 6.834204451152265e-09, "loss": 1.0068, "step": 99985 }, { "epoch": 2.948252985404688, "grad_norm": 3.5482843621606115, "learning_rate": 6.795440616531729e-09, "loss": 1.0153, "step": 99990 }, { "epoch": 2.9484004127966976, "grad_norm": 3.5443181810024855, "learning_rate": 6.7567869299797746e-09, "loss": 1.0485, "step": 99995 }, { "epoch": 2.948547840188707, "grad_norm": 3.2777703716961404, "learning_rate": 6.718243392633827e-09, "loss": 1.0383, "step": 100000 }, { "epoch": 2.948547840188707, "eval_loss": 1.0765314102172852, "eval_runtime": 4.1495, "eval_samples_per_second": 95.432, "eval_steps_per_second": 3.133, "step": 100000 }, { "epoch": 2.9486952675807165, "grad_norm": 3.4948417158823357, "learning_rate": 6.679810005627979e-09, "loss": 1.043, "step": 100005 }, { "epoch": 2.948842694972726, "grad_norm": 3.600119706823644, "learning_rate": 6.6414867700929915e-09, "loss": 1.0309, "step": 100010 }, { "epoch": 2.9489901223647355, "grad_norm": 3.610978808012064, "learning_rate": 6.603273687156713e-09, "loss": 1.0429, "step": 100015 }, { "epoch": 2.949137549756745, "grad_norm": 3.501197183465149, "learning_rate": 6.565170757943661e-09, "loss": 0.99, "step": 100020 }, { "epoch": 2.9492849771487544, "grad_norm": 3.407593063328369, "learning_rate": 6.527177983575022e-09, "loss": 1.0158, "step": 100025 }, { "epoch": 2.949432404540764, "grad_norm": 3.7070810714670377, "learning_rate": 6.489295365168651e-09, "loss": 1.0268, "step": 100030 }, { "epoch": 2.9495798319327733, "grad_norm": 3.451935967527109, "learning_rate": 6.451522903839491e-09, "loss": 1.0186, "step": 100035 }, { "epoch": 2.9497272593247823, "grad_norm": 3.46821694053069, "learning_rate": 6.413860600699151e-09, "loss": 1.0062, "step": 100040 }, { "epoch": 2.949874686716792, "grad_norm": 3.6792236508159673, "learning_rate": 6.376308456855495e-09, "loss": 1.0394, "step": 100045 }, { "epoch": 2.950022114108801, "grad_norm": 3.4372832625214316, "learning_rate": 6.33886647341389e-09, "loss": 1.0535, "step": 100050 }, { "epoch": 2.950169541500811, "grad_norm": 3.402907045199684, "learning_rate": 6.301534651475955e-09, "loss": 1.0573, "step": 100055 }, { "epoch": 2.95031696889282, "grad_norm": 3.6253849729394325, "learning_rate": 6.2643129921399755e-09, "loss": 1.0512, "step": 100060 }, { "epoch": 2.9504643962848296, "grad_norm": 3.388826334390734, "learning_rate": 6.227201496501328e-09, "loss": 0.9984, "step": 100065 }, { "epoch": 2.950611823676839, "grad_norm": 3.5920048804311344, "learning_rate": 6.190200165652471e-09, "loss": 1.0673, "step": 100070 }, { "epoch": 2.9507592510688485, "grad_norm": 3.351661774350272, "learning_rate": 6.153309000682117e-09, "loss": 1.0172, "step": 100075 }, { "epoch": 2.950906678460858, "grad_norm": 3.387672706733197, "learning_rate": 6.1165280026756476e-09, "loss": 1.0027, "step": 100080 }, { "epoch": 2.9510541058528674, "grad_norm": 3.4475337896001497, "learning_rate": 6.079857172715114e-09, "loss": 1.0765, "step": 100085 }, { "epoch": 2.951201533244877, "grad_norm": 3.5013508744043906, "learning_rate": 6.04329651188007e-09, "loss": 1.0151, "step": 100090 }, { "epoch": 2.9513489606368863, "grad_norm": 3.5418083947908467, "learning_rate": 6.00684602124632e-09, "loss": 1.0628, "step": 100095 }, { "epoch": 2.951496388028896, "grad_norm": 3.687777904422509, "learning_rate": 5.970505701885925e-09, "loss": 1.075, "step": 100100 }, { "epoch": 2.9516438154209053, "grad_norm": 3.518101612660018, "learning_rate": 5.934275554868862e-09, "loss": 1.063, "step": 100105 }, { "epoch": 2.9517912428129147, "grad_norm": 3.4927291364291575, "learning_rate": 5.898155581260945e-09, "loss": 1.0522, "step": 100110 }, { "epoch": 2.951938670204924, "grad_norm": 3.4022603165548566, "learning_rate": 5.862145782125489e-09, "loss": 1.0204, "step": 100115 }, { "epoch": 2.9520860975969336, "grad_norm": 3.460368415103995, "learning_rate": 5.826246158521231e-09, "loss": 1.0319, "step": 100120 }, { "epoch": 2.952233524988943, "grad_norm": 3.520932548068529, "learning_rate": 5.79045671150566e-09, "loss": 1.0423, "step": 100125 }, { "epoch": 2.9523809523809526, "grad_norm": 3.5038122646332046, "learning_rate": 5.754777442130848e-09, "loss": 1.049, "step": 100130 }, { "epoch": 2.9525283797729616, "grad_norm": 3.3373801838435364, "learning_rate": 5.719208351447208e-09, "loss": 1.0224, "step": 100135 }, { "epoch": 2.9526758071649715, "grad_norm": 3.4764187781761384, "learning_rate": 5.683749440501401e-09, "loss": 1.0373, "step": 100140 }, { "epoch": 2.9528232345569805, "grad_norm": 3.619340072995369, "learning_rate": 5.648400710337176e-09, "loss": 1.0239, "step": 100145 }, { "epoch": 2.9529706619489904, "grad_norm": 3.474081559604156, "learning_rate": 5.613162161994117e-09, "loss": 0.9948, "step": 100150 }, { "epoch": 2.9531180893409994, "grad_norm": 3.561685016308074, "learning_rate": 5.578033796509313e-09, "loss": 1.0414, "step": 100155 }, { "epoch": 2.953265516733009, "grad_norm": 3.602218518868239, "learning_rate": 5.543015614916519e-09, "loss": 1.0606, "step": 100160 }, { "epoch": 2.9534129441250183, "grad_norm": 3.493781288045825, "learning_rate": 5.508107618246577e-09, "loss": 1.0816, "step": 100165 }, { "epoch": 2.9535603715170278, "grad_norm": 3.4790287663910653, "learning_rate": 5.47330980752575e-09, "loss": 1.028, "step": 100170 }, { "epoch": 2.9537077989090372, "grad_norm": 3.542848878890699, "learning_rate": 5.438622183779052e-09, "loss": 1.0505, "step": 100175 }, { "epoch": 2.9538552263010467, "grad_norm": 3.554824340064318, "learning_rate": 5.4040447480264986e-09, "loss": 1.0723, "step": 100180 }, { "epoch": 2.954002653693056, "grad_norm": 3.447940998355036, "learning_rate": 5.369577501286027e-09, "loss": 0.9984, "step": 100185 }, { "epoch": 2.9541500810850656, "grad_norm": 3.647475045515232, "learning_rate": 5.335220444571409e-09, "loss": 1.0129, "step": 100190 }, { "epoch": 2.954297508477075, "grad_norm": 3.4400133036644283, "learning_rate": 5.30097357889392e-09, "loss": 1.0276, "step": 100195 }, { "epoch": 2.9544449358690845, "grad_norm": 3.6326779277621775, "learning_rate": 5.266836905261502e-09, "loss": 1.0936, "step": 100200 }, { "epoch": 2.954592363261094, "grad_norm": 3.381290205243573, "learning_rate": 5.2328104246783525e-09, "loss": 1.0386, "step": 100205 }, { "epoch": 2.9547397906531034, "grad_norm": 3.5791356940077312, "learning_rate": 5.198894138145754e-09, "loss": 1.0329, "step": 100210 }, { "epoch": 2.954887218045113, "grad_norm": 3.4988095269191732, "learning_rate": 5.165088046661659e-09, "loss": 1.0383, "step": 100215 }, { "epoch": 2.9550346454371224, "grad_norm": 3.4656625038765756, "learning_rate": 5.13139215122152e-09, "loss": 1.0222, "step": 100220 }, { "epoch": 2.955182072829132, "grad_norm": 3.3639468953313902, "learning_rate": 5.0978064528157955e-09, "loss": 1.047, "step": 100225 }, { "epoch": 2.955329500221141, "grad_norm": 3.6277886205536207, "learning_rate": 5.064330952433693e-09, "loss": 1.014, "step": 100230 }, { "epoch": 2.9554769276131507, "grad_norm": 3.3646570678173977, "learning_rate": 5.030965651059844e-09, "loss": 1.0786, "step": 100235 }, { "epoch": 2.9556243550051597, "grad_norm": 3.475931788034829, "learning_rate": 4.99771054967596e-09, "loss": 1.0065, "step": 100240 }, { "epoch": 2.9557717823971696, "grad_norm": 3.5019618372646097, "learning_rate": 4.964565649260844e-09, "loss": 1.0072, "step": 100245 }, { "epoch": 2.9559192097891787, "grad_norm": 3.348263431487843, "learning_rate": 4.931530950789964e-09, "loss": 1.0393, "step": 100250 }, { "epoch": 2.956066637181188, "grad_norm": 3.330806553808725, "learning_rate": 4.898606455235044e-09, "loss": 1.0174, "step": 100255 }, { "epoch": 2.9562140645731976, "grad_norm": 3.528180172875083, "learning_rate": 4.865792163565308e-09, "loss": 1.0461, "step": 100260 }, { "epoch": 2.956361491965207, "grad_norm": 3.5398535637566986, "learning_rate": 4.833088076745817e-09, "loss": 0.9913, "step": 100265 }, { "epoch": 2.9565089193572165, "grad_norm": 3.5839988785067574, "learning_rate": 4.800494195739552e-09, "loss": 1.0355, "step": 100270 }, { "epoch": 2.956656346749226, "grad_norm": 3.420167178365081, "learning_rate": 4.768010521504912e-09, "loss": 1.0392, "step": 100275 }, { "epoch": 2.9568037741412354, "grad_norm": 3.4960862150800724, "learning_rate": 4.735637054998631e-09, "loss": 1.0551, "step": 100280 }, { "epoch": 2.956951201533245, "grad_norm": 3.46301855902831, "learning_rate": 4.70337379717245e-09, "loss": 1.0061, "step": 100285 }, { "epoch": 2.9570986289252543, "grad_norm": 3.5791423876504846, "learning_rate": 4.67122074897644e-09, "loss": 1.0337, "step": 100290 }, { "epoch": 2.957246056317264, "grad_norm": 3.4106890658539086, "learning_rate": 4.639177911356512e-09, "loss": 1.0165, "step": 100295 }, { "epoch": 2.9573934837092732, "grad_norm": 3.451216836142393, "learning_rate": 4.607245285255246e-09, "loss": 1.0133, "step": 100300 }, { "epoch": 2.9575409111012827, "grad_norm": 3.4531684814211157, "learning_rate": 4.575422871612722e-09, "loss": 1.0212, "step": 100305 }, { "epoch": 2.957688338493292, "grad_norm": 3.5349809085441857, "learning_rate": 4.543710671365276e-09, "loss": 1.0486, "step": 100310 }, { "epoch": 2.9578357658853016, "grad_norm": 3.624256624534922, "learning_rate": 4.512108685445909e-09, "loss": 1.0915, "step": 100315 }, { "epoch": 2.957983193277311, "grad_norm": 3.463462255652336, "learning_rate": 4.480616914784713e-09, "loss": 1.0296, "step": 100320 }, { "epoch": 2.95813062066932, "grad_norm": 3.4082848324017196, "learning_rate": 4.449235360308446e-09, "loss": 1.0282, "step": 100325 }, { "epoch": 2.95827804806133, "grad_norm": 3.6435399570246254, "learning_rate": 4.417964022940118e-09, "loss": 1.0703, "step": 100330 }, { "epoch": 2.958425475453339, "grad_norm": 3.4218844430338264, "learning_rate": 4.386802903600246e-09, "loss": 1.0222, "step": 100335 }, { "epoch": 2.958572902845349, "grad_norm": 3.6677685711636587, "learning_rate": 4.3557520032060096e-09, "loss": 0.9702, "step": 100340 }, { "epoch": 2.958720330237358, "grad_norm": 3.433281434454042, "learning_rate": 4.32481132267043e-09, "loss": 0.989, "step": 100345 }, { "epoch": 2.9588677576293674, "grad_norm": 3.6462912448532943, "learning_rate": 4.293980862904861e-09, "loss": 1.0541, "step": 100350 }, { "epoch": 2.959015185021377, "grad_norm": 3.5329016542076106, "learning_rate": 4.263260624815663e-09, "loss": 1.0688, "step": 100355 }, { "epoch": 2.9591626124133863, "grad_norm": 3.370957257554723, "learning_rate": 4.232650609307526e-09, "loss": 1.0388, "step": 100360 }, { "epoch": 2.9593100398053958, "grad_norm": 3.5044414087843903, "learning_rate": 4.202150817280564e-09, "loss": 1.0025, "step": 100365 }, { "epoch": 2.959457467197405, "grad_norm": 3.59725755567513, "learning_rate": 4.1717612496328105e-09, "loss": 1.0619, "step": 100370 }, { "epoch": 2.9596048945894147, "grad_norm": 3.5526164744583673, "learning_rate": 4.141481907258132e-09, "loss": 1.0445, "step": 100375 }, { "epoch": 2.959752321981424, "grad_norm": 3.413683423910017, "learning_rate": 4.111312791047484e-09, "loss": 0.9889, "step": 100380 }, { "epoch": 2.9598997493734336, "grad_norm": 3.677025087962548, "learning_rate": 4.08125390188932e-09, "loss": 1.0404, "step": 100385 }, { "epoch": 2.960047176765443, "grad_norm": 3.4392236024074405, "learning_rate": 4.051305240667102e-09, "loss": 1.0273, "step": 100390 }, { "epoch": 2.9601946041574525, "grad_norm": 3.5367870099656447, "learning_rate": 4.021466808263041e-09, "loss": 1.0552, "step": 100395 }, { "epoch": 2.960342031549462, "grad_norm": 3.502529097184374, "learning_rate": 3.991738605554351e-09, "loss": 1.0502, "step": 100400 }, { "epoch": 2.9604894589414714, "grad_norm": 3.3079579136584143, "learning_rate": 3.962120633416583e-09, "loss": 1.0552, "step": 100405 }, { "epoch": 2.960636886333481, "grad_norm": 3.384427528357557, "learning_rate": 3.932612892721121e-09, "loss": 1.0337, "step": 100410 }, { "epoch": 2.9607843137254903, "grad_norm": 3.5838716283914196, "learning_rate": 3.9032153843356066e-09, "loss": 1.0567, "step": 100415 }, { "epoch": 2.9609317411175, "grad_norm": 3.438541697185255, "learning_rate": 3.873928109126012e-09, "loss": 1.0085, "step": 100420 }, { "epoch": 2.9610791685095093, "grad_norm": 3.5631908692753975, "learning_rate": 3.844751067953317e-09, "loss": 1.0269, "step": 100425 }, { "epoch": 2.9612265959015183, "grad_norm": 3.4394660886339556, "learning_rate": 3.8156842616768326e-09, "loss": 1.0758, "step": 100430 }, { "epoch": 2.961374023293528, "grad_norm": 3.567651376463658, "learning_rate": 3.786727691151293e-09, "loss": 0.9995, "step": 100435 }, { "epoch": 2.961521450685537, "grad_norm": 3.555420525011489, "learning_rate": 3.7578813572289314e-09, "loss": 1.0637, "step": 100440 }, { "epoch": 2.961668878077547, "grad_norm": 3.45836557255344, "learning_rate": 3.72914526075907e-09, "loss": 1.0479, "step": 100445 }, { "epoch": 2.961816305469556, "grad_norm": 3.770178804058305, "learning_rate": 3.7005194025864496e-09, "loss": 1.0386, "step": 100450 }, { "epoch": 2.9619637328615656, "grad_norm": 3.60296842880204, "learning_rate": 3.6720037835541453e-09, "loss": 1.0705, "step": 100455 }, { "epoch": 2.962111160253575, "grad_norm": 3.378448658494064, "learning_rate": 3.6435984045010694e-09, "loss": 1.0744, "step": 100460 }, { "epoch": 2.9622585876455845, "grad_norm": 3.542033720790186, "learning_rate": 3.6153032662628038e-09, "loss": 1.037, "step": 100465 }, { "epoch": 2.962406015037594, "grad_norm": 3.5591109898696525, "learning_rate": 3.587118369672432e-09, "loss": 1.0558, "step": 100470 }, { "epoch": 2.9625534424296034, "grad_norm": 3.389494145290893, "learning_rate": 3.5590437155584576e-09, "loss": 1.0288, "step": 100475 }, { "epoch": 2.962700869821613, "grad_norm": 3.5099889022671067, "learning_rate": 3.531079304748136e-09, "loss": 1.032, "step": 100480 }, { "epoch": 2.9628482972136223, "grad_norm": 3.5269038164861377, "learning_rate": 3.503225138063726e-09, "loss": 1.0444, "step": 100485 }, { "epoch": 2.9629957246056318, "grad_norm": 3.4515389680485464, "learning_rate": 3.4754812163249885e-09, "loss": 1.0127, "step": 100490 }, { "epoch": 2.9631431519976412, "grad_norm": 3.607645506808224, "learning_rate": 3.4478475403483535e-09, "loss": 1.0295, "step": 100495 }, { "epoch": 2.9632905793896507, "grad_norm": 3.467137094269231, "learning_rate": 3.4203241109465042e-09, "loss": 1.0245, "step": 100500 }, { "epoch": 2.9632905793896507, "eval_loss": 1.0764837265014648, "eval_runtime": 4.2934, "eval_samples_per_second": 92.235, "eval_steps_per_second": 3.028, "step": 100500 }, { "epoch": 2.96343800678166, "grad_norm": 3.5276562539304686, "learning_rate": 3.392910928930043e-09, "loss": 1.0718, "step": 100505 }, { "epoch": 2.9635854341736696, "grad_norm": 3.39133693943257, "learning_rate": 3.3656079951054074e-09, "loss": 1.024, "step": 100510 }, { "epoch": 2.963732861565679, "grad_norm": 3.4617031139573022, "learning_rate": 3.3384153102761217e-09, "loss": 1.0548, "step": 100515 }, { "epoch": 2.9638802889576885, "grad_norm": 3.4611358022464263, "learning_rate": 3.3113328752419624e-09, "loss": 1.0483, "step": 100520 }, { "epoch": 2.9640277163496975, "grad_norm": 3.4503677665848214, "learning_rate": 3.2843606908002087e-09, "loss": 1.0713, "step": 100525 }, { "epoch": 2.9641751437417074, "grad_norm": 3.534013439490604, "learning_rate": 3.257498757744393e-09, "loss": 1.0452, "step": 100530 }, { "epoch": 2.9643225711337164, "grad_norm": 3.5537902608645324, "learning_rate": 3.2307470768651315e-09, "loss": 1.0167, "step": 100535 }, { "epoch": 2.9644699985257263, "grad_norm": 3.4896519172476657, "learning_rate": 3.204105648949296e-09, "loss": 1.0301, "step": 100540 }, { "epoch": 2.9646174259177354, "grad_norm": 3.5851927619631523, "learning_rate": 3.1775744747808422e-09, "loss": 1.0359, "step": 100545 }, { "epoch": 2.964764853309745, "grad_norm": 3.4225388143768414, "learning_rate": 3.1511535551412283e-09, "loss": 1.0709, "step": 100550 }, { "epoch": 2.9649122807017543, "grad_norm": 3.3665344619996502, "learning_rate": 3.124842890806917e-09, "loss": 1.051, "step": 100555 }, { "epoch": 2.9650597080937637, "grad_norm": 3.328333602206585, "learning_rate": 3.098642482552705e-09, "loss": 1.043, "step": 100560 }, { "epoch": 2.965207135485773, "grad_norm": 3.5484730255685712, "learning_rate": 3.0725523311496416e-09, "loss": 1.0417, "step": 100565 }, { "epoch": 2.9653545628777827, "grad_norm": 3.5626553208058995, "learning_rate": 3.0465724373650305e-09, "loss": 1.0417, "step": 100570 }, { "epoch": 2.965501990269792, "grad_norm": 3.651307591299079, "learning_rate": 3.020702801963676e-09, "loss": 1.0293, "step": 100575 }, { "epoch": 2.9656494176618016, "grad_norm": 3.548880360637863, "learning_rate": 2.9949434257070527e-09, "loss": 1.0648, "step": 100580 }, { "epoch": 2.965796845053811, "grad_norm": 3.5840207676780746, "learning_rate": 2.9692943093524716e-09, "loss": 1.0104, "step": 100585 }, { "epoch": 2.9659442724458205, "grad_norm": 3.544721221918615, "learning_rate": 2.9437554536551614e-09, "loss": 1.0507, "step": 100590 }, { "epoch": 2.96609169983783, "grad_norm": 3.580755197691382, "learning_rate": 2.9183268593666045e-09, "loss": 1.0522, "step": 100595 }, { "epoch": 2.9662391272298394, "grad_norm": 3.4517219750214245, "learning_rate": 2.8930085272349527e-09, "loss": 1.0245, "step": 100600 }, { "epoch": 2.966386554621849, "grad_norm": 3.635834762444177, "learning_rate": 2.8678004580050264e-09, "loss": 1.0734, "step": 100605 }, { "epoch": 2.9665339820138583, "grad_norm": 3.5341097759924267, "learning_rate": 2.842702652419149e-09, "loss": 1.0523, "step": 100610 }, { "epoch": 2.9666814094058678, "grad_norm": 3.426969007298053, "learning_rate": 2.81771511121548e-09, "loss": 1.0168, "step": 100615 }, { "epoch": 2.966828836797877, "grad_norm": 3.3690333709818003, "learning_rate": 2.79283783512968e-09, "loss": 1.0534, "step": 100620 }, { "epoch": 2.9669762641898867, "grad_norm": 3.519733699653025, "learning_rate": 2.7680708248928323e-09, "loss": 1.0372, "step": 100625 }, { "epoch": 2.9671236915818957, "grad_norm": 3.55506729731669, "learning_rate": 2.7434140812347686e-09, "loss": 1.0431, "step": 100630 }, { "epoch": 2.9672711189739056, "grad_norm": 3.54054518180831, "learning_rate": 2.7188676048807426e-09, "loss": 1.0451, "step": 100635 }, { "epoch": 2.9674185463659146, "grad_norm": 3.388255414215881, "learning_rate": 2.694431396553093e-09, "loss": 1.0217, "step": 100640 }, { "epoch": 2.967565973757924, "grad_norm": 3.4296655702575256, "learning_rate": 2.670105456970412e-09, "loss": 1.023, "step": 100645 }, { "epoch": 2.9677134011499335, "grad_norm": 3.4470003815244765, "learning_rate": 2.645889786848793e-09, "loss": 1.0109, "step": 100650 }, { "epoch": 2.967860828541943, "grad_norm": 3.4431555600655765, "learning_rate": 2.6217843869014157e-09, "loss": 0.9905, "step": 100655 }, { "epoch": 2.9680082559339525, "grad_norm": 3.5975607136311463, "learning_rate": 2.59778925783688e-09, "loss": 1.0671, "step": 100660 }, { "epoch": 2.968155683325962, "grad_norm": 3.4958797863491795, "learning_rate": 2.573904400361288e-09, "loss": 1.0108, "step": 100665 }, { "epoch": 2.9683031107179714, "grad_norm": 3.5271297358900866, "learning_rate": 2.5501298151778263e-09, "loss": 1.0589, "step": 100670 }, { "epoch": 2.968450538109981, "grad_norm": 3.45782845562913, "learning_rate": 2.5264655029859364e-09, "loss": 1.0192, "step": 100675 }, { "epoch": 2.9685979655019903, "grad_norm": 3.491675548403731, "learning_rate": 2.5029114644817273e-09, "loss": 1.0215, "step": 100680 }, { "epoch": 2.9687453928939997, "grad_norm": 3.543145060386721, "learning_rate": 2.479467700358812e-09, "loss": 1.022, "step": 100685 }, { "epoch": 2.968892820286009, "grad_norm": 3.5247038087713642, "learning_rate": 2.4561342113066375e-09, "loss": 1.0539, "step": 100690 }, { "epoch": 2.9690402476780187, "grad_norm": 3.370882595611688, "learning_rate": 2.4329109980121555e-09, "loss": 1.0128, "step": 100695 }, { "epoch": 2.969187675070028, "grad_norm": 3.4940496140424764, "learning_rate": 2.409798061158569e-09, "loss": 1.0268, "step": 100700 }, { "epoch": 2.9693351024620376, "grad_norm": 3.391706830483707, "learning_rate": 2.386795401425751e-09, "loss": 1.0478, "step": 100705 }, { "epoch": 2.969482529854047, "grad_norm": 3.8599465509585467, "learning_rate": 2.3639030194906597e-09, "loss": 1.0739, "step": 100710 }, { "epoch": 2.969629957246056, "grad_norm": 3.645072374724694, "learning_rate": 2.3411209160273396e-09, "loss": 1.02, "step": 100715 }, { "epoch": 2.969777384638066, "grad_norm": 3.6933918599864217, "learning_rate": 2.3184490917060876e-09, "loss": 1.072, "step": 100720 }, { "epoch": 2.969924812030075, "grad_norm": 3.510620898606194, "learning_rate": 2.2958875471934536e-09, "loss": 1.0479, "step": 100725 }, { "epoch": 2.970072239422085, "grad_norm": 3.5703582221237395, "learning_rate": 2.273436283153907e-09, "loss": 1.0534, "step": 100730 }, { "epoch": 2.970219666814094, "grad_norm": 3.6273916082656794, "learning_rate": 2.2510953002481682e-09, "loss": 1.0526, "step": 100735 }, { "epoch": 2.9703670942061033, "grad_norm": 3.6032035751256446, "learning_rate": 2.2288645991332122e-09, "loss": 1.0259, "step": 100740 }, { "epoch": 2.970514521598113, "grad_norm": 3.5669274649127907, "learning_rate": 2.206744180463516e-09, "loss": 1.0532, "step": 100745 }, { "epoch": 2.9706619489901223, "grad_norm": 3.4510878254811965, "learning_rate": 2.184734044889808e-09, "loss": 1.0692, "step": 100750 }, { "epoch": 2.9708093763821317, "grad_norm": 3.4767764315046104, "learning_rate": 2.162834193059904e-09, "loss": 1.0308, "step": 100755 }, { "epoch": 2.970956803774141, "grad_norm": 3.530319933777826, "learning_rate": 2.1410446256182883e-09, "loss": 1.0376, "step": 100760 }, { "epoch": 2.9711042311661506, "grad_norm": 3.5887215814886275, "learning_rate": 2.119365343205698e-09, "loss": 1.064, "step": 100765 }, { "epoch": 2.97125165855816, "grad_norm": 3.54002671074105, "learning_rate": 2.0977963464607895e-09, "loss": 1.0453, "step": 100770 }, { "epoch": 2.9713990859501695, "grad_norm": 3.4682319679146403, "learning_rate": 2.0763376360176387e-09, "loss": 0.996, "step": 100775 }, { "epoch": 2.971546513342179, "grad_norm": 3.627245958680893, "learning_rate": 2.054989212507824e-09, "loss": 0.9874, "step": 100780 }, { "epoch": 2.9716939407341885, "grad_norm": 3.5178714538121936, "learning_rate": 2.0337510765600087e-09, "loss": 1.0341, "step": 100785 }, { "epoch": 2.971841368126198, "grad_norm": 3.5791170172590934, "learning_rate": 2.0126232287986936e-09, "loss": 1.0736, "step": 100790 }, { "epoch": 2.9719887955182074, "grad_norm": 3.370104172484074, "learning_rate": 1.991605669845881e-09, "loss": 1.0231, "step": 100795 }, { "epoch": 2.972136222910217, "grad_norm": 3.5929219311080436, "learning_rate": 1.9706984003194093e-09, "loss": 1.0443, "step": 100800 }, { "epoch": 2.9722836503022263, "grad_norm": 3.313424597514908, "learning_rate": 1.9499014208350374e-09, "loss": 1.0141, "step": 100805 }, { "epoch": 2.9724310776942353, "grad_norm": 3.4817365626915047, "learning_rate": 1.9292147320047748e-09, "loss": 1.0425, "step": 100810 }, { "epoch": 2.972578505086245, "grad_norm": 3.436917587942818, "learning_rate": 1.9086383344373016e-09, "loss": 1.0323, "step": 100815 }, { "epoch": 2.9727259324782542, "grad_norm": 3.506235848713338, "learning_rate": 1.8881722287379667e-09, "loss": 1.0443, "step": 100820 }, { "epoch": 2.972873359870264, "grad_norm": 3.454461478236324, "learning_rate": 1.8678164155087886e-09, "loss": 1.0285, "step": 100825 }, { "epoch": 2.973020787262273, "grad_norm": 3.545131275116059, "learning_rate": 1.8475708953492876e-09, "loss": 1.051, "step": 100830 }, { "epoch": 2.9731682146542826, "grad_norm": 3.6287001760826656, "learning_rate": 1.8274356688552374e-09, "loss": 1.0672, "step": 100835 }, { "epoch": 2.973315642046292, "grad_norm": 3.65402334544604, "learning_rate": 1.807410736618248e-09, "loss": 1.0408, "step": 100840 }, { "epoch": 2.9734630694383015, "grad_norm": 3.5824034142169037, "learning_rate": 1.7874960992286804e-09, "loss": 1.0917, "step": 100845 }, { "epoch": 2.973610496830311, "grad_norm": 3.3325865536501285, "learning_rate": 1.7676917572714834e-09, "loss": 1.0495, "step": 100850 }, { "epoch": 2.9737579242223204, "grad_norm": 3.57268422319445, "learning_rate": 1.747997711330357e-09, "loss": 1.0144, "step": 100855 }, { "epoch": 2.97390535161433, "grad_norm": 3.664931046210561, "learning_rate": 1.7284139619844209e-09, "loss": 1.0256, "step": 100860 }, { "epoch": 2.9740527790063394, "grad_norm": 3.628349210031943, "learning_rate": 1.7089405098102972e-09, "loss": 1.0489, "step": 100865 }, { "epoch": 2.974200206398349, "grad_norm": 3.461529998889144, "learning_rate": 1.689577355380445e-09, "loss": 1.0324, "step": 100870 }, { "epoch": 2.9743476337903583, "grad_norm": 3.5034730594138037, "learning_rate": 1.6703244992648247e-09, "loss": 1.0034, "step": 100875 }, { "epoch": 2.9744950611823677, "grad_norm": 3.3988189201506795, "learning_rate": 1.6511819420300667e-09, "loss": 1.0295, "step": 100880 }, { "epoch": 2.974642488574377, "grad_norm": 3.3953343718597724, "learning_rate": 1.63214968423947e-09, "loss": 0.9991, "step": 100885 }, { "epoch": 2.9747899159663866, "grad_norm": 3.3648827270691273, "learning_rate": 1.6132277264530037e-09, "loss": 1.0337, "step": 100890 }, { "epoch": 2.974937343358396, "grad_norm": 3.4812205304424793, "learning_rate": 1.5944160692273058e-09, "loss": 1.0245, "step": 100895 }, { "epoch": 2.9750847707504056, "grad_norm": 3.503213724841743, "learning_rate": 1.575714713116516e-09, "loss": 1.0849, "step": 100900 }, { "epoch": 2.975232198142415, "grad_norm": 3.4711925346381314, "learning_rate": 1.5571236586706116e-09, "loss": 1.0483, "step": 100905 }, { "epoch": 2.9753796255344245, "grad_norm": 3.528449523601122, "learning_rate": 1.5386429064362384e-09, "loss": 1.022, "step": 100910 }, { "epoch": 2.9755270529264335, "grad_norm": 3.573059986015894, "learning_rate": 1.5202724569579607e-09, "loss": 1.0117, "step": 100915 }, { "epoch": 2.9756744803184434, "grad_norm": 3.552657371377489, "learning_rate": 1.5020123107757633e-09, "loss": 1.0548, "step": 100920 }, { "epoch": 2.9758219077104524, "grad_norm": 3.3630826602837405, "learning_rate": 1.483862468427133e-09, "loss": 1.0149, "step": 100925 }, { "epoch": 2.9759693351024623, "grad_norm": 3.5245314214975245, "learning_rate": 1.465822930446642e-09, "loss": 1.0398, "step": 100930 }, { "epoch": 2.9761167624944713, "grad_norm": 3.327489610308921, "learning_rate": 1.447893697364283e-09, "loss": 0.9928, "step": 100935 }, { "epoch": 2.976264189886481, "grad_norm": 3.3849998424503314, "learning_rate": 1.4300747697079676e-09, "loss": 0.9859, "step": 100940 }, { "epoch": 2.9764116172784902, "grad_norm": 3.5712942554553804, "learning_rate": 1.4123661480022754e-09, "loss": 0.9936, "step": 100945 }, { "epoch": 2.9765590446704997, "grad_norm": 3.434629656810135, "learning_rate": 1.3947678327684564e-09, "loss": 1.0726, "step": 100950 }, { "epoch": 2.976706472062509, "grad_norm": 3.743987040497581, "learning_rate": 1.3772798245235973e-09, "loss": 1.0445, "step": 100955 }, { "epoch": 2.9768538994545186, "grad_norm": 3.6454482258513132, "learning_rate": 1.3599021237831188e-09, "loss": 1.0695, "step": 100960 }, { "epoch": 2.977001326846528, "grad_norm": 3.389700803505904, "learning_rate": 1.3426347310574461e-09, "loss": 1.032, "step": 100965 }, { "epoch": 2.9771487542385375, "grad_norm": 3.500316049048491, "learning_rate": 1.3254776468557551e-09, "loss": 1.0028, "step": 100970 }, { "epoch": 2.977296181630547, "grad_norm": 3.462648327828726, "learning_rate": 1.3084308716822258e-09, "loss": 1.0497, "step": 100975 }, { "epoch": 2.9774436090225564, "grad_norm": 3.542427349533616, "learning_rate": 1.2914944060389566e-09, "loss": 1.0223, "step": 100980 }, { "epoch": 2.977591036414566, "grad_norm": 3.415061898025561, "learning_rate": 1.2746682504238827e-09, "loss": 1.0492, "step": 100985 }, { "epoch": 2.9777384638065754, "grad_norm": 3.35356984731704, "learning_rate": 1.2579524053320246e-09, "loss": 1.0282, "step": 100990 }, { "epoch": 2.977885891198585, "grad_norm": 3.5296689114395803, "learning_rate": 1.241346871255905e-09, "loss": 1.0307, "step": 100995 }, { "epoch": 2.9780333185905943, "grad_norm": 3.4642361928255836, "learning_rate": 1.224851648683467e-09, "loss": 1.0212, "step": 101000 }, { "epoch": 2.9780333185905943, "eval_loss": 1.0765130519866943, "eval_runtime": 4.1949, "eval_samples_per_second": 94.4, "eval_steps_per_second": 3.099, "step": 101000 }, { "epoch": 2.9781807459826037, "grad_norm": 3.4780764844811576, "learning_rate": 1.208466738100572e-09, "loss": 1.0177, "step": 101005 }, { "epoch": 2.9783281733746128, "grad_norm": 3.578505302309684, "learning_rate": 1.1921921399893343e-09, "loss": 1.0343, "step": 101010 }, { "epoch": 2.9784756007666227, "grad_norm": 3.501341482963037, "learning_rate": 1.1760278548285374e-09, "loss": 1.0141, "step": 101015 }, { "epoch": 2.9786230281586317, "grad_norm": 3.4322692165815014, "learning_rate": 1.1599738830936346e-09, "loss": 1.0386, "step": 101020 }, { "epoch": 2.9787704555506416, "grad_norm": 3.5124242055636423, "learning_rate": 1.1440302252571643e-09, "loss": 1.0724, "step": 101025 }, { "epoch": 2.9789178829426506, "grad_norm": 3.441267717096939, "learning_rate": 1.128196881788751e-09, "loss": 0.9757, "step": 101030 }, { "epoch": 2.97906531033466, "grad_norm": 3.605028591840654, "learning_rate": 1.1124738531534395e-09, "loss": 1.0721, "step": 101035 }, { "epoch": 2.9792127377266695, "grad_norm": 3.5182178585874224, "learning_rate": 1.096861139814609e-09, "loss": 1.0114, "step": 101040 }, { "epoch": 2.979360165118679, "grad_norm": 3.6112919384262434, "learning_rate": 1.0813587422314753e-09, "loss": 1.0871, "step": 101045 }, { "epoch": 2.9795075925106884, "grad_norm": 3.6427611754999476, "learning_rate": 1.0659666608603402e-09, "loss": 1.0705, "step": 101050 }, { "epoch": 2.979655019902698, "grad_norm": 3.4410974273060737, "learning_rate": 1.0506848961537585e-09, "loss": 1.0303, "step": 101055 }, { "epoch": 2.9798024472947073, "grad_norm": 3.4255610359334896, "learning_rate": 1.0355134485613705e-09, "loss": 1.0165, "step": 101060 }, { "epoch": 2.979949874686717, "grad_norm": 3.518099437282749, "learning_rate": 1.0204523185303183e-09, "loss": 1.0463, "step": 101065 }, { "epoch": 2.9800973020787263, "grad_norm": 3.6946436726446112, "learning_rate": 1.0055015065031647e-09, "loss": 1.0527, "step": 101070 }, { "epoch": 2.9802447294707357, "grad_norm": 3.448413224342889, "learning_rate": 9.906610129199744e-10, "loss": 1.0322, "step": 101075 }, { "epoch": 2.980392156862745, "grad_norm": 3.402023415946032, "learning_rate": 9.759308382174814e-10, "loss": 0.9987, "step": 101080 }, { "epoch": 2.9805395842547546, "grad_norm": 3.342541522760159, "learning_rate": 9.613109828290889e-10, "loss": 1.0905, "step": 101085 }, { "epoch": 2.980687011646764, "grad_norm": 3.454606506925015, "learning_rate": 9.468014471852859e-10, "loss": 1.0602, "step": 101090 }, { "epoch": 2.9808344390387735, "grad_norm": 3.564885357788969, "learning_rate": 9.324022317128144e-10, "loss": 1.0696, "step": 101095 }, { "epoch": 2.980981866430783, "grad_norm": 3.526371490811913, "learning_rate": 9.181133368350858e-10, "loss": 1.0453, "step": 101100 }, { "epoch": 2.981129293822792, "grad_norm": 3.4591675287004926, "learning_rate": 9.039347629730133e-10, "loss": 1.0324, "step": 101105 }, { "epoch": 2.981276721214802, "grad_norm": 3.61466606312567, "learning_rate": 8.898665105437631e-10, "loss": 1.0631, "step": 101110 }, { "epoch": 2.981424148606811, "grad_norm": 3.4959713738858214, "learning_rate": 8.75908579961171e-10, "loss": 1.0306, "step": 101115 }, { "epoch": 2.981571575998821, "grad_norm": 3.3763188408032794, "learning_rate": 8.620609716361583e-10, "loss": 1.0512, "step": 101120 }, { "epoch": 2.98171900339083, "grad_norm": 3.7657892330093325, "learning_rate": 8.483236859758991e-10, "loss": 1.0523, "step": 101125 }, { "epoch": 2.9818664307828393, "grad_norm": 3.4594441903672095, "learning_rate": 8.3469672338507e-10, "loss": 1.0465, "step": 101130 }, { "epoch": 2.9820138581748488, "grad_norm": 3.4597460711912955, "learning_rate": 8.211800842641837e-10, "loss": 1.0685, "step": 101135 }, { "epoch": 2.982161285566858, "grad_norm": 3.478891287082443, "learning_rate": 8.077737690112552e-10, "loss": 1.0099, "step": 101140 }, { "epoch": 2.9823087129588677, "grad_norm": 3.5790413855179763, "learning_rate": 7.944777780205525e-10, "loss": 1.0418, "step": 101145 }, { "epoch": 2.982456140350877, "grad_norm": 3.5825051688671548, "learning_rate": 7.812921116834293e-10, "loss": 1.0199, "step": 101150 }, { "epoch": 2.9826035677428866, "grad_norm": 3.72426110666006, "learning_rate": 7.682167703883247e-10, "loss": 1.0118, "step": 101155 }, { "epoch": 2.982750995134896, "grad_norm": 3.2327017015211887, "learning_rate": 7.552517545195148e-10, "loss": 1.0427, "step": 101160 }, { "epoch": 2.9828984225269055, "grad_norm": 3.491633347366196, "learning_rate": 7.423970644583611e-10, "loss": 1.0209, "step": 101165 }, { "epoch": 2.983045849918915, "grad_norm": 3.453507886484639, "learning_rate": 7.296527005833109e-10, "loss": 1.0415, "step": 101170 }, { "epoch": 2.9831932773109244, "grad_norm": 3.5454516722266076, "learning_rate": 7.170186632698972e-10, "loss": 1.0758, "step": 101175 }, { "epoch": 2.983340704702934, "grad_norm": 3.6460624808611453, "learning_rate": 7.044949528890732e-10, "loss": 1.0672, "step": 101180 }, { "epoch": 2.9834881320949433, "grad_norm": 3.5711970795526264, "learning_rate": 6.920815698097105e-10, "loss": 1.0506, "step": 101185 }, { "epoch": 2.983635559486953, "grad_norm": 3.273061716783228, "learning_rate": 6.7977851439735e-10, "loss": 1.0261, "step": 101190 }, { "epoch": 2.9837829868789623, "grad_norm": 3.4808884364076214, "learning_rate": 6.675857870137858e-10, "loss": 1.0396, "step": 101195 }, { "epoch": 2.9839304142709713, "grad_norm": 3.5016438609583447, "learning_rate": 6.55503388017481e-10, "loss": 1.0476, "step": 101200 }, { "epoch": 2.984077841662981, "grad_norm": 3.5703386916353916, "learning_rate": 6.435313177644009e-10, "loss": 1.0052, "step": 101205 }, { "epoch": 2.98422526905499, "grad_norm": 3.4695232862227834, "learning_rate": 6.316695766071801e-10, "loss": 1.0615, "step": 101210 }, { "epoch": 2.984372696447, "grad_norm": 3.4624847737998463, "learning_rate": 6.199181648938734e-10, "loss": 1.0163, "step": 101215 }, { "epoch": 2.984520123839009, "grad_norm": 3.4876069721782663, "learning_rate": 6.082770829708706e-10, "loss": 1.0602, "step": 101220 }, { "epoch": 2.9846675512310186, "grad_norm": 3.532602947885916, "learning_rate": 5.967463311808141e-10, "loss": 1.0071, "step": 101225 }, { "epoch": 2.984814978623028, "grad_norm": 3.4541837850505153, "learning_rate": 5.85325909863016e-10, "loss": 1.0531, "step": 101230 }, { "epoch": 2.9849624060150375, "grad_norm": 3.4316843411771694, "learning_rate": 5.740158193530409e-10, "loss": 1.0539, "step": 101235 }, { "epoch": 2.985109833407047, "grad_norm": 3.6204892058067792, "learning_rate": 5.628160599843723e-10, "loss": 1.0215, "step": 101240 }, { "epoch": 2.9852572607990564, "grad_norm": 3.4593649070013583, "learning_rate": 5.517266320859138e-10, "loss": 1.0409, "step": 101245 }, { "epoch": 2.985404688191066, "grad_norm": 3.4946329775197937, "learning_rate": 5.407475359844871e-10, "loss": 1.0042, "step": 101250 }, { "epoch": 2.9855521155830753, "grad_norm": 3.440147903718295, "learning_rate": 5.298787720031673e-10, "loss": 1.0216, "step": 101255 }, { "epoch": 2.9856995429750848, "grad_norm": 3.641877983015855, "learning_rate": 5.191203404612821e-10, "loss": 0.9965, "step": 101260 }, { "epoch": 2.9858469703670942, "grad_norm": 3.382978868155368, "learning_rate": 5.084722416760779e-10, "loss": 1.0572, "step": 101265 }, { "epoch": 2.9859943977591037, "grad_norm": 3.525243679247285, "learning_rate": 4.979344759602212e-10, "loss": 1.0051, "step": 101270 }, { "epoch": 2.986141825151113, "grad_norm": 3.4344871646063306, "learning_rate": 4.875070436242968e-10, "loss": 1.0362, "step": 101275 }, { "epoch": 2.9862892525431226, "grad_norm": 3.482552862049224, "learning_rate": 4.771899449751427e-10, "loss": 1.0216, "step": 101280 }, { "epoch": 2.986436679935132, "grad_norm": 3.4315918878145184, "learning_rate": 4.669831803158498e-10, "loss": 0.9928, "step": 101285 }, { "epoch": 2.9865841073271415, "grad_norm": 3.640120820952677, "learning_rate": 4.568867499474272e-10, "loss": 1.0284, "step": 101290 }, { "epoch": 2.986731534719151, "grad_norm": 3.5196349297443454, "learning_rate": 4.4690065416630456e-10, "loss": 1.0819, "step": 101295 }, { "epoch": 2.9868789621111604, "grad_norm": 3.5351800333635204, "learning_rate": 4.370248932668297e-10, "loss": 1.0291, "step": 101300 }, { "epoch": 2.9870263895031695, "grad_norm": 3.4792705042417307, "learning_rate": 4.2725946753960353e-10, "loss": 1.0562, "step": 101305 }, { "epoch": 2.9871738168951794, "grad_norm": 3.6087924082531595, "learning_rate": 4.176043772714799e-10, "loss": 1.0518, "step": 101310 }, { "epoch": 2.9873212442871884, "grad_norm": 3.6417532312929315, "learning_rate": 4.0805962274723106e-10, "loss": 1.0671, "step": 101315 }, { "epoch": 2.987468671679198, "grad_norm": 3.598314667206711, "learning_rate": 3.9862520424746586e-10, "loss": 1.0464, "step": 101320 }, { "epoch": 2.9876160990712073, "grad_norm": 3.630617656529265, "learning_rate": 3.8930112204946246e-10, "loss": 1.0709, "step": 101325 }, { "epoch": 2.9877635264632167, "grad_norm": 3.5212189965174616, "learning_rate": 3.8008737642800105e-10, "loss": 1.0562, "step": 101330 }, { "epoch": 2.987910953855226, "grad_norm": 3.5690933880773175, "learning_rate": 3.709839676541149e-10, "loss": 1.0734, "step": 101335 }, { "epoch": 2.9880583812472357, "grad_norm": 3.467680786941367, "learning_rate": 3.619908959955065e-10, "loss": 1.0557, "step": 101340 }, { "epoch": 2.988205808639245, "grad_norm": 3.581249403310713, "learning_rate": 3.53108161716964e-10, "loss": 1.0339, "step": 101345 }, { "epoch": 2.9883532360312546, "grad_norm": 3.5472784568206666, "learning_rate": 3.44335765079945e-10, "loss": 1.0384, "step": 101350 }, { "epoch": 2.988500663423264, "grad_norm": 3.451518254268168, "learning_rate": 3.3567370634257633e-10, "loss": 1.0677, "step": 101355 }, { "epoch": 2.9886480908152735, "grad_norm": 3.5719134626976357, "learning_rate": 3.2712198575965414e-10, "loss": 1.0635, "step": 101360 }, { "epoch": 2.988795518207283, "grad_norm": 3.432616193253789, "learning_rate": 3.186806035830603e-10, "loss": 1.043, "step": 101365 }, { "epoch": 2.9889429455992924, "grad_norm": 3.3624266843713224, "learning_rate": 3.103495600605133e-10, "loss": 1.045, "step": 101370 }, { "epoch": 2.989090372991302, "grad_norm": 3.466143176735262, "learning_rate": 3.0212885543806636e-10, "loss": 1.0477, "step": 101375 }, { "epoch": 2.9892378003833113, "grad_norm": 3.4021280419706437, "learning_rate": 2.940184899567766e-10, "loss": 1.0231, "step": 101380 }, { "epoch": 2.989385227775321, "grad_norm": 3.482349740258068, "learning_rate": 2.8601846385603594e-10, "loss": 1.013, "step": 101385 }, { "epoch": 2.9895326551673302, "grad_norm": 3.3774775631369285, "learning_rate": 2.7812877737065644e-10, "loss": 1.061, "step": 101390 }, { "epoch": 2.9896800825593397, "grad_norm": 3.392442747807713, "learning_rate": 2.703494307333687e-10, "loss": 1.0186, "step": 101395 }, { "epoch": 2.9898275099513487, "grad_norm": 3.460299618114275, "learning_rate": 2.6268042417232354e-10, "loss": 1.0409, "step": 101400 }, { "epoch": 2.9899749373433586, "grad_norm": 3.344860649341353, "learning_rate": 2.551217579140064e-10, "loss": 1.0053, "step": 101405 }, { "epoch": 2.9901223647353676, "grad_norm": 3.4736930796092076, "learning_rate": 2.476734321803231e-10, "loss": 1.029, "step": 101410 }, { "epoch": 2.9902697921273775, "grad_norm": 3.551410509978409, "learning_rate": 2.403354471910979e-10, "loss": 1.0635, "step": 101415 }, { "epoch": 2.9904172195193865, "grad_norm": 3.467831834220946, "learning_rate": 2.3310780316115886e-10, "loss": 1.0249, "step": 101420 }, { "epoch": 2.990564646911396, "grad_norm": 3.5589342007969487, "learning_rate": 2.2599050030408518e-10, "loss": 1.0117, "step": 101425 }, { "epoch": 2.9907120743034055, "grad_norm": 3.417192542375895, "learning_rate": 2.1898353882887635e-10, "loss": 1.0377, "step": 101430 }, { "epoch": 2.990859501695415, "grad_norm": 3.4742568728720356, "learning_rate": 2.120869189420338e-10, "loss": 1.0835, "step": 101435 }, { "epoch": 2.9910069290874244, "grad_norm": 3.4750269372761315, "learning_rate": 2.0530064084631207e-10, "loss": 1.0402, "step": 101440 }, { "epoch": 2.991154356479434, "grad_norm": 3.4408230104144435, "learning_rate": 1.9862470474155125e-10, "loss": 1.0565, "step": 101445 }, { "epoch": 2.9913017838714433, "grad_norm": 3.4402703797857574, "learning_rate": 1.9205911082384454e-10, "loss": 1.0445, "step": 101450 }, { "epoch": 2.9914492112634528, "grad_norm": 3.5966249365895306, "learning_rate": 1.8560385928678703e-10, "loss": 1.0588, "step": 101455 }, { "epoch": 2.991596638655462, "grad_norm": 3.598684815000747, "learning_rate": 1.7925895032022688e-10, "loss": 1.0179, "step": 101460 }, { "epoch": 2.9917440660474717, "grad_norm": 3.351735520513655, "learning_rate": 1.7302438411068156e-10, "loss": 1.0233, "step": 101465 }, { "epoch": 2.991891493439481, "grad_norm": 3.4684775161292056, "learning_rate": 1.669001608417542e-10, "loss": 1.0367, "step": 101470 }, { "epoch": 2.9920389208314906, "grad_norm": 3.4699842103572895, "learning_rate": 1.6088628069371724e-10, "loss": 1.0699, "step": 101475 }, { "epoch": 2.9921863482235, "grad_norm": 3.607935043815634, "learning_rate": 1.5498274384351252e-10, "loss": 1.0481, "step": 101480 }, { "epoch": 2.9923337756155095, "grad_norm": 3.7157662384137633, "learning_rate": 1.4918955046475113e-10, "loss": 1.031, "step": 101485 }, { "epoch": 2.992481203007519, "grad_norm": 3.4233429587255833, "learning_rate": 1.4350670072812988e-10, "loss": 1.0738, "step": 101490 }, { "epoch": 2.992628630399528, "grad_norm": 3.45468407123128, "learning_rate": 1.3793419480059856e-10, "loss": 1.074, "step": 101495 }, { "epoch": 2.992776057791538, "grad_norm": 3.583597140930575, "learning_rate": 1.3247203284619258e-10, "loss": 1.0432, "step": 101500 }, { "epoch": 2.992776057791538, "eval_loss": 1.0764915943145752, "eval_runtime": 4.254, "eval_samples_per_second": 93.088, "eval_steps_per_second": 3.056, "step": 101500 }, { "epoch": 2.992923485183547, "grad_norm": 3.5355851788415706, "learning_rate": 1.2712021502561677e-10, "loss": 1.0772, "step": 101505 }, { "epoch": 2.993070912575557, "grad_norm": 3.4939571531411224, "learning_rate": 1.2187874149666156e-10, "loss": 1.0127, "step": 101510 }, { "epoch": 2.993218339967566, "grad_norm": 3.470063825606587, "learning_rate": 1.1674761241295405e-10, "loss": 1.0239, "step": 101515 }, { "epoch": 2.9933657673595753, "grad_norm": 3.560649499716591, "learning_rate": 1.1172682792603972e-10, "loss": 1.0152, "step": 101520 }, { "epoch": 2.9935131947515847, "grad_norm": 3.4247028373360533, "learning_rate": 1.0681638818371698e-10, "loss": 1.0436, "step": 101525 }, { "epoch": 2.993660622143594, "grad_norm": 3.4755116301999167, "learning_rate": 1.0201629333003726e-10, "loss": 1.0526, "step": 101530 }, { "epoch": 2.9938080495356036, "grad_norm": 3.481811389027158, "learning_rate": 9.732654350655401e-11, "loss": 1.0692, "step": 101535 }, { "epoch": 2.993955476927613, "grad_norm": 3.5753634136086236, "learning_rate": 9.274713885107366e-11, "loss": 1.0479, "step": 101540 }, { "epoch": 2.9941029043196226, "grad_norm": 3.5325899938280814, "learning_rate": 8.827807949848831e-11, "loss": 1.0252, "step": 101545 }, { "epoch": 2.994250331711632, "grad_norm": 3.4004667810107727, "learning_rate": 8.39193655803594e-11, "loss": 1.0304, "step": 101550 }, { "epoch": 2.9943977591036415, "grad_norm": 3.5140789472054865, "learning_rate": 7.967099722491767e-11, "loss": 1.0391, "step": 101555 }, { "epoch": 2.994545186495651, "grad_norm": 3.5834466636312294, "learning_rate": 7.553297455664687e-11, "loss": 1.0463, "step": 101560 }, { "epoch": 2.9946926138876604, "grad_norm": 3.5101558172953538, "learning_rate": 7.150529769836544e-11, "loss": 1.0687, "step": 101565 }, { "epoch": 2.99484004127967, "grad_norm": 3.4503968065645565, "learning_rate": 6.75879667678958e-11, "loss": 1.0365, "step": 101570 }, { "epoch": 2.9949874686716793, "grad_norm": 3.4777642071518486, "learning_rate": 6.378098188014602e-11, "loss": 1.0362, "step": 101575 }, { "epoch": 2.9951348960636888, "grad_norm": 3.496130635484323, "learning_rate": 6.008434314835887e-11, "loss": 1.0588, "step": 101580 }, { "epoch": 2.9952823234556982, "grad_norm": 3.645770543256753, "learning_rate": 5.64980506799484e-11, "loss": 1.0623, "step": 101585 }, { "epoch": 2.9954297508477072, "grad_norm": 3.3236397532043496, "learning_rate": 5.3022104581496036e-11, "loss": 0.9774, "step": 101590 }, { "epoch": 2.995577178239717, "grad_norm": 3.400033187644708, "learning_rate": 4.965650495458718e-11, "loss": 1.071, "step": 101595 }, { "epoch": 2.995724605631726, "grad_norm": 3.520058189013029, "learning_rate": 4.640125189872557e-11, "loss": 1.0006, "step": 101600 }, { "epoch": 2.995872033023736, "grad_norm": 3.39774628663836, "learning_rate": 4.3256345509251616e-11, "loss": 1.0381, "step": 101605 }, { "epoch": 2.996019460415745, "grad_norm": 3.3864876142348836, "learning_rate": 4.022178587900771e-11, "loss": 1.0383, "step": 101610 }, { "epoch": 2.9961668878077545, "grad_norm": 3.385010992472616, "learning_rate": 3.7297573097505586e-11, "loss": 1.0082, "step": 101615 }, { "epoch": 2.996314315199764, "grad_norm": 3.552251985271931, "learning_rate": 3.4483707250093645e-11, "loss": 1.036, "step": 101620 }, { "epoch": 2.9964617425917734, "grad_norm": 3.4661072663723482, "learning_rate": 3.1780188420454934e-11, "loss": 1.0048, "step": 101625 }, { "epoch": 2.996609169983783, "grad_norm": 3.551376018059265, "learning_rate": 2.9187016687692856e-11, "loss": 1.0143, "step": 101630 }, { "epoch": 2.9967565973757924, "grad_norm": 3.588832998457119, "learning_rate": 2.670419212758013e-11, "loss": 1.0488, "step": 101635 }, { "epoch": 2.996904024767802, "grad_norm": 3.396140604846354, "learning_rate": 2.4331714814224136e-11, "loss": 1.0508, "step": 101640 }, { "epoch": 2.9970514521598113, "grad_norm": 3.4687608645192762, "learning_rate": 2.2069584816736265e-11, "loss": 1.0623, "step": 101645 }, { "epoch": 2.9971988795518207, "grad_norm": 3.4540543392812477, "learning_rate": 1.991780220214623e-11, "loss": 1.0335, "step": 101650 }, { "epoch": 2.99734630694383, "grad_norm": 3.4548007843882336, "learning_rate": 1.7876367033320406e-11, "loss": 1.0271, "step": 101655 }, { "epoch": 2.9974937343358397, "grad_norm": 3.5303449387835713, "learning_rate": 1.594527937021084e-11, "loss": 1.0264, "step": 101660 }, { "epoch": 2.997641161727849, "grad_norm": 3.498365409650255, "learning_rate": 1.4124539270271575e-11, "loss": 1.0587, "step": 101665 }, { "epoch": 2.9977885891198586, "grad_norm": 3.5036397863956443, "learning_rate": 1.2414146786793312e-11, "loss": 1.0159, "step": 101670 }, { "epoch": 2.997936016511868, "grad_norm": 3.5737974450898156, "learning_rate": 1.0814101970152424e-11, "loss": 1.0514, "step": 101675 }, { "epoch": 2.9980834439038775, "grad_norm": 3.426289934547559, "learning_rate": 9.324404866978276e-12, "loss": 1.048, "step": 101680 }, { "epoch": 2.9982308712958865, "grad_norm": 3.537525912002454, "learning_rate": 7.945055521818567e-12, "loss": 1.058, "step": 101685 }, { "epoch": 2.9983782986878964, "grad_norm": 3.422937332568118, "learning_rate": 6.6760539746413274e-12, "loss": 1.0753, "step": 101690 }, { "epoch": 2.9985257260799054, "grad_norm": 3.46618341561677, "learning_rate": 5.517400262916583e-12, "loss": 1.0251, "step": 101695 }, { "epoch": 2.9986731534719153, "grad_norm": 3.473010086771127, "learning_rate": 4.469094421200026e-12, "loss": 1.061, "step": 101700 }, { "epoch": 2.9988205808639243, "grad_norm": 3.498755935582447, "learning_rate": 3.5311364803003454e-12, "loss": 1.0218, "step": 101705 }, { "epoch": 2.998968008255934, "grad_norm": 3.391075897936316, "learning_rate": 2.7035264668628933e-12, "loss": 1.0147, "step": 101710 }, { "epoch": 2.9991154356479433, "grad_norm": 3.625795522250744, "learning_rate": 1.986264406284022e-12, "loss": 0.9831, "step": 101715 }, { "epoch": 2.9992628630399527, "grad_norm": 3.563423605835511, "learning_rate": 1.3793503193804124e-12, "loss": 1.0555, "step": 101720 }, { "epoch": 2.999410290431962, "grad_norm": 3.567012479198932, "learning_rate": 8.827842240544115e-13, "loss": 1.0615, "step": 101725 }, { "epoch": 2.9995577178239716, "grad_norm": 3.4603339435714893, "learning_rate": 4.965661344613626e-13, "loss": 1.0368, "step": 101730 }, { "epoch": 2.999705145215981, "grad_norm": 3.5829890417701162, "learning_rate": 2.2069606267494103e-13, "loss": 0.9983, "step": 101735 }, { "epoch": 2.9998525726079905, "grad_norm": 3.451722998370578, "learning_rate": 5.51740161891523e-14, "loss": 1.0379, "step": 101740 }, { "epoch": 3.0, "grad_norm": 3.447175611271937, "learning_rate": 0.0, "loss": 1.0307, "step": 101745 }, { "epoch": 3.0, "step": 101745, "total_flos": 1.06516665335808e+16, "train_loss": 1.2970494811368156, "train_runtime": 158321.4888, "train_samples_per_second": 20.564, "train_steps_per_second": 0.643 } ], "logging_steps": 5, "max_steps": 101745, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.06516665335808e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }