{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999896050976601, "eval_steps": 500, "global_step": 48100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 5.09375, "learning_rate": 1.2474012474012475e-08, "loss": 2.414, "step": 1 }, { "epoch": 0.0, "grad_norm": 3.890625, "learning_rate": 1.2474012474012475e-07, "loss": 2.316, "step": 10 }, { "epoch": 0.0, "grad_norm": 4.96875, "learning_rate": 2.494802494802495e-07, "loss": 2.3409, "step": 20 }, { "epoch": 0.0, "grad_norm": 3.5, "learning_rate": 3.7422037422037426e-07, "loss": 2.3086, "step": 30 }, { "epoch": 0.0, "grad_norm": 3.6875, "learning_rate": 4.98960498960499e-07, "loss": 2.3287, "step": 40 }, { "epoch": 0.0, "grad_norm": 4.375, "learning_rate": 6.237006237006237e-07, "loss": 2.3292, "step": 50 }, { "epoch": 0.0, "grad_norm": 4.09375, "learning_rate": 7.484407484407485e-07, "loss": 2.3661, "step": 60 }, { "epoch": 0.0, "grad_norm": 3.875, "learning_rate": 8.731808731808732e-07, "loss": 2.3403, "step": 70 }, { "epoch": 0.0, "grad_norm": 4.0625, "learning_rate": 9.97920997920998e-07, "loss": 2.3216, "step": 80 }, { "epoch": 0.0, "grad_norm": 3.3125, "learning_rate": 1.1226611226611228e-06, "loss": 2.3108, "step": 90 }, { "epoch": 0.0, "grad_norm": 2.34375, "learning_rate": 1.2474012474012474e-06, "loss": 2.3116, "step": 100 }, { "epoch": 0.0, "grad_norm": 3.578125, "learning_rate": 1.3721413721413722e-06, "loss": 2.3412, "step": 110 }, { "epoch": 0.0, "grad_norm": 2.109375, "learning_rate": 1.496881496881497e-06, "loss": 2.3043, "step": 120 }, { "epoch": 0.0, "grad_norm": 2.15625, "learning_rate": 1.6216216216216219e-06, "loss": 2.2906, "step": 130 }, { "epoch": 0.0, "grad_norm": 1.8671875, "learning_rate": 1.7463617463617464e-06, "loss": 2.295, "step": 140 }, { "epoch": 0.0, "grad_norm": 1.3671875, "learning_rate": 1.8711018711018713e-06, "loss": 2.2948, "step": 150 }, { "epoch": 0.0, "grad_norm": 1.65625, "learning_rate": 1.995841995841996e-06, "loss": 2.282, "step": 160 }, { "epoch": 0.0, "grad_norm": 1.390625, "learning_rate": 2.120582120582121e-06, "loss": 2.3002, "step": 170 }, { "epoch": 0.0, "grad_norm": 0.98828125, "learning_rate": 2.2453222453222457e-06, "loss": 2.2917, "step": 180 }, { "epoch": 0.0, "grad_norm": 1.03125, "learning_rate": 2.37006237006237e-06, "loss": 2.2711, "step": 190 }, { "epoch": 0.0, "grad_norm": 0.73046875, "learning_rate": 2.494802494802495e-06, "loss": 2.29, "step": 200 }, { "epoch": 0.0, "grad_norm": 0.83203125, "learning_rate": 2.6195426195426197e-06, "loss": 2.2714, "step": 210 }, { "epoch": 0.0, "grad_norm": 3.015625, "learning_rate": 2.7442827442827445e-06, "loss": 2.2805, "step": 220 }, { "epoch": 0.0, "grad_norm": 0.67578125, "learning_rate": 2.8690228690228693e-06, "loss": 2.2621, "step": 230 }, { "epoch": 0.0, "grad_norm": 0.62890625, "learning_rate": 2.993762993762994e-06, "loss": 2.2599, "step": 240 }, { "epoch": 0.01, "grad_norm": 0.5625, "learning_rate": 3.118503118503119e-06, "loss": 2.2501, "step": 250 }, { "epoch": 0.01, "grad_norm": 0.67578125, "learning_rate": 3.2432432432432437e-06, "loss": 2.255, "step": 260 }, { "epoch": 0.01, "grad_norm": 0.65234375, "learning_rate": 3.367983367983368e-06, "loss": 2.2622, "step": 270 }, { "epoch": 0.01, "grad_norm": 0.58203125, "learning_rate": 3.492723492723493e-06, "loss": 2.302, "step": 280 }, { "epoch": 0.01, "grad_norm": 0.65234375, "learning_rate": 3.6174636174636177e-06, "loss": 2.2871, "step": 290 }, { "epoch": 0.01, "grad_norm": 0.50390625, "learning_rate": 3.7422037422037425e-06, "loss": 2.2389, "step": 300 }, { "epoch": 0.01, "grad_norm": 0.51953125, "learning_rate": 3.866943866943867e-06, "loss": 2.251, "step": 310 }, { "epoch": 0.01, "grad_norm": 0.5234375, "learning_rate": 3.991683991683992e-06, "loss": 2.263, "step": 320 }, { "epoch": 0.01, "grad_norm": 0.50390625, "learning_rate": 4.116424116424117e-06, "loss": 2.2557, "step": 330 }, { "epoch": 0.01, "grad_norm": 0.53515625, "learning_rate": 4.241164241164242e-06, "loss": 2.2275, "step": 340 }, { "epoch": 0.01, "grad_norm": 0.59765625, "learning_rate": 4.3659043659043665e-06, "loss": 2.2436, "step": 350 }, { "epoch": 0.01, "grad_norm": 0.55078125, "learning_rate": 4.490644490644491e-06, "loss": 2.2556, "step": 360 }, { "epoch": 0.01, "grad_norm": 0.5, "learning_rate": 4.615384615384616e-06, "loss": 2.2675, "step": 370 }, { "epoch": 0.01, "grad_norm": 0.515625, "learning_rate": 4.74012474012474e-06, "loss": 2.2425, "step": 380 }, { "epoch": 0.01, "grad_norm": 0.59375, "learning_rate": 4.864864864864865e-06, "loss": 2.2649, "step": 390 }, { "epoch": 0.01, "grad_norm": 0.55078125, "learning_rate": 4.98960498960499e-06, "loss": 2.2611, "step": 400 }, { "epoch": 0.01, "grad_norm": 0.61328125, "learning_rate": 5.1143451143451145e-06, "loss": 2.2612, "step": 410 }, { "epoch": 0.01, "grad_norm": 0.498046875, "learning_rate": 5.239085239085239e-06, "loss": 2.2397, "step": 420 }, { "epoch": 0.01, "grad_norm": 0.65625, "learning_rate": 5.363825363825364e-06, "loss": 2.2806, "step": 430 }, { "epoch": 0.01, "grad_norm": 0.53515625, "learning_rate": 5.488565488565489e-06, "loss": 2.25, "step": 440 }, { "epoch": 0.01, "grad_norm": 0.474609375, "learning_rate": 5.613305613305614e-06, "loss": 2.2517, "step": 450 }, { "epoch": 0.01, "grad_norm": 0.51953125, "learning_rate": 5.7380457380457386e-06, "loss": 2.2445, "step": 460 }, { "epoch": 0.01, "grad_norm": 0.52734375, "learning_rate": 5.862785862785863e-06, "loss": 2.2713, "step": 470 }, { "epoch": 0.01, "grad_norm": 0.478515625, "learning_rate": 5.987525987525988e-06, "loss": 2.253, "step": 480 }, { "epoch": 0.01, "grad_norm": 0.51171875, "learning_rate": 6.112266112266113e-06, "loss": 2.2655, "step": 490 }, { "epoch": 0.01, "grad_norm": 0.54296875, "learning_rate": 6.237006237006238e-06, "loss": 2.2432, "step": 500 }, { "epoch": 0.01, "eval_accuracy": 0.5358239483476676, "eval_loss": 2.1239066123962402, "eval_runtime": 16.4503, "eval_samples_per_second": 36.17, "eval_steps_per_second": 1.155, "step": 500 }, { "epoch": 0.01, "grad_norm": 0.47265625, "learning_rate": 6.361746361746363e-06, "loss": 2.2518, "step": 510 }, { "epoch": 0.01, "grad_norm": 0.50390625, "learning_rate": 6.486486486486487e-06, "loss": 2.2418, "step": 520 }, { "epoch": 0.01, "grad_norm": 0.5, "learning_rate": 6.611226611226611e-06, "loss": 2.2301, "step": 530 }, { "epoch": 0.01, "grad_norm": 0.46484375, "learning_rate": 6.735966735966736e-06, "loss": 2.2528, "step": 540 }, { "epoch": 0.01, "grad_norm": 0.470703125, "learning_rate": 6.860706860706861e-06, "loss": 2.277, "step": 550 }, { "epoch": 0.01, "grad_norm": 0.48046875, "learning_rate": 6.985446985446986e-06, "loss": 2.2413, "step": 560 }, { "epoch": 0.01, "grad_norm": 0.462890625, "learning_rate": 7.110187110187111e-06, "loss": 2.2362, "step": 570 }, { "epoch": 0.01, "grad_norm": 0.50390625, "learning_rate": 7.234927234927235e-06, "loss": 2.2555, "step": 580 }, { "epoch": 0.01, "grad_norm": 0.482421875, "learning_rate": 7.35966735966736e-06, "loss": 2.2618, "step": 590 }, { "epoch": 0.01, "grad_norm": 0.48046875, "learning_rate": 7.484407484407485e-06, "loss": 2.2621, "step": 600 }, { "epoch": 0.01, "grad_norm": 0.515625, "learning_rate": 7.60914760914761e-06, "loss": 2.2627, "step": 610 }, { "epoch": 0.01, "grad_norm": 0.5390625, "learning_rate": 7.733887733887735e-06, "loss": 2.2253, "step": 620 }, { "epoch": 0.01, "grad_norm": 0.46875, "learning_rate": 7.85862785862786e-06, "loss": 2.2166, "step": 630 }, { "epoch": 0.01, "grad_norm": 0.5703125, "learning_rate": 7.983367983367984e-06, "loss": 2.2339, "step": 640 }, { "epoch": 0.01, "grad_norm": 0.51953125, "learning_rate": 8.108108108108109e-06, "loss": 2.2368, "step": 650 }, { "epoch": 0.01, "grad_norm": 0.4765625, "learning_rate": 8.232848232848234e-06, "loss": 2.2122, "step": 660 }, { "epoch": 0.01, "grad_norm": 0.4609375, "learning_rate": 8.357588357588359e-06, "loss": 2.2463, "step": 670 }, { "epoch": 0.01, "grad_norm": 0.455078125, "learning_rate": 8.482328482328483e-06, "loss": 2.1995, "step": 680 }, { "epoch": 0.01, "grad_norm": 0.50390625, "learning_rate": 8.607068607068608e-06, "loss": 2.2342, "step": 690 }, { "epoch": 0.01, "grad_norm": 0.45703125, "learning_rate": 8.731808731808733e-06, "loss": 2.2459, "step": 700 }, { "epoch": 0.01, "grad_norm": 0.5078125, "learning_rate": 8.856548856548858e-06, "loss": 2.2487, "step": 710 }, { "epoch": 0.01, "grad_norm": 0.6171875, "learning_rate": 8.981288981288983e-06, "loss": 2.2721, "step": 720 }, { "epoch": 0.02, "grad_norm": 0.478515625, "learning_rate": 9.106029106029107e-06, "loss": 2.2112, "step": 730 }, { "epoch": 0.02, "grad_norm": 0.46484375, "learning_rate": 9.230769230769232e-06, "loss": 2.2318, "step": 740 }, { "epoch": 0.02, "grad_norm": 0.447265625, "learning_rate": 9.355509355509355e-06, "loss": 2.2131, "step": 750 }, { "epoch": 0.02, "grad_norm": 0.4609375, "learning_rate": 9.48024948024948e-06, "loss": 2.2346, "step": 760 }, { "epoch": 0.02, "grad_norm": 0.455078125, "learning_rate": 9.604989604989605e-06, "loss": 2.2356, "step": 770 }, { "epoch": 0.02, "grad_norm": 0.50390625, "learning_rate": 9.72972972972973e-06, "loss": 2.2433, "step": 780 }, { "epoch": 0.02, "grad_norm": 0.52734375, "learning_rate": 9.854469854469855e-06, "loss": 2.2118, "step": 790 }, { "epoch": 0.02, "grad_norm": 0.48046875, "learning_rate": 9.97920997920998e-06, "loss": 2.2434, "step": 800 }, { "epoch": 0.02, "grad_norm": 0.50390625, "learning_rate": 1.0103950103950104e-05, "loss": 2.2281, "step": 810 }, { "epoch": 0.02, "grad_norm": 0.5546875, "learning_rate": 1.0228690228690229e-05, "loss": 2.2246, "step": 820 }, { "epoch": 0.02, "grad_norm": 0.4921875, "learning_rate": 1.0353430353430354e-05, "loss": 2.2342, "step": 830 }, { "epoch": 0.02, "grad_norm": 0.458984375, "learning_rate": 1.0478170478170479e-05, "loss": 2.2294, "step": 840 }, { "epoch": 0.02, "grad_norm": 0.482421875, "learning_rate": 1.0602910602910603e-05, "loss": 2.2337, "step": 850 }, { "epoch": 0.02, "grad_norm": 0.484375, "learning_rate": 1.0727650727650728e-05, "loss": 2.2471, "step": 860 }, { "epoch": 0.02, "grad_norm": 0.46484375, "learning_rate": 1.0852390852390853e-05, "loss": 2.2424, "step": 870 }, { "epoch": 0.02, "grad_norm": 0.4375, "learning_rate": 1.0977130977130978e-05, "loss": 2.2182, "step": 880 }, { "epoch": 0.02, "grad_norm": 0.46484375, "learning_rate": 1.1101871101871103e-05, "loss": 2.2314, "step": 890 }, { "epoch": 0.02, "grad_norm": 0.46484375, "learning_rate": 1.1226611226611228e-05, "loss": 2.2541, "step": 900 }, { "epoch": 0.02, "grad_norm": 0.451171875, "learning_rate": 1.1351351351351352e-05, "loss": 2.2618, "step": 910 }, { "epoch": 0.02, "grad_norm": 0.451171875, "learning_rate": 1.1476091476091477e-05, "loss": 2.2213, "step": 920 }, { "epoch": 0.02, "grad_norm": 0.46875, "learning_rate": 1.1600831600831602e-05, "loss": 2.2102, "step": 930 }, { "epoch": 0.02, "grad_norm": 0.49609375, "learning_rate": 1.1725571725571727e-05, "loss": 2.2121, "step": 940 }, { "epoch": 0.02, "grad_norm": 0.484375, "learning_rate": 1.1850311850311852e-05, "loss": 2.2016, "step": 950 }, { "epoch": 0.02, "grad_norm": 0.50390625, "learning_rate": 1.1975051975051976e-05, "loss": 2.2033, "step": 960 }, { "epoch": 0.02, "grad_norm": 0.515625, "learning_rate": 1.2099792099792101e-05, "loss": 2.2137, "step": 970 }, { "epoch": 0.02, "grad_norm": 0.54296875, "learning_rate": 1.2224532224532226e-05, "loss": 2.1972, "step": 980 }, { "epoch": 0.02, "grad_norm": 0.455078125, "learning_rate": 1.234927234927235e-05, "loss": 2.2055, "step": 990 }, { "epoch": 0.02, "grad_norm": 0.4609375, "learning_rate": 1.2474012474012476e-05, "loss": 2.209, "step": 1000 }, { "epoch": 0.02, "eval_accuracy": 0.5404174573055028, "eval_loss": 2.0921764373779297, "eval_runtime": 16.4273, "eval_samples_per_second": 36.22, "eval_steps_per_second": 1.157, "step": 1000 }, { "epoch": 0.02, "grad_norm": 0.462890625, "learning_rate": 1.25987525987526e-05, "loss": 2.2232, "step": 1010 }, { "epoch": 0.02, "grad_norm": 0.484375, "learning_rate": 1.2723492723492725e-05, "loss": 2.2188, "step": 1020 }, { "epoch": 0.02, "grad_norm": 0.4765625, "learning_rate": 1.284823284823285e-05, "loss": 2.2393, "step": 1030 }, { "epoch": 0.02, "grad_norm": 0.498046875, "learning_rate": 1.2972972972972975e-05, "loss": 2.2115, "step": 1040 }, { "epoch": 0.02, "grad_norm": 0.462890625, "learning_rate": 1.3097713097713098e-05, "loss": 2.2207, "step": 1050 }, { "epoch": 0.02, "grad_norm": 0.466796875, "learning_rate": 1.3222453222453223e-05, "loss": 2.2665, "step": 1060 }, { "epoch": 0.02, "grad_norm": 0.5234375, "learning_rate": 1.3347193347193348e-05, "loss": 2.2192, "step": 1070 }, { "epoch": 0.02, "grad_norm": 0.546875, "learning_rate": 1.3471933471933472e-05, "loss": 2.246, "step": 1080 }, { "epoch": 0.02, "grad_norm": 0.4453125, "learning_rate": 1.3596673596673597e-05, "loss": 2.2309, "step": 1090 }, { "epoch": 0.02, "grad_norm": 0.48828125, "learning_rate": 1.3721413721413722e-05, "loss": 2.2237, "step": 1100 }, { "epoch": 0.02, "grad_norm": 0.44921875, "learning_rate": 1.3846153846153847e-05, "loss": 2.2247, "step": 1110 }, { "epoch": 0.02, "grad_norm": 0.61328125, "learning_rate": 1.3970893970893972e-05, "loss": 2.2323, "step": 1120 }, { "epoch": 0.02, "grad_norm": 0.470703125, "learning_rate": 1.4095634095634096e-05, "loss": 2.2141, "step": 1130 }, { "epoch": 0.02, "grad_norm": 0.466796875, "learning_rate": 1.4220374220374221e-05, "loss": 2.239, "step": 1140 }, { "epoch": 0.02, "grad_norm": 0.48046875, "learning_rate": 1.4345114345114346e-05, "loss": 2.2332, "step": 1150 }, { "epoch": 0.02, "grad_norm": 0.470703125, "learning_rate": 1.446985446985447e-05, "loss": 2.1918, "step": 1160 }, { "epoch": 0.02, "grad_norm": 0.50390625, "learning_rate": 1.4594594594594596e-05, "loss": 2.1926, "step": 1170 }, { "epoch": 0.02, "grad_norm": 0.49609375, "learning_rate": 1.471933471933472e-05, "loss": 2.2116, "step": 1180 }, { "epoch": 0.02, "grad_norm": 0.6328125, "learning_rate": 1.4844074844074845e-05, "loss": 2.2029, "step": 1190 }, { "epoch": 0.02, "grad_norm": 0.470703125, "learning_rate": 1.496881496881497e-05, "loss": 2.2405, "step": 1200 }, { "epoch": 0.03, "grad_norm": 0.439453125, "learning_rate": 1.5093555093555095e-05, "loss": 2.2376, "step": 1210 }, { "epoch": 0.03, "grad_norm": 0.466796875, "learning_rate": 1.521829521829522e-05, "loss": 2.2264, "step": 1220 }, { "epoch": 0.03, "grad_norm": 0.47265625, "learning_rate": 1.5343035343035344e-05, "loss": 2.213, "step": 1230 }, { "epoch": 0.03, "grad_norm": 0.439453125, "learning_rate": 1.546777546777547e-05, "loss": 2.2164, "step": 1240 }, { "epoch": 0.03, "grad_norm": 0.439453125, "learning_rate": 1.5592515592515594e-05, "loss": 2.2008, "step": 1250 }, { "epoch": 0.03, "grad_norm": 0.4921875, "learning_rate": 1.571725571725572e-05, "loss": 2.2186, "step": 1260 }, { "epoch": 0.03, "grad_norm": 0.4609375, "learning_rate": 1.5841995841995844e-05, "loss": 2.1956, "step": 1270 }, { "epoch": 0.03, "grad_norm": 0.462890625, "learning_rate": 1.596673596673597e-05, "loss": 2.2043, "step": 1280 }, { "epoch": 0.03, "grad_norm": 0.498046875, "learning_rate": 1.6091476091476093e-05, "loss": 2.2153, "step": 1290 }, { "epoch": 0.03, "grad_norm": 0.482421875, "learning_rate": 1.6216216216216218e-05, "loss": 2.2334, "step": 1300 }, { "epoch": 0.03, "grad_norm": 0.484375, "learning_rate": 1.6340956340956343e-05, "loss": 2.224, "step": 1310 }, { "epoch": 0.03, "grad_norm": 0.4609375, "learning_rate": 1.6465696465696468e-05, "loss": 2.1958, "step": 1320 }, { "epoch": 0.03, "grad_norm": 0.474609375, "learning_rate": 1.6590436590436593e-05, "loss": 2.1913, "step": 1330 }, { "epoch": 0.03, "grad_norm": 0.47265625, "learning_rate": 1.6715176715176717e-05, "loss": 2.2135, "step": 1340 }, { "epoch": 0.03, "grad_norm": 0.478515625, "learning_rate": 1.6839916839916842e-05, "loss": 2.1942, "step": 1350 }, { "epoch": 0.03, "grad_norm": 0.462890625, "learning_rate": 1.6964656964656967e-05, "loss": 2.1935, "step": 1360 }, { "epoch": 0.03, "grad_norm": 0.470703125, "learning_rate": 1.7089397089397092e-05, "loss": 2.196, "step": 1370 }, { "epoch": 0.03, "grad_norm": 0.4375, "learning_rate": 1.7214137214137217e-05, "loss": 2.2106, "step": 1380 }, { "epoch": 0.03, "grad_norm": 0.478515625, "learning_rate": 1.733887733887734e-05, "loss": 2.191, "step": 1390 }, { "epoch": 0.03, "grad_norm": 0.4453125, "learning_rate": 1.7463617463617466e-05, "loss": 2.2065, "step": 1400 }, { "epoch": 0.03, "grad_norm": 0.46875, "learning_rate": 1.758835758835759e-05, "loss": 2.1872, "step": 1410 }, { "epoch": 0.03, "grad_norm": 0.48046875, "learning_rate": 1.7713097713097716e-05, "loss": 2.2071, "step": 1420 }, { "epoch": 0.03, "grad_norm": 0.455078125, "learning_rate": 1.783783783783784e-05, "loss": 2.1725, "step": 1430 }, { "epoch": 0.03, "grad_norm": 0.44140625, "learning_rate": 1.7962577962577965e-05, "loss": 2.203, "step": 1440 }, { "epoch": 0.03, "grad_norm": 0.546875, "learning_rate": 1.808731808731809e-05, "loss": 2.2122, "step": 1450 }, { "epoch": 0.03, "grad_norm": 0.474609375, "learning_rate": 1.8212058212058215e-05, "loss": 2.1848, "step": 1460 }, { "epoch": 0.03, "grad_norm": 0.45703125, "learning_rate": 1.833679833679834e-05, "loss": 2.1927, "step": 1470 }, { "epoch": 0.03, "grad_norm": 0.498046875, "learning_rate": 1.8461538461538465e-05, "loss": 2.2109, "step": 1480 }, { "epoch": 0.03, "grad_norm": 0.490234375, "learning_rate": 1.858627858627859e-05, "loss": 2.2122, "step": 1490 }, { "epoch": 0.03, "grad_norm": 0.466796875, "learning_rate": 1.871101871101871e-05, "loss": 2.1988, "step": 1500 }, { "epoch": 0.03, "eval_accuracy": 0.5435980843950483, "eval_loss": 2.074204683303833, "eval_runtime": 16.4328, "eval_samples_per_second": 36.208, "eval_steps_per_second": 1.156, "step": 1500 }, { "epoch": 0.03, "grad_norm": 0.5078125, "learning_rate": 1.8835758835758836e-05, "loss": 2.1792, "step": 1510 }, { "epoch": 0.03, "grad_norm": 0.494140625, "learning_rate": 1.896049896049896e-05, "loss": 2.1899, "step": 1520 }, { "epoch": 0.03, "grad_norm": 0.458984375, "learning_rate": 1.9085239085239085e-05, "loss": 2.2182, "step": 1530 }, { "epoch": 0.03, "grad_norm": 0.462890625, "learning_rate": 1.920997920997921e-05, "loss": 2.2034, "step": 1540 }, { "epoch": 0.03, "grad_norm": 0.466796875, "learning_rate": 1.9334719334719335e-05, "loss": 2.2021, "step": 1550 }, { "epoch": 0.03, "grad_norm": 0.462890625, "learning_rate": 1.945945945945946e-05, "loss": 2.2105, "step": 1560 }, { "epoch": 0.03, "grad_norm": 0.46484375, "learning_rate": 1.9584199584199584e-05, "loss": 2.2134, "step": 1570 }, { "epoch": 0.03, "grad_norm": 0.470703125, "learning_rate": 1.970893970893971e-05, "loss": 2.237, "step": 1580 }, { "epoch": 0.03, "grad_norm": 0.455078125, "learning_rate": 1.9833679833679834e-05, "loss": 2.1847, "step": 1590 }, { "epoch": 0.03, "grad_norm": 0.484375, "learning_rate": 1.995841995841996e-05, "loss": 2.2396, "step": 1600 }, { "epoch": 0.03, "grad_norm": 0.451171875, "learning_rate": 2.0083160083160084e-05, "loss": 2.2503, "step": 1610 }, { "epoch": 0.03, "grad_norm": 0.447265625, "learning_rate": 2.020790020790021e-05, "loss": 2.185, "step": 1620 }, { "epoch": 0.03, "grad_norm": 0.46484375, "learning_rate": 2.0332640332640333e-05, "loss": 2.2091, "step": 1630 }, { "epoch": 0.03, "grad_norm": 0.462890625, "learning_rate": 2.0457380457380458e-05, "loss": 2.1969, "step": 1640 }, { "epoch": 0.03, "grad_norm": 0.46875, "learning_rate": 2.0582120582120583e-05, "loss": 2.2218, "step": 1650 }, { "epoch": 0.03, "grad_norm": 0.447265625, "learning_rate": 2.0706860706860708e-05, "loss": 2.1999, "step": 1660 }, { "epoch": 0.03, "grad_norm": 0.486328125, "learning_rate": 2.0831600831600833e-05, "loss": 2.213, "step": 1670 }, { "epoch": 0.03, "grad_norm": 0.46484375, "learning_rate": 2.0956340956340957e-05, "loss": 2.207, "step": 1680 }, { "epoch": 0.04, "grad_norm": 0.4453125, "learning_rate": 2.1081081081081082e-05, "loss": 2.1633, "step": 1690 }, { "epoch": 0.04, "grad_norm": 0.453125, "learning_rate": 2.1205821205821207e-05, "loss": 2.2196, "step": 1700 }, { "epoch": 0.04, "grad_norm": 0.5, "learning_rate": 2.1330561330561332e-05, "loss": 2.2069, "step": 1710 }, { "epoch": 0.04, "grad_norm": 0.447265625, "learning_rate": 2.1455301455301457e-05, "loss": 2.2024, "step": 1720 }, { "epoch": 0.04, "grad_norm": 0.4609375, "learning_rate": 2.158004158004158e-05, "loss": 2.2111, "step": 1730 }, { "epoch": 0.04, "grad_norm": 0.474609375, "learning_rate": 2.1704781704781706e-05, "loss": 2.2228, "step": 1740 }, { "epoch": 0.04, "grad_norm": 0.4609375, "learning_rate": 2.182952182952183e-05, "loss": 2.2005, "step": 1750 }, { "epoch": 0.04, "grad_norm": 0.48046875, "learning_rate": 2.1954261954261956e-05, "loss": 2.1753, "step": 1760 }, { "epoch": 0.04, "grad_norm": 0.439453125, "learning_rate": 2.207900207900208e-05, "loss": 2.1739, "step": 1770 }, { "epoch": 0.04, "grad_norm": 0.4453125, "learning_rate": 2.2203742203742205e-05, "loss": 2.2136, "step": 1780 }, { "epoch": 0.04, "grad_norm": 0.453125, "learning_rate": 2.232848232848233e-05, "loss": 2.1799, "step": 1790 }, { "epoch": 0.04, "grad_norm": 0.462890625, "learning_rate": 2.2453222453222455e-05, "loss": 2.1874, "step": 1800 }, { "epoch": 0.04, "grad_norm": 0.5, "learning_rate": 2.257796257796258e-05, "loss": 2.1957, "step": 1810 }, { "epoch": 0.04, "grad_norm": 0.4609375, "learning_rate": 2.2702702702702705e-05, "loss": 2.19, "step": 1820 }, { "epoch": 0.04, "grad_norm": 0.462890625, "learning_rate": 2.282744282744283e-05, "loss": 2.2404, "step": 1830 }, { "epoch": 0.04, "grad_norm": 0.44140625, "learning_rate": 2.2952182952182954e-05, "loss": 2.2362, "step": 1840 }, { "epoch": 0.04, "grad_norm": 0.482421875, "learning_rate": 2.307692307692308e-05, "loss": 2.173, "step": 1850 }, { "epoch": 0.04, "grad_norm": 0.451171875, "learning_rate": 2.3201663201663204e-05, "loss": 2.1802, "step": 1860 }, { "epoch": 0.04, "grad_norm": 0.466796875, "learning_rate": 2.332640332640333e-05, "loss": 2.2011, "step": 1870 }, { "epoch": 0.04, "grad_norm": 0.451171875, "learning_rate": 2.3451143451143453e-05, "loss": 2.1798, "step": 1880 }, { "epoch": 0.04, "grad_norm": 0.44921875, "learning_rate": 2.3575883575883578e-05, "loss": 2.2108, "step": 1890 }, { "epoch": 0.04, "grad_norm": 0.46875, "learning_rate": 2.3700623700623703e-05, "loss": 2.1934, "step": 1900 }, { "epoch": 0.04, "grad_norm": 0.4375, "learning_rate": 2.3825363825363828e-05, "loss": 2.1969, "step": 1910 }, { "epoch": 0.04, "grad_norm": 1.0546875, "learning_rate": 2.3950103950103953e-05, "loss": 2.194, "step": 1920 }, { "epoch": 0.04, "grad_norm": 0.45703125, "learning_rate": 2.4074844074844078e-05, "loss": 2.2106, "step": 1930 }, { "epoch": 0.04, "grad_norm": 0.439453125, "learning_rate": 2.4199584199584202e-05, "loss": 2.2011, "step": 1940 }, { "epoch": 0.04, "grad_norm": 0.4453125, "learning_rate": 2.4324324324324327e-05, "loss": 2.2109, "step": 1950 }, { "epoch": 0.04, "grad_norm": 0.45703125, "learning_rate": 2.4449064449064452e-05, "loss": 2.1533, "step": 1960 }, { "epoch": 0.04, "grad_norm": 0.53515625, "learning_rate": 2.4573804573804577e-05, "loss": 2.2166, "step": 1970 }, { "epoch": 0.04, "grad_norm": 0.45703125, "learning_rate": 2.46985446985447e-05, "loss": 2.216, "step": 1980 }, { "epoch": 0.04, "grad_norm": 0.443359375, "learning_rate": 2.4823284823284826e-05, "loss": 2.165, "step": 1990 }, { "epoch": 0.04, "grad_norm": 0.451171875, "learning_rate": 2.494802494802495e-05, "loss": 2.1877, "step": 2000 }, { "epoch": 0.04, "eval_accuracy": 0.5462529879987186, "eval_loss": 2.0614967346191406, "eval_runtime": 16.4441, "eval_samples_per_second": 36.183, "eval_steps_per_second": 1.155, "step": 2000 }, { "epoch": 0.04, "grad_norm": 0.43359375, "learning_rate": 2.5072765072765076e-05, "loss": 2.1612, "step": 2010 }, { "epoch": 0.04, "grad_norm": 0.5, "learning_rate": 2.51975051975052e-05, "loss": 2.1958, "step": 2020 }, { "epoch": 0.04, "grad_norm": 0.439453125, "learning_rate": 2.5322245322245326e-05, "loss": 2.1965, "step": 2030 }, { "epoch": 0.04, "grad_norm": 0.458984375, "learning_rate": 2.544698544698545e-05, "loss": 2.1974, "step": 2040 }, { "epoch": 0.04, "grad_norm": 0.4453125, "learning_rate": 2.5571725571725575e-05, "loss": 2.1879, "step": 2050 }, { "epoch": 0.04, "grad_norm": 0.451171875, "learning_rate": 2.56964656964657e-05, "loss": 2.1704, "step": 2060 }, { "epoch": 0.04, "grad_norm": 0.45703125, "learning_rate": 2.5821205821205825e-05, "loss": 2.2034, "step": 2070 }, { "epoch": 0.04, "grad_norm": 0.44140625, "learning_rate": 2.594594594594595e-05, "loss": 2.1839, "step": 2080 }, { "epoch": 0.04, "grad_norm": 0.474609375, "learning_rate": 2.6070686070686074e-05, "loss": 2.1888, "step": 2090 }, { "epoch": 0.04, "grad_norm": 0.44140625, "learning_rate": 2.6195426195426196e-05, "loss": 2.1959, "step": 2100 }, { "epoch": 0.04, "grad_norm": 0.439453125, "learning_rate": 2.632016632016632e-05, "loss": 2.1911, "step": 2110 }, { "epoch": 0.04, "grad_norm": 0.453125, "learning_rate": 2.6444906444906445e-05, "loss": 2.1735, "step": 2120 }, { "epoch": 0.04, "grad_norm": 0.51171875, "learning_rate": 2.656964656964657e-05, "loss": 2.2117, "step": 2130 }, { "epoch": 0.04, "grad_norm": 0.4765625, "learning_rate": 2.6694386694386695e-05, "loss": 2.1977, "step": 2140 }, { "epoch": 0.04, "grad_norm": 0.51171875, "learning_rate": 2.681912681912682e-05, "loss": 2.1918, "step": 2150 }, { "epoch": 0.04, "grad_norm": 0.4453125, "learning_rate": 2.6943866943866945e-05, "loss": 2.2017, "step": 2160 }, { "epoch": 0.05, "grad_norm": 0.439453125, "learning_rate": 2.706860706860707e-05, "loss": 2.1722, "step": 2170 }, { "epoch": 0.05, "grad_norm": 0.447265625, "learning_rate": 2.7193347193347194e-05, "loss": 2.2026, "step": 2180 }, { "epoch": 0.05, "grad_norm": 0.4375, "learning_rate": 2.731808731808732e-05, "loss": 2.2064, "step": 2190 }, { "epoch": 0.05, "grad_norm": 0.59765625, "learning_rate": 2.7442827442827444e-05, "loss": 2.1722, "step": 2200 }, { "epoch": 0.05, "grad_norm": 0.46875, "learning_rate": 2.756756756756757e-05, "loss": 2.2206, "step": 2210 }, { "epoch": 0.05, "grad_norm": 0.451171875, "learning_rate": 2.7692307692307694e-05, "loss": 2.2084, "step": 2220 }, { "epoch": 0.05, "grad_norm": 0.4765625, "learning_rate": 2.781704781704782e-05, "loss": 2.1849, "step": 2230 }, { "epoch": 0.05, "grad_norm": 0.427734375, "learning_rate": 2.7941787941787943e-05, "loss": 2.1716, "step": 2240 }, { "epoch": 0.05, "grad_norm": 0.44140625, "learning_rate": 2.8066528066528068e-05, "loss": 2.1911, "step": 2250 }, { "epoch": 0.05, "grad_norm": 0.44140625, "learning_rate": 2.8191268191268193e-05, "loss": 2.2008, "step": 2260 }, { "epoch": 0.05, "grad_norm": 0.4375, "learning_rate": 2.8316008316008318e-05, "loss": 2.1709, "step": 2270 }, { "epoch": 0.05, "grad_norm": 0.427734375, "learning_rate": 2.8440748440748442e-05, "loss": 2.1745, "step": 2280 }, { "epoch": 0.05, "grad_norm": 0.44140625, "learning_rate": 2.8565488565488567e-05, "loss": 2.1732, "step": 2290 }, { "epoch": 0.05, "grad_norm": 0.46484375, "learning_rate": 2.8690228690228692e-05, "loss": 2.1823, "step": 2300 }, { "epoch": 0.05, "grad_norm": 0.4375, "learning_rate": 2.8814968814968817e-05, "loss": 2.1953, "step": 2310 }, { "epoch": 0.05, "grad_norm": 0.482421875, "learning_rate": 2.893970893970894e-05, "loss": 2.1624, "step": 2320 }, { "epoch": 0.05, "grad_norm": 0.4609375, "learning_rate": 2.9064449064449066e-05, "loss": 2.2022, "step": 2330 }, { "epoch": 0.05, "grad_norm": 0.63671875, "learning_rate": 2.918918918918919e-05, "loss": 2.1941, "step": 2340 }, { "epoch": 0.05, "grad_norm": 0.45703125, "learning_rate": 2.9313929313929316e-05, "loss": 2.1828, "step": 2350 }, { "epoch": 0.05, "grad_norm": 0.44921875, "learning_rate": 2.943866943866944e-05, "loss": 2.1982, "step": 2360 }, { "epoch": 0.05, "grad_norm": 0.435546875, "learning_rate": 2.9563409563409566e-05, "loss": 2.1808, "step": 2370 }, { "epoch": 0.05, "grad_norm": 0.4375, "learning_rate": 2.968814968814969e-05, "loss": 2.1894, "step": 2380 }, { "epoch": 0.05, "grad_norm": 0.88671875, "learning_rate": 2.9812889812889815e-05, "loss": 2.175, "step": 2390 }, { "epoch": 0.05, "grad_norm": 0.435546875, "learning_rate": 2.993762993762994e-05, "loss": 2.194, "step": 2400 }, { "epoch": 0.05, "grad_norm": 0.4609375, "learning_rate": 2.9999999113734896e-05, "loss": 2.2173, "step": 2410 }, { "epoch": 0.05, "grad_norm": 0.44921875, "learning_rate": 2.9999992023614688e-05, "loss": 2.2032, "step": 2420 }, { "epoch": 0.05, "grad_norm": 0.56640625, "learning_rate": 2.9999977843377625e-05, "loss": 2.1914, "step": 2430 }, { "epoch": 0.05, "grad_norm": 0.435546875, "learning_rate": 2.999995657303041e-05, "loss": 2.1739, "step": 2440 }, { "epoch": 0.05, "grad_norm": 0.43359375, "learning_rate": 2.9999928212583095e-05, "loss": 2.2069, "step": 2450 }, { "epoch": 0.05, "grad_norm": 0.431640625, "learning_rate": 2.9999892762049083e-05, "loss": 2.173, "step": 2460 }, { "epoch": 0.05, "grad_norm": 0.46875, "learning_rate": 2.999985022144514e-05, "loss": 2.1694, "step": 2470 }, { "epoch": 0.05, "grad_norm": 0.45703125, "learning_rate": 2.9999800590791363e-05, "loss": 2.1733, "step": 2480 }, { "epoch": 0.05, "grad_norm": 0.94921875, "learning_rate": 2.9999743870111215e-05, "loss": 2.191, "step": 2490 }, { "epoch": 0.05, "grad_norm": 0.4609375, "learning_rate": 2.999968005943151e-05, "loss": 2.1743, "step": 2500 }, { "epoch": 0.05, "eval_accuracy": 0.5478597304024249, "eval_loss": 2.0514397621154785, "eval_runtime": 16.4515, "eval_samples_per_second": 36.167, "eval_steps_per_second": 1.155, "step": 2500 }, { "epoch": 0.05, "grad_norm": 0.44921875, "learning_rate": 2.9999609158782405e-05, "loss": 2.2029, "step": 2510 }, { "epoch": 0.05, "grad_norm": 0.44921875, "learning_rate": 2.9999531168197414e-05, "loss": 2.1817, "step": 2520 }, { "epoch": 0.05, "grad_norm": 0.640625, "learning_rate": 2.99994460877134e-05, "loss": 2.2111, "step": 2530 }, { "epoch": 0.05, "grad_norm": 0.4453125, "learning_rate": 2.9999353917370584e-05, "loss": 2.1583, "step": 2540 }, { "epoch": 0.05, "grad_norm": 0.427734375, "learning_rate": 2.999925465721253e-05, "loss": 2.1684, "step": 2550 }, { "epoch": 0.05, "grad_norm": 0.44140625, "learning_rate": 2.999914830728615e-05, "loss": 2.1606, "step": 2560 }, { "epoch": 0.05, "grad_norm": 0.439453125, "learning_rate": 2.999903486764172e-05, "loss": 2.1909, "step": 2570 }, { "epoch": 0.05, "grad_norm": 0.478515625, "learning_rate": 2.9998914338332854e-05, "loss": 2.1801, "step": 2580 }, { "epoch": 0.05, "grad_norm": 0.45703125, "learning_rate": 2.9998786719416534e-05, "loss": 2.208, "step": 2590 }, { "epoch": 0.05, "grad_norm": 0.458984375, "learning_rate": 2.9998652010953068e-05, "loss": 2.1974, "step": 2600 }, { "epoch": 0.05, "grad_norm": 0.4453125, "learning_rate": 2.9998510213006138e-05, "loss": 2.1862, "step": 2610 }, { "epoch": 0.05, "grad_norm": 0.45703125, "learning_rate": 2.999836132564277e-05, "loss": 2.1778, "step": 2620 }, { "epoch": 0.05, "grad_norm": 0.458984375, "learning_rate": 2.9998205348933333e-05, "loss": 2.1937, "step": 2630 }, { "epoch": 0.05, "grad_norm": 0.46484375, "learning_rate": 2.9998042282951557e-05, "loss": 2.1744, "step": 2640 }, { "epoch": 0.06, "grad_norm": 0.453125, "learning_rate": 2.999787212777452e-05, "loss": 2.2019, "step": 2650 }, { "epoch": 0.06, "grad_norm": 0.5, "learning_rate": 2.999769488348264e-05, "loss": 2.1894, "step": 2660 }, { "epoch": 0.06, "grad_norm": 0.443359375, "learning_rate": 2.9997510550159716e-05, "loss": 2.1506, "step": 2670 }, { "epoch": 0.06, "grad_norm": 0.51171875, "learning_rate": 2.9997319127892858e-05, "loss": 2.1915, "step": 2680 }, { "epoch": 0.06, "grad_norm": 0.451171875, "learning_rate": 2.999712061677256e-05, "loss": 2.2194, "step": 2690 }, { "epoch": 0.06, "grad_norm": 0.48828125, "learning_rate": 2.9996915016892644e-05, "loss": 2.207, "step": 2700 }, { "epoch": 0.06, "grad_norm": 0.44921875, "learning_rate": 2.999670232835029e-05, "loss": 2.1725, "step": 2710 }, { "epoch": 0.06, "grad_norm": 0.43359375, "learning_rate": 2.9996482551246048e-05, "loss": 2.1666, "step": 2720 }, { "epoch": 0.06, "grad_norm": 0.419921875, "learning_rate": 2.999625568568378e-05, "loss": 2.186, "step": 2730 }, { "epoch": 0.06, "grad_norm": 0.45703125, "learning_rate": 2.999602173177073e-05, "loss": 2.1696, "step": 2740 }, { "epoch": 0.06, "grad_norm": 3.015625, "learning_rate": 2.9995780689617485e-05, "loss": 2.1701, "step": 2750 }, { "epoch": 0.06, "grad_norm": 0.44921875, "learning_rate": 2.999553255933797e-05, "loss": 2.1956, "step": 2760 }, { "epoch": 0.06, "grad_norm": 0.455078125, "learning_rate": 2.999527734104948e-05, "loss": 2.1865, "step": 2770 }, { "epoch": 0.06, "grad_norm": 0.427734375, "learning_rate": 2.9995015034872642e-05, "loss": 2.1913, "step": 2780 }, { "epoch": 0.06, "grad_norm": 0.51171875, "learning_rate": 2.9994745640931443e-05, "loss": 2.2192, "step": 2790 }, { "epoch": 0.06, "grad_norm": 0.46484375, "learning_rate": 2.999446915935322e-05, "loss": 2.1883, "step": 2800 }, { "epoch": 0.06, "grad_norm": 0.486328125, "learning_rate": 2.9994185590268666e-05, "loss": 2.1691, "step": 2810 }, { "epoch": 0.06, "grad_norm": 0.439453125, "learning_rate": 2.9993894933811806e-05, "loss": 2.1925, "step": 2820 }, { "epoch": 0.06, "grad_norm": 0.46484375, "learning_rate": 2.999359719012003e-05, "loss": 2.1876, "step": 2830 }, { "epoch": 0.06, "grad_norm": 0.41796875, "learning_rate": 2.9993292359334078e-05, "loss": 2.1517, "step": 2840 }, { "epoch": 0.06, "grad_norm": 0.439453125, "learning_rate": 2.9992980441598026e-05, "loss": 2.1792, "step": 2850 }, { "epoch": 0.06, "grad_norm": 0.44140625, "learning_rate": 2.999266143705932e-05, "loss": 2.181, "step": 2860 }, { "epoch": 0.06, "grad_norm": 0.44140625, "learning_rate": 2.9992335345868743e-05, "loss": 2.1869, "step": 2870 }, { "epoch": 0.06, "grad_norm": 0.4609375, "learning_rate": 2.9992002168180424e-05, "loss": 2.1378, "step": 2880 }, { "epoch": 0.06, "grad_norm": 0.4765625, "learning_rate": 2.999166190415185e-05, "loss": 2.1861, "step": 2890 }, { "epoch": 0.06, "grad_norm": 0.45703125, "learning_rate": 2.999131455394386e-05, "loss": 2.1751, "step": 2900 }, { "epoch": 0.06, "grad_norm": 0.4765625, "learning_rate": 2.9990960117720638e-05, "loss": 2.1862, "step": 2910 }, { "epoch": 0.06, "grad_norm": 0.431640625, "learning_rate": 2.9990598595649708e-05, "loss": 2.1737, "step": 2920 }, { "epoch": 0.06, "grad_norm": 0.4375, "learning_rate": 2.999022998790196e-05, "loss": 2.175, "step": 2930 }, { "epoch": 0.06, "grad_norm": 0.44921875, "learning_rate": 2.9989854294651622e-05, "loss": 2.1782, "step": 2940 }, { "epoch": 0.06, "grad_norm": 0.451171875, "learning_rate": 2.998947151607628e-05, "loss": 2.1847, "step": 2950 }, { "epoch": 0.06, "grad_norm": 0.478515625, "learning_rate": 2.9989081652356853e-05, "loss": 2.177, "step": 2960 }, { "epoch": 0.06, "grad_norm": 0.443359375, "learning_rate": 2.998868470367763e-05, "loss": 2.1798, "step": 2970 }, { "epoch": 0.06, "grad_norm": 0.431640625, "learning_rate": 2.998828067022624e-05, "loss": 2.1934, "step": 2980 }, { "epoch": 0.06, "grad_norm": 0.42578125, "learning_rate": 2.9987869552193653e-05, "loss": 2.2019, "step": 2990 }, { "epoch": 0.06, "grad_norm": 0.453125, "learning_rate": 2.998745134977419e-05, "loss": 2.1885, "step": 3000 }, { "epoch": 0.06, "eval_accuracy": 0.5495206880406122, "eval_loss": 2.0427331924438477, "eval_runtime": 16.4333, "eval_samples_per_second": 36.207, "eval_steps_per_second": 1.156, "step": 3000 }, { "epoch": 0.06, "grad_norm": 0.44140625, "learning_rate": 2.9987026063165534e-05, "loss": 2.16, "step": 3010 }, { "epoch": 0.06, "grad_norm": 0.44921875, "learning_rate": 2.9986593692568706e-05, "loss": 2.2021, "step": 3020 }, { "epoch": 0.06, "grad_norm": 0.439453125, "learning_rate": 2.9986154238188072e-05, "loss": 2.1832, "step": 3030 }, { "epoch": 0.06, "grad_norm": 0.4609375, "learning_rate": 2.9985707700231352e-05, "loss": 2.1827, "step": 3040 }, { "epoch": 0.06, "grad_norm": 0.427734375, "learning_rate": 2.998525407890962e-05, "loss": 2.1906, "step": 3050 }, { "epoch": 0.06, "grad_norm": 0.443359375, "learning_rate": 2.9984793374437283e-05, "loss": 2.1986, "step": 3060 }, { "epoch": 0.06, "grad_norm": 0.44140625, "learning_rate": 2.9984325587032103e-05, "loss": 2.1927, "step": 3070 }, { "epoch": 0.06, "grad_norm": 0.45703125, "learning_rate": 2.99838507169152e-05, "loss": 2.1772, "step": 3080 }, { "epoch": 0.06, "grad_norm": 0.45703125, "learning_rate": 2.998336876431103e-05, "loss": 2.1857, "step": 3090 }, { "epoch": 0.06, "grad_norm": 0.466796875, "learning_rate": 2.9982879729447396e-05, "loss": 2.1608, "step": 3100 }, { "epoch": 0.06, "grad_norm": 0.45703125, "learning_rate": 2.9982383612555453e-05, "loss": 2.1536, "step": 3110 }, { "epoch": 0.06, "grad_norm": 0.443359375, "learning_rate": 2.9981880413869707e-05, "loss": 2.1958, "step": 3120 }, { "epoch": 0.07, "grad_norm": 0.44140625, "learning_rate": 2.9981370133628008e-05, "loss": 2.196, "step": 3130 }, { "epoch": 0.07, "grad_norm": 0.458984375, "learning_rate": 2.9980852772071543e-05, "loss": 2.1935, "step": 3140 }, { "epoch": 0.07, "grad_norm": 0.45703125, "learning_rate": 2.9980328329444867e-05, "loss": 2.1475, "step": 3150 }, { "epoch": 0.07, "grad_norm": 0.451171875, "learning_rate": 2.9979796805995864e-05, "loss": 2.1844, "step": 3160 }, { "epoch": 0.07, "grad_norm": 0.44140625, "learning_rate": 2.9979258201975777e-05, "loss": 2.208, "step": 3170 }, { "epoch": 0.07, "grad_norm": 0.59375, "learning_rate": 2.9978712517639182e-05, "loss": 2.1819, "step": 3180 }, { "epoch": 0.07, "grad_norm": 0.439453125, "learning_rate": 2.9978159753244016e-05, "loss": 2.1717, "step": 3190 }, { "epoch": 0.07, "grad_norm": 0.44140625, "learning_rate": 2.9977599909051558e-05, "loss": 2.1665, "step": 3200 }, { "epoch": 0.07, "grad_norm": 0.427734375, "learning_rate": 2.9977032985326433e-05, "loss": 2.1821, "step": 3210 }, { "epoch": 0.07, "grad_norm": 0.458984375, "learning_rate": 2.9976458982336606e-05, "loss": 2.1882, "step": 3220 }, { "epoch": 0.07, "grad_norm": 0.435546875, "learning_rate": 2.9975877900353396e-05, "loss": 2.1881, "step": 3230 }, { "epoch": 0.07, "grad_norm": 0.435546875, "learning_rate": 2.9975289739651468e-05, "loss": 2.1546, "step": 3240 }, { "epoch": 0.07, "grad_norm": 0.451171875, "learning_rate": 2.997469450050883e-05, "loss": 2.1857, "step": 3250 }, { "epoch": 0.07, "grad_norm": 0.453125, "learning_rate": 2.9974092183206833e-05, "loss": 2.1829, "step": 3260 }, { "epoch": 0.07, "grad_norm": 0.4609375, "learning_rate": 2.997348278803018e-05, "loss": 2.1552, "step": 3270 }, { "epoch": 0.07, "grad_norm": 0.4609375, "learning_rate": 2.997286631526692e-05, "loss": 2.1917, "step": 3280 }, { "epoch": 0.07, "grad_norm": 0.43359375, "learning_rate": 2.9972242765208432e-05, "loss": 2.1862, "step": 3290 }, { "epoch": 0.07, "grad_norm": 0.435546875, "learning_rate": 2.997161213814947e-05, "loss": 2.178, "step": 3300 }, { "epoch": 0.07, "grad_norm": 0.427734375, "learning_rate": 2.9970974434388105e-05, "loss": 2.1697, "step": 3310 }, { "epoch": 0.07, "grad_norm": 0.435546875, "learning_rate": 2.997032965422576e-05, "loss": 2.162, "step": 3320 }, { "epoch": 0.07, "grad_norm": 0.4453125, "learning_rate": 2.996967779796721e-05, "loss": 2.1701, "step": 3330 }, { "epoch": 0.07, "grad_norm": 0.71875, "learning_rate": 2.9969018865920576e-05, "loss": 2.1776, "step": 3340 }, { "epoch": 0.07, "grad_norm": 0.435546875, "learning_rate": 2.9968352858397312e-05, "loss": 2.1803, "step": 3350 }, { "epoch": 0.07, "grad_norm": 0.4375, "learning_rate": 2.996767977571223e-05, "loss": 2.173, "step": 3360 }, { "epoch": 0.07, "grad_norm": 0.4453125, "learning_rate": 2.996699961818347e-05, "loss": 2.1556, "step": 3370 }, { "epoch": 0.07, "grad_norm": 0.443359375, "learning_rate": 2.9966312386132533e-05, "loss": 2.1581, "step": 3380 }, { "epoch": 0.07, "grad_norm": 0.416015625, "learning_rate": 2.996561807988425e-05, "loss": 2.1964, "step": 3390 }, { "epoch": 0.07, "grad_norm": 0.45703125, "learning_rate": 2.9964916699766805e-05, "loss": 2.1648, "step": 3400 }, { "epoch": 0.07, "grad_norm": 0.443359375, "learning_rate": 2.9964208246111723e-05, "loss": 2.208, "step": 3410 }, { "epoch": 0.07, "grad_norm": 0.431640625, "learning_rate": 2.9963492719253873e-05, "loss": 2.1908, "step": 3420 }, { "epoch": 0.07, "grad_norm": 0.416015625, "learning_rate": 2.9962770119531465e-05, "loss": 2.1643, "step": 3430 }, { "epoch": 0.07, "grad_norm": 0.51171875, "learning_rate": 2.9962040447286053e-05, "loss": 2.1964, "step": 3440 }, { "epoch": 0.07, "grad_norm": 0.462890625, "learning_rate": 2.9961303702862538e-05, "loss": 2.2022, "step": 3450 }, { "epoch": 0.07, "grad_norm": 0.45703125, "learning_rate": 2.9960559886609153e-05, "loss": 2.1782, "step": 3460 }, { "epoch": 0.07, "grad_norm": 0.625, "learning_rate": 2.9959808998877487e-05, "loss": 2.1755, "step": 3470 }, { "epoch": 0.07, "grad_norm": 0.443359375, "learning_rate": 2.9959051040022468e-05, "loss": 2.1573, "step": 3480 }, { "epoch": 0.07, "grad_norm": 0.453125, "learning_rate": 2.995828601040236e-05, "loss": 2.1782, "step": 3490 }, { "epoch": 0.07, "grad_norm": 0.427734375, "learning_rate": 2.9957513910378772e-05, "loss": 2.1883, "step": 3500 }, { "epoch": 0.07, "eval_accuracy": 0.5508530685001273, "eval_loss": 2.035475969314575, "eval_runtime": 16.4438, "eval_samples_per_second": 36.184, "eval_steps_per_second": 1.155, "step": 3500 }, { "epoch": 0.07, "grad_norm": 0.458984375, "learning_rate": 2.995673474031666e-05, "loss": 2.1762, "step": 3510 }, { "epoch": 0.07, "grad_norm": 0.431640625, "learning_rate": 2.9955948500584312e-05, "loss": 2.1798, "step": 3520 }, { "epoch": 0.07, "grad_norm": 0.482421875, "learning_rate": 2.995515519155337e-05, "loss": 2.1742, "step": 3530 }, { "epoch": 0.07, "grad_norm": 0.427734375, "learning_rate": 2.995435481359881e-05, "loss": 2.1825, "step": 3540 }, { "epoch": 0.07, "grad_norm": 0.42578125, "learning_rate": 2.9953547367098945e-05, "loss": 2.1573, "step": 3550 }, { "epoch": 0.07, "grad_norm": 0.45703125, "learning_rate": 2.995273285243544e-05, "loss": 2.1711, "step": 3560 }, { "epoch": 0.07, "grad_norm": 0.431640625, "learning_rate": 2.99519112699933e-05, "loss": 2.1965, "step": 3570 }, { "epoch": 0.07, "grad_norm": 0.42578125, "learning_rate": 2.9951082620160857e-05, "loss": 2.1536, "step": 3580 }, { "epoch": 0.07, "grad_norm": 0.447265625, "learning_rate": 2.9950246903329795e-05, "loss": 2.1725, "step": 3590 }, { "epoch": 0.07, "grad_norm": 0.4609375, "learning_rate": 2.9949404119895138e-05, "loss": 2.1544, "step": 3600 }, { "epoch": 0.08, "grad_norm": 0.435546875, "learning_rate": 2.9948554270255248e-05, "loss": 2.1752, "step": 3610 }, { "epoch": 0.08, "grad_norm": 0.43359375, "learning_rate": 2.9947697354811822e-05, "loss": 2.1633, "step": 3620 }, { "epoch": 0.08, "grad_norm": 0.46484375, "learning_rate": 2.9946833373969915e-05, "loss": 2.1848, "step": 3630 }, { "epoch": 0.08, "grad_norm": 0.458984375, "learning_rate": 2.9945962328137898e-05, "loss": 2.1922, "step": 3640 }, { "epoch": 0.08, "grad_norm": 0.455078125, "learning_rate": 2.99450842177275e-05, "loss": 2.1747, "step": 3650 }, { "epoch": 0.08, "grad_norm": 0.427734375, "learning_rate": 2.9944199043153768e-05, "loss": 2.1722, "step": 3660 }, { "epoch": 0.08, "grad_norm": 0.44140625, "learning_rate": 2.994330680483512e-05, "loss": 2.1634, "step": 3670 }, { "epoch": 0.08, "grad_norm": 0.44921875, "learning_rate": 2.994240750319328e-05, "loss": 2.1441, "step": 3680 }, { "epoch": 0.08, "grad_norm": 0.462890625, "learning_rate": 2.994150113865333e-05, "loss": 2.1594, "step": 3690 }, { "epoch": 0.08, "grad_norm": 0.46875, "learning_rate": 2.9940587711643693e-05, "loss": 2.1746, "step": 3700 }, { "epoch": 0.08, "grad_norm": 0.419921875, "learning_rate": 2.993966722259611e-05, "loss": 2.1848, "step": 3710 }, { "epoch": 0.08, "grad_norm": 0.482421875, "learning_rate": 2.9938739671945682e-05, "loss": 2.1767, "step": 3720 }, { "epoch": 0.08, "grad_norm": 0.44921875, "learning_rate": 2.9937805060130832e-05, "loss": 2.1459, "step": 3730 }, { "epoch": 0.08, "grad_norm": 0.44921875, "learning_rate": 2.9936863387593335e-05, "loss": 2.1789, "step": 3740 }, { "epoch": 0.08, "grad_norm": 0.46484375, "learning_rate": 2.9935914654778285e-05, "loss": 2.1755, "step": 3750 }, { "epoch": 0.08, "grad_norm": 0.4375, "learning_rate": 2.9934958862134138e-05, "loss": 2.1622, "step": 3760 }, { "epoch": 0.08, "grad_norm": 0.439453125, "learning_rate": 2.993399601011266e-05, "loss": 2.1468, "step": 3770 }, { "epoch": 0.08, "grad_norm": 0.447265625, "learning_rate": 2.993302609916898e-05, "loss": 2.1678, "step": 3780 }, { "epoch": 0.08, "grad_norm": 0.42578125, "learning_rate": 2.9932049129761536e-05, "loss": 2.1863, "step": 3790 }, { "epoch": 0.08, "grad_norm": 0.431640625, "learning_rate": 2.9931065102352127e-05, "loss": 2.1686, "step": 3800 }, { "epoch": 0.08, "grad_norm": 0.46875, "learning_rate": 2.9930074017405872e-05, "loss": 2.1671, "step": 3810 }, { "epoch": 0.08, "grad_norm": 0.435546875, "learning_rate": 2.9929075875391237e-05, "loss": 2.1884, "step": 3820 }, { "epoch": 0.08, "grad_norm": 0.427734375, "learning_rate": 2.9928070676780016e-05, "loss": 2.1645, "step": 3830 }, { "epoch": 0.08, "grad_norm": 0.4453125, "learning_rate": 2.992705842204734e-05, "loss": 2.1535, "step": 3840 }, { "epoch": 0.08, "grad_norm": 0.5390625, "learning_rate": 2.9926039111671676e-05, "loss": 2.1627, "step": 3850 }, { "epoch": 0.08, "grad_norm": 0.421875, "learning_rate": 2.992501274613483e-05, "loss": 2.1422, "step": 3860 }, { "epoch": 0.08, "grad_norm": 0.41796875, "learning_rate": 2.9923979325921936e-05, "loss": 2.2044, "step": 3870 }, { "epoch": 0.08, "grad_norm": 0.50390625, "learning_rate": 2.992293885152146e-05, "loss": 2.2004, "step": 3880 }, { "epoch": 0.08, "grad_norm": 0.458984375, "learning_rate": 2.9921891323425224e-05, "loss": 2.1717, "step": 3890 }, { "epoch": 0.08, "grad_norm": 0.423828125, "learning_rate": 2.9920836742128352e-05, "loss": 2.1866, "step": 3900 }, { "epoch": 0.08, "grad_norm": 0.4296875, "learning_rate": 2.9919775108129325e-05, "loss": 2.1442, "step": 3910 }, { "epoch": 0.08, "grad_norm": 0.42578125, "learning_rate": 2.9918706421929945e-05, "loss": 2.1765, "step": 3920 }, { "epoch": 0.08, "grad_norm": 0.427734375, "learning_rate": 2.991763068403536e-05, "loss": 2.1461, "step": 3930 }, { "epoch": 0.08, "grad_norm": 0.439453125, "learning_rate": 2.9916547894954042e-05, "loss": 2.1789, "step": 3940 }, { "epoch": 0.08, "grad_norm": 0.431640625, "learning_rate": 2.9915458055197797e-05, "loss": 2.1713, "step": 3950 }, { "epoch": 0.08, "grad_norm": 0.4296875, "learning_rate": 2.9914361165281765e-05, "loss": 2.178, "step": 3960 }, { "epoch": 0.08, "grad_norm": 0.44921875, "learning_rate": 2.9913257225724417e-05, "loss": 2.1776, "step": 3970 }, { "epoch": 0.08, "grad_norm": 0.447265625, "learning_rate": 2.991214623704756e-05, "loss": 2.169, "step": 3980 }, { "epoch": 0.08, "grad_norm": 0.4296875, "learning_rate": 2.991102819977633e-05, "loss": 2.1448, "step": 3990 }, { "epoch": 0.08, "grad_norm": 0.57421875, "learning_rate": 2.990990311443919e-05, "loss": 2.1954, "step": 4000 }, { "epoch": 0.08, "eval_accuracy": 0.5519078012436647, "eval_loss": 2.029829263687134, "eval_runtime": 16.4327, "eval_samples_per_second": 36.208, "eval_steps_per_second": 1.156, "step": 4000 }, { "epoch": 0.08, "grad_norm": 0.435546875, "learning_rate": 2.9908770981567944e-05, "loss": 2.1643, "step": 4010 }, { "epoch": 0.08, "grad_norm": 0.4453125, "learning_rate": 2.9907631801697724e-05, "loss": 2.169, "step": 4020 }, { "epoch": 0.08, "grad_norm": 0.43359375, "learning_rate": 2.990648557536699e-05, "loss": 2.1123, "step": 4030 }, { "epoch": 0.08, "grad_norm": 0.470703125, "learning_rate": 2.990533230311753e-05, "loss": 2.1751, "step": 4040 }, { "epoch": 0.08, "grad_norm": 0.453125, "learning_rate": 2.990417198549447e-05, "loss": 2.1754, "step": 4050 }, { "epoch": 0.08, "grad_norm": 0.431640625, "learning_rate": 2.9903004623046264e-05, "loss": 2.1633, "step": 4060 }, { "epoch": 0.08, "grad_norm": 0.52734375, "learning_rate": 2.9901830216324694e-05, "loss": 2.203, "step": 4070 }, { "epoch": 0.08, "grad_norm": 0.453125, "learning_rate": 2.990064876588487e-05, "loss": 2.1776, "step": 4080 }, { "epoch": 0.09, "grad_norm": 0.443359375, "learning_rate": 2.9899460272285237e-05, "loss": 2.1538, "step": 4090 }, { "epoch": 0.09, "grad_norm": 0.4375, "learning_rate": 2.9898264736087565e-05, "loss": 2.1649, "step": 4100 }, { "epoch": 0.09, "grad_norm": 0.4609375, "learning_rate": 2.9897062157856955e-05, "loss": 2.1647, "step": 4110 }, { "epoch": 0.09, "grad_norm": 0.431640625, "learning_rate": 2.9895852538161826e-05, "loss": 2.1567, "step": 4120 }, { "epoch": 0.09, "grad_norm": 0.4375, "learning_rate": 2.989463587757395e-05, "loss": 2.1186, "step": 4130 }, { "epoch": 0.09, "grad_norm": 0.44921875, "learning_rate": 2.9893412176668396e-05, "loss": 2.1593, "step": 4140 }, { "epoch": 0.09, "grad_norm": 0.439453125, "learning_rate": 2.9892181436023592e-05, "loss": 2.1578, "step": 4150 }, { "epoch": 0.09, "grad_norm": 0.439453125, "learning_rate": 2.9890943656221262e-05, "loss": 2.2018, "step": 4160 }, { "epoch": 0.09, "grad_norm": 0.462890625, "learning_rate": 2.988969883784649e-05, "loss": 2.168, "step": 4170 }, { "epoch": 0.09, "grad_norm": 0.474609375, "learning_rate": 2.9888446981487656e-05, "loss": 2.1636, "step": 4180 }, { "epoch": 0.09, "grad_norm": 0.451171875, "learning_rate": 2.988718808773649e-05, "loss": 2.1722, "step": 4190 }, { "epoch": 0.09, "grad_norm": 0.46875, "learning_rate": 2.988592215718803e-05, "loss": 2.1896, "step": 4200 }, { "epoch": 0.09, "grad_norm": 0.470703125, "learning_rate": 2.9884649190440663e-05, "loss": 2.175, "step": 4210 }, { "epoch": 0.09, "grad_norm": 0.453125, "learning_rate": 2.9883369188096074e-05, "loss": 2.1928, "step": 4220 }, { "epoch": 0.09, "grad_norm": 0.478515625, "learning_rate": 2.9882082150759295e-05, "loss": 2.1761, "step": 4230 }, { "epoch": 0.09, "grad_norm": 0.421875, "learning_rate": 2.9880788079038675e-05, "loss": 2.1722, "step": 4240 }, { "epoch": 0.09, "grad_norm": 0.44140625, "learning_rate": 2.9879486973545892e-05, "loss": 2.1427, "step": 4250 }, { "epoch": 0.09, "grad_norm": 0.46484375, "learning_rate": 2.987817883489594e-05, "loss": 2.1735, "step": 4260 }, { "epoch": 0.09, "grad_norm": 0.439453125, "learning_rate": 2.9876863663707147e-05, "loss": 2.1916, "step": 4270 }, { "epoch": 0.09, "grad_norm": 0.423828125, "learning_rate": 2.9875541460601156e-05, "loss": 2.1778, "step": 4280 }, { "epoch": 0.09, "grad_norm": 0.44921875, "learning_rate": 2.9874212226202946e-05, "loss": 2.1595, "step": 4290 }, { "epoch": 0.09, "grad_norm": 0.443359375, "learning_rate": 2.987287596114081e-05, "loss": 2.1578, "step": 4300 }, { "epoch": 0.09, "grad_norm": 0.431640625, "learning_rate": 2.987153266604636e-05, "loss": 2.1737, "step": 4310 }, { "epoch": 0.09, "grad_norm": 0.455078125, "learning_rate": 2.9870182341554552e-05, "loss": 2.1528, "step": 4320 }, { "epoch": 0.09, "grad_norm": 0.4921875, "learning_rate": 2.9868824988303637e-05, "loss": 2.1525, "step": 4330 }, { "epoch": 0.09, "grad_norm": 0.443359375, "learning_rate": 2.986746060693521e-05, "loss": 2.1484, "step": 4340 }, { "epoch": 0.09, "grad_norm": 0.44140625, "learning_rate": 2.986608919809417e-05, "loss": 2.149, "step": 4350 }, { "epoch": 0.09, "grad_norm": 0.4375, "learning_rate": 2.9864710762428755e-05, "loss": 2.1657, "step": 4360 }, { "epoch": 0.09, "grad_norm": 0.466796875, "learning_rate": 2.9863325300590517e-05, "loss": 2.1317, "step": 4370 }, { "epoch": 0.09, "grad_norm": 0.44921875, "learning_rate": 2.9861932813234324e-05, "loss": 2.1697, "step": 4380 }, { "epoch": 0.09, "grad_norm": 0.4140625, "learning_rate": 2.9860533301018376e-05, "loss": 2.1679, "step": 4390 }, { "epoch": 0.09, "grad_norm": 0.458984375, "learning_rate": 2.9859126764604185e-05, "loss": 2.1864, "step": 4400 }, { "epoch": 0.09, "grad_norm": 0.5078125, "learning_rate": 2.9857713204656576e-05, "loss": 2.1578, "step": 4410 }, { "epoch": 0.09, "grad_norm": 0.447265625, "learning_rate": 2.985629262184371e-05, "loss": 2.153, "step": 4420 }, { "epoch": 0.09, "grad_norm": 0.44140625, "learning_rate": 2.985486501683707e-05, "loss": 2.1819, "step": 4430 }, { "epoch": 0.09, "grad_norm": 0.4140625, "learning_rate": 2.9853430390311434e-05, "loss": 2.1473, "step": 4440 }, { "epoch": 0.09, "grad_norm": 0.44140625, "learning_rate": 2.9851988742944916e-05, "loss": 2.1792, "step": 4450 }, { "epoch": 0.09, "grad_norm": 0.431640625, "learning_rate": 2.9850540075418957e-05, "loss": 2.1412, "step": 4460 }, { "epoch": 0.09, "grad_norm": 0.435546875, "learning_rate": 2.9849084388418293e-05, "loss": 2.1679, "step": 4470 }, { "epoch": 0.09, "grad_norm": 0.455078125, "learning_rate": 2.9847621682631e-05, "loss": 2.1457, "step": 4480 }, { "epoch": 0.09, "grad_norm": 0.427734375, "learning_rate": 2.9846151958748453e-05, "loss": 2.1461, "step": 4490 }, { "epoch": 0.09, "grad_norm": 0.6640625, "learning_rate": 2.984467521746536e-05, "loss": 2.1597, "step": 4500 }, { "epoch": 0.09, "eval_accuracy": 0.5525633127151154, "eval_loss": 2.025430202484131, "eval_runtime": 16.4538, "eval_samples_per_second": 36.162, "eval_steps_per_second": 1.155, "step": 4500 }, { "epoch": 0.09, "grad_norm": 0.451171875, "learning_rate": 2.9843191459479735e-05, "loss": 2.1518, "step": 4510 }, { "epoch": 0.09, "grad_norm": 0.439453125, "learning_rate": 2.9841700685492918e-05, "loss": 2.1793, "step": 4520 }, { "epoch": 0.09, "grad_norm": 0.451171875, "learning_rate": 2.9840202896209557e-05, "loss": 2.1419, "step": 4530 }, { "epoch": 0.09, "grad_norm": 0.435546875, "learning_rate": 2.983869809233762e-05, "loss": 2.1445, "step": 4540 }, { "epoch": 0.09, "grad_norm": 0.416015625, "learning_rate": 2.9837186274588388e-05, "loss": 2.1577, "step": 4550 }, { "epoch": 0.09, "grad_norm": 0.43359375, "learning_rate": 2.983566744367646e-05, "loss": 2.1443, "step": 4560 }, { "epoch": 0.1, "grad_norm": 0.421875, "learning_rate": 2.983414160031975e-05, "loss": 2.1516, "step": 4570 }, { "epoch": 0.1, "grad_norm": 0.44140625, "learning_rate": 2.9832608745239484e-05, "loss": 2.154, "step": 4580 }, { "epoch": 0.1, "grad_norm": 0.427734375, "learning_rate": 2.98310688791602e-05, "loss": 2.1485, "step": 4590 }, { "epoch": 0.1, "grad_norm": 0.44140625, "learning_rate": 2.9829522002809764e-05, "loss": 2.1782, "step": 4600 }, { "epoch": 0.1, "grad_norm": 0.43359375, "learning_rate": 2.9827968116919338e-05, "loss": 2.1538, "step": 4610 }, { "epoch": 0.1, "grad_norm": 0.4296875, "learning_rate": 2.982640722222341e-05, "loss": 2.1917, "step": 4620 }, { "epoch": 0.1, "grad_norm": 0.484375, "learning_rate": 2.9824839319459766e-05, "loss": 2.1765, "step": 4630 }, { "epoch": 0.1, "grad_norm": 0.453125, "learning_rate": 2.982326440936952e-05, "loss": 2.1733, "step": 4640 }, { "epoch": 0.1, "grad_norm": 0.416015625, "learning_rate": 2.982168249269709e-05, "loss": 2.1609, "step": 4650 }, { "epoch": 0.1, "grad_norm": 0.43359375, "learning_rate": 2.9820093570190217e-05, "loss": 2.1473, "step": 4660 }, { "epoch": 0.1, "grad_norm": 0.453125, "learning_rate": 2.981849764259993e-05, "loss": 2.1841, "step": 4670 }, { "epoch": 0.1, "grad_norm": 0.4609375, "learning_rate": 2.9816894710680595e-05, "loss": 2.1769, "step": 4680 }, { "epoch": 0.1, "grad_norm": 0.427734375, "learning_rate": 2.981528477518987e-05, "loss": 2.1594, "step": 4690 }, { "epoch": 0.1, "grad_norm": 0.4296875, "learning_rate": 2.9813667836888737e-05, "loss": 2.157, "step": 4700 }, { "epoch": 0.1, "grad_norm": 0.4609375, "learning_rate": 2.9812043896541477e-05, "loss": 2.1651, "step": 4710 }, { "epoch": 0.1, "grad_norm": 0.44140625, "learning_rate": 2.981041295491569e-05, "loss": 2.1504, "step": 4720 }, { "epoch": 0.1, "grad_norm": 0.42578125, "learning_rate": 2.9808775012782273e-05, "loss": 2.1422, "step": 4730 }, { "epoch": 0.1, "grad_norm": 0.53515625, "learning_rate": 2.9807130070915448e-05, "loss": 2.183, "step": 4740 }, { "epoch": 0.1, "grad_norm": 0.431640625, "learning_rate": 2.9805478130092735e-05, "loss": 2.1432, "step": 4750 }, { "epoch": 0.1, "grad_norm": 0.427734375, "learning_rate": 2.9803819191094962e-05, "loss": 2.1552, "step": 4760 }, { "epoch": 0.1, "grad_norm": 0.4453125, "learning_rate": 2.9802153254706272e-05, "loss": 2.1795, "step": 4770 }, { "epoch": 0.1, "grad_norm": 0.439453125, "learning_rate": 2.9800480321714107e-05, "loss": 2.1725, "step": 4780 }, { "epoch": 0.1, "grad_norm": 0.427734375, "learning_rate": 2.979880039290922e-05, "loss": 2.1677, "step": 4790 }, { "epoch": 0.1, "grad_norm": 0.44140625, "learning_rate": 2.9797113469085676e-05, "loss": 2.1296, "step": 4800 }, { "epoch": 0.1, "grad_norm": 0.46484375, "learning_rate": 2.9795419551040836e-05, "loss": 2.1279, "step": 4810 }, { "epoch": 0.1, "grad_norm": 0.4453125, "learning_rate": 2.9793718639575376e-05, "loss": 2.1503, "step": 4820 }, { "epoch": 0.1, "grad_norm": 0.48046875, "learning_rate": 2.979201073549327e-05, "loss": 2.1773, "step": 4830 }, { "epoch": 0.1, "grad_norm": 0.423828125, "learning_rate": 2.97902958396018e-05, "loss": 2.1607, "step": 4840 }, { "epoch": 0.1, "grad_norm": 0.431640625, "learning_rate": 2.978857395271156e-05, "loss": 2.1747, "step": 4850 }, { "epoch": 0.1, "grad_norm": 0.462890625, "learning_rate": 2.978684507563644e-05, "loss": 2.1379, "step": 4860 }, { "epoch": 0.1, "grad_norm": 0.44921875, "learning_rate": 2.9785109209193633e-05, "loss": 2.179, "step": 4870 }, { "epoch": 0.1, "grad_norm": 0.42578125, "learning_rate": 2.9783366354203646e-05, "loss": 2.1831, "step": 4880 }, { "epoch": 0.1, "grad_norm": 0.4375, "learning_rate": 2.9781616511490278e-05, "loss": 2.1485, "step": 4890 }, { "epoch": 0.1, "grad_norm": 0.466796875, "learning_rate": 2.977985968188063e-05, "loss": 2.1787, "step": 4900 }, { "epoch": 0.1, "grad_norm": 0.435546875, "learning_rate": 2.977809586620512e-05, "loss": 2.1773, "step": 4910 }, { "epoch": 0.1, "grad_norm": 0.4609375, "learning_rate": 2.9776325065297455e-05, "loss": 2.151, "step": 4920 }, { "epoch": 0.1, "grad_norm": 0.427734375, "learning_rate": 2.9774547279994648e-05, "loss": 2.1736, "step": 4930 }, { "epoch": 0.1, "grad_norm": 0.43359375, "learning_rate": 2.9772762511137018e-05, "loss": 2.1672, "step": 4940 }, { "epoch": 0.1, "grad_norm": 0.443359375, "learning_rate": 2.977097075956817e-05, "loss": 2.1245, "step": 4950 }, { "epoch": 0.1, "grad_norm": 0.435546875, "learning_rate": 2.976917202613503e-05, "loss": 2.1383, "step": 4960 }, { "epoch": 0.1, "grad_norm": 0.458984375, "learning_rate": 2.9767366311687802e-05, "loss": 2.1776, "step": 4970 }, { "epoch": 0.1, "grad_norm": 0.4453125, "learning_rate": 2.9765553617080016e-05, "loss": 2.1864, "step": 4980 }, { "epoch": 0.1, "grad_norm": 0.43359375, "learning_rate": 2.9763733943168478e-05, "loss": 2.123, "step": 4990 }, { "epoch": 0.1, "grad_norm": 0.4140625, "learning_rate": 2.97619072908133e-05, "loss": 2.1763, "step": 5000 }, { "epoch": 0.1, "eval_accuracy": 0.5531876093545923, "eval_loss": 2.0221822261810303, "eval_runtime": 16.4327, "eval_samples_per_second": 36.208, "eval_steps_per_second": 1.156, "step": 5000 }, { "epoch": 0.1, "grad_norm": 0.44921875, "learning_rate": 2.9760073660877904e-05, "loss": 2.1835, "step": 5010 }, { "epoch": 0.1, "grad_norm": 0.42578125, "learning_rate": 2.975823305422899e-05, "loss": 2.1592, "step": 5020 }, { "epoch": 0.1, "grad_norm": 0.431640625, "learning_rate": 2.9756385471736573e-05, "loss": 2.1762, "step": 5030 }, { "epoch": 0.1, "grad_norm": 0.45703125, "learning_rate": 2.9754530914273953e-05, "loss": 2.1572, "step": 5040 }, { "epoch": 0.1, "grad_norm": 0.4296875, "learning_rate": 2.9752669382717735e-05, "loss": 2.1721, "step": 5050 }, { "epoch": 0.11, "grad_norm": 0.4453125, "learning_rate": 2.975080087794782e-05, "loss": 2.1808, "step": 5060 }, { "epoch": 0.11, "grad_norm": 0.52734375, "learning_rate": 2.97489254008474e-05, "loss": 2.1674, "step": 5070 }, { "epoch": 0.11, "grad_norm": 0.421875, "learning_rate": 2.974704295230297e-05, "loss": 2.1795, "step": 5080 }, { "epoch": 0.11, "grad_norm": 0.439453125, "learning_rate": 2.974515353320431e-05, "loss": 2.1636, "step": 5090 }, { "epoch": 0.11, "grad_norm": 0.4296875, "learning_rate": 2.9743257144444504e-05, "loss": 2.1463, "step": 5100 }, { "epoch": 0.11, "grad_norm": 0.435546875, "learning_rate": 2.9741353786919925e-05, "loss": 2.1325, "step": 5110 }, { "epoch": 0.11, "grad_norm": 0.435546875, "learning_rate": 2.9739443461530248e-05, "loss": 2.1762, "step": 5120 }, { "epoch": 0.11, "grad_norm": 0.46484375, "learning_rate": 2.9737526169178425e-05, "loss": 2.156, "step": 5130 }, { "epoch": 0.11, "grad_norm": 0.484375, "learning_rate": 2.9735601910770723e-05, "loss": 2.1744, "step": 5140 }, { "epoch": 0.11, "grad_norm": 0.453125, "learning_rate": 2.9733670687216676e-05, "loss": 2.1716, "step": 5150 }, { "epoch": 0.11, "grad_norm": 0.455078125, "learning_rate": 2.973173249942914e-05, "loss": 2.163, "step": 5160 }, { "epoch": 0.11, "grad_norm": 0.435546875, "learning_rate": 2.9729787348324243e-05, "loss": 2.1575, "step": 5170 }, { "epoch": 0.11, "grad_norm": 0.44140625, "learning_rate": 2.97278352348214e-05, "loss": 2.137, "step": 5180 }, { "epoch": 0.11, "grad_norm": 0.482421875, "learning_rate": 2.9725876159843334e-05, "loss": 2.1418, "step": 5190 }, { "epoch": 0.11, "grad_norm": 0.43359375, "learning_rate": 2.972391012431605e-05, "loss": 2.1677, "step": 5200 }, { "epoch": 0.11, "grad_norm": 0.443359375, "learning_rate": 2.9721937129168837e-05, "loss": 2.1643, "step": 5210 }, { "epoch": 0.11, "grad_norm": 0.427734375, "learning_rate": 2.9719957175334288e-05, "loss": 2.1604, "step": 5220 }, { "epoch": 0.11, "grad_norm": 0.427734375, "learning_rate": 2.9717970263748272e-05, "loss": 2.1921, "step": 5230 }, { "epoch": 0.11, "grad_norm": 0.453125, "learning_rate": 2.9715976395349954e-05, "loss": 2.1282, "step": 5240 }, { "epoch": 0.11, "grad_norm": 0.462890625, "learning_rate": 2.9713975571081785e-05, "loss": 2.1811, "step": 5250 }, { "epoch": 0.11, "grad_norm": 0.6484375, "learning_rate": 2.97119677918895e-05, "loss": 2.1442, "step": 5260 }, { "epoch": 0.11, "grad_norm": 0.4296875, "learning_rate": 2.9709953058722134e-05, "loss": 2.1748, "step": 5270 }, { "epoch": 0.11, "grad_norm": 0.458984375, "learning_rate": 2.970793137253199e-05, "loss": 2.1495, "step": 5280 }, { "epoch": 0.11, "grad_norm": 0.44921875, "learning_rate": 2.970590273427468e-05, "loss": 2.1817, "step": 5290 }, { "epoch": 0.11, "grad_norm": 0.47265625, "learning_rate": 2.9703867144909077e-05, "loss": 2.134, "step": 5300 }, { "epoch": 0.11, "grad_norm": 0.435546875, "learning_rate": 2.970182460539736e-05, "loss": 2.1547, "step": 5310 }, { "epoch": 0.11, "grad_norm": 0.4453125, "learning_rate": 2.9699775116704987e-05, "loss": 2.1353, "step": 5320 }, { "epoch": 0.11, "grad_norm": 0.43359375, "learning_rate": 2.96977186798007e-05, "loss": 2.1861, "step": 5330 }, { "epoch": 0.11, "grad_norm": 0.443359375, "learning_rate": 2.9695655295656513e-05, "loss": 2.1293, "step": 5340 }, { "epoch": 0.11, "grad_norm": 0.46875, "learning_rate": 2.9693584965247748e-05, "loss": 2.1599, "step": 5350 }, { "epoch": 0.11, "grad_norm": 0.4296875, "learning_rate": 2.9691507689553e-05, "loss": 2.1656, "step": 5360 }, { "epoch": 0.11, "grad_norm": 0.45703125, "learning_rate": 2.968942346955413e-05, "loss": 2.1774, "step": 5370 }, { "epoch": 0.11, "grad_norm": 0.451171875, "learning_rate": 2.968733230623631e-05, "loss": 2.1422, "step": 5380 }, { "epoch": 0.11, "grad_norm": 0.4609375, "learning_rate": 2.968523420058797e-05, "loss": 2.1833, "step": 5390 }, { "epoch": 0.11, "grad_norm": 0.431640625, "learning_rate": 2.9683129153600843e-05, "loss": 2.1832, "step": 5400 }, { "epoch": 0.11, "grad_norm": 0.423828125, "learning_rate": 2.968101716626992e-05, "loss": 2.1365, "step": 5410 }, { "epoch": 0.11, "grad_norm": 0.451171875, "learning_rate": 2.9678898239593494e-05, "loss": 2.1635, "step": 5420 }, { "epoch": 0.11, "grad_norm": 0.466796875, "learning_rate": 2.9676772374573118e-05, "loss": 2.1438, "step": 5430 }, { "epoch": 0.11, "grad_norm": 0.439453125, "learning_rate": 2.9674639572213638e-05, "loss": 2.1447, "step": 5440 }, { "epoch": 0.11, "grad_norm": 0.4296875, "learning_rate": 2.9672499833523183e-05, "loss": 2.1231, "step": 5450 }, { "epoch": 0.11, "grad_norm": 0.435546875, "learning_rate": 2.9670353159513148e-05, "loss": 2.1225, "step": 5460 }, { "epoch": 0.11, "grad_norm": 0.458984375, "learning_rate": 2.9668199551198205e-05, "loss": 2.1661, "step": 5470 }, { "epoch": 0.11, "grad_norm": 0.45703125, "learning_rate": 2.966603900959632e-05, "loss": 2.1532, "step": 5480 }, { "epoch": 0.11, "grad_norm": 0.47265625, "learning_rate": 2.9663871535728725e-05, "loss": 2.1839, "step": 5490 }, { "epoch": 0.11, "grad_norm": 0.44140625, "learning_rate": 2.9661697130619924e-05, "loss": 2.1413, "step": 5500 }, { "epoch": 0.11, "eval_accuracy": 0.5540747677370068, "eval_loss": 2.0195205211639404, "eval_runtime": 16.4587, "eval_samples_per_second": 36.151, "eval_steps_per_second": 1.154, "step": 5500 }, { "epoch": 0.11, "grad_norm": 0.4609375, "learning_rate": 2.9659515795297703e-05, "loss": 2.1639, "step": 5510 }, { "epoch": 0.11, "grad_norm": 0.47265625, "learning_rate": 2.9657327530793137e-05, "loss": 2.1437, "step": 5520 }, { "epoch": 0.11, "grad_norm": 0.423828125, "learning_rate": 2.965513233814055e-05, "loss": 2.1736, "step": 5530 }, { "epoch": 0.12, "grad_norm": 0.4375, "learning_rate": 2.965293021837756e-05, "loss": 2.1426, "step": 5540 }, { "epoch": 0.12, "grad_norm": 0.462890625, "learning_rate": 2.9650721172545048e-05, "loss": 2.1912, "step": 5550 }, { "epoch": 0.12, "grad_norm": 0.43359375, "learning_rate": 2.9648505201687184e-05, "loss": 2.1682, "step": 5560 }, { "epoch": 0.12, "grad_norm": 0.55078125, "learning_rate": 2.964628230685139e-05, "loss": 2.1736, "step": 5570 }, { "epoch": 0.12, "grad_norm": 0.470703125, "learning_rate": 2.9644052489088376e-05, "loss": 2.1522, "step": 5580 }, { "epoch": 0.12, "grad_norm": 0.44140625, "learning_rate": 2.964181574945213e-05, "loss": 2.1567, "step": 5590 }, { "epoch": 0.12, "grad_norm": 0.455078125, "learning_rate": 2.9639572088999886e-05, "loss": 2.1937, "step": 5600 }, { "epoch": 0.12, "grad_norm": 0.427734375, "learning_rate": 2.9637321508792175e-05, "loss": 2.1349, "step": 5610 }, { "epoch": 0.12, "grad_norm": 0.439453125, "learning_rate": 2.963506400989279e-05, "loss": 2.1544, "step": 5620 }, { "epoch": 0.12, "grad_norm": 0.458984375, "learning_rate": 2.963279959336879e-05, "loss": 2.1762, "step": 5630 }, { "epoch": 0.12, "grad_norm": 0.4296875, "learning_rate": 2.963052826029051e-05, "loss": 2.1809, "step": 5640 }, { "epoch": 0.12, "grad_norm": 0.4296875, "learning_rate": 2.962825001173155e-05, "loss": 2.1406, "step": 5650 }, { "epoch": 0.12, "grad_norm": 0.435546875, "learning_rate": 2.9625964848768788e-05, "loss": 2.1486, "step": 5660 }, { "epoch": 0.12, "grad_norm": 0.443359375, "learning_rate": 2.9623672772482348e-05, "loss": 2.168, "step": 5670 }, { "epoch": 0.12, "grad_norm": 0.455078125, "learning_rate": 2.9621373783955647e-05, "loss": 2.1897, "step": 5680 }, { "epoch": 0.12, "grad_norm": 0.53515625, "learning_rate": 2.961906788427536e-05, "loss": 2.1471, "step": 5690 }, { "epoch": 0.12, "grad_norm": 0.427734375, "learning_rate": 2.9616755074531416e-05, "loss": 2.1711, "step": 5700 }, { "epoch": 0.12, "grad_norm": 0.4375, "learning_rate": 2.9614435355817035e-05, "loss": 2.1391, "step": 5710 }, { "epoch": 0.12, "grad_norm": 0.431640625, "learning_rate": 2.9612108729228678e-05, "loss": 2.1792, "step": 5720 }, { "epoch": 0.12, "grad_norm": 0.7421875, "learning_rate": 2.960977519586609e-05, "loss": 2.1334, "step": 5730 }, { "epoch": 0.12, "grad_norm": 0.453125, "learning_rate": 2.9607434756832273e-05, "loss": 2.1689, "step": 5740 }, { "epoch": 0.12, "grad_norm": 0.44921875, "learning_rate": 2.9605087413233494e-05, "loss": 2.1577, "step": 5750 }, { "epoch": 0.12, "grad_norm": 0.474609375, "learning_rate": 2.960273316617928e-05, "loss": 2.1605, "step": 5760 }, { "epoch": 0.12, "grad_norm": 0.4609375, "learning_rate": 2.9600372016782422e-05, "loss": 2.1539, "step": 5770 }, { "epoch": 0.12, "grad_norm": 0.4296875, "learning_rate": 2.959800396615898e-05, "loss": 2.1924, "step": 5780 }, { "epoch": 0.12, "grad_norm": 0.5390625, "learning_rate": 2.959562901542827e-05, "loss": 2.152, "step": 5790 }, { "epoch": 0.12, "grad_norm": 0.427734375, "learning_rate": 2.959324716571287e-05, "loss": 2.1738, "step": 5800 }, { "epoch": 0.12, "grad_norm": 0.451171875, "learning_rate": 2.959085841813862e-05, "loss": 2.1182, "step": 5810 }, { "epoch": 0.12, "grad_norm": 0.44921875, "learning_rate": 2.9588462773834626e-05, "loss": 2.1577, "step": 5820 }, { "epoch": 0.12, "grad_norm": 0.435546875, "learning_rate": 2.958606023393324e-05, "loss": 2.1534, "step": 5830 }, { "epoch": 0.12, "grad_norm": 0.44921875, "learning_rate": 2.958365079957009e-05, "loss": 2.1774, "step": 5840 }, { "epoch": 0.12, "grad_norm": 0.43359375, "learning_rate": 2.9581234471884047e-05, "loss": 2.1479, "step": 5850 }, { "epoch": 0.12, "grad_norm": 0.45703125, "learning_rate": 2.9578811252017258e-05, "loss": 2.1877, "step": 5860 }, { "epoch": 0.12, "grad_norm": 0.447265625, "learning_rate": 2.9576381141115105e-05, "loss": 2.1492, "step": 5870 }, { "epoch": 0.12, "grad_norm": 0.4453125, "learning_rate": 2.957394414032625e-05, "loss": 2.1479, "step": 5880 }, { "epoch": 0.12, "grad_norm": 0.4453125, "learning_rate": 2.9571500250802596e-05, "loss": 2.1611, "step": 5890 }, { "epoch": 0.12, "grad_norm": 0.44140625, "learning_rate": 2.956904947369931e-05, "loss": 2.1381, "step": 5900 }, { "epoch": 0.12, "grad_norm": 0.439453125, "learning_rate": 2.9566591810174815e-05, "loss": 2.1702, "step": 5910 }, { "epoch": 0.12, "grad_norm": 0.455078125, "learning_rate": 2.956412726139078e-05, "loss": 2.1371, "step": 5920 }, { "epoch": 0.12, "grad_norm": 0.451171875, "learning_rate": 2.956165582851214e-05, "loss": 2.1248, "step": 5930 }, { "epoch": 0.12, "grad_norm": 0.421875, "learning_rate": 2.9559177512707078e-05, "loss": 2.1574, "step": 5940 }, { "epoch": 0.12, "grad_norm": 0.443359375, "learning_rate": 2.9556692315147034e-05, "loss": 2.1891, "step": 5950 }, { "epoch": 0.12, "grad_norm": 0.59765625, "learning_rate": 2.955420023700669e-05, "loss": 2.1647, "step": 5960 }, { "epoch": 0.12, "grad_norm": 0.455078125, "learning_rate": 2.9551701279463992e-05, "loss": 2.1345, "step": 5970 }, { "epoch": 0.12, "grad_norm": 0.484375, "learning_rate": 2.954919544370014e-05, "loss": 2.1311, "step": 5980 }, { "epoch": 0.12, "grad_norm": 0.427734375, "learning_rate": 2.954668273089957e-05, "loss": 2.1413, "step": 5990 }, { "epoch": 0.12, "grad_norm": 0.419921875, "learning_rate": 2.954416314224998e-05, "loss": 2.1812, "step": 6000 }, { "epoch": 0.12, "eval_accuracy": 0.554531490015361, "eval_loss": 2.016944408416748, "eval_runtime": 16.4992, "eval_samples_per_second": 36.062, "eval_steps_per_second": 1.152, "step": 6000 }, { "epoch": 0.12, "grad_norm": 0.453125, "learning_rate": 2.9541636678942322e-05, "loss": 2.1293, "step": 6010 }, { "epoch": 0.13, "grad_norm": 0.451171875, "learning_rate": 2.9539103342170785e-05, "loss": 2.1938, "step": 6020 }, { "epoch": 0.13, "grad_norm": 0.431640625, "learning_rate": 2.9536563133132817e-05, "loss": 2.1605, "step": 6030 }, { "epoch": 0.13, "grad_norm": 0.44140625, "learning_rate": 2.9534016053029107e-05, "loss": 2.1362, "step": 6040 }, { "epoch": 0.13, "grad_norm": 0.453125, "learning_rate": 2.95314621030636e-05, "loss": 2.1439, "step": 6050 }, { "epoch": 0.13, "grad_norm": 0.427734375, "learning_rate": 2.9528901284443476e-05, "loss": 2.1792, "step": 6060 }, { "epoch": 0.13, "grad_norm": 0.4375, "learning_rate": 2.9526333598379177e-05, "loss": 2.1395, "step": 6070 }, { "epoch": 0.13, "grad_norm": 0.44140625, "learning_rate": 2.952375904608438e-05, "loss": 2.1469, "step": 6080 }, { "epoch": 0.13, "grad_norm": 0.53125, "learning_rate": 2.952117762877601e-05, "loss": 2.1316, "step": 6090 }, { "epoch": 0.13, "grad_norm": 0.43359375, "learning_rate": 2.9518589347674243e-05, "loss": 2.1183, "step": 6100 }, { "epoch": 0.13, "grad_norm": 0.45703125, "learning_rate": 2.9515994204002485e-05, "loss": 2.1296, "step": 6110 }, { "epoch": 0.13, "grad_norm": 0.435546875, "learning_rate": 2.9513392198987398e-05, "loss": 2.1385, "step": 6120 }, { "epoch": 0.13, "grad_norm": 0.486328125, "learning_rate": 2.9510783333858885e-05, "loss": 2.1327, "step": 6130 }, { "epoch": 0.13, "grad_norm": 0.431640625, "learning_rate": 2.9508167609850098e-05, "loss": 2.16, "step": 6140 }, { "epoch": 0.13, "grad_norm": 0.48046875, "learning_rate": 2.950554502819741e-05, "loss": 2.1903, "step": 6150 }, { "epoch": 0.13, "grad_norm": 0.4375, "learning_rate": 2.950291559014046e-05, "loss": 2.1592, "step": 6160 }, { "epoch": 0.13, "grad_norm": 0.43359375, "learning_rate": 2.950027929692211e-05, "loss": 2.1423, "step": 6170 }, { "epoch": 0.13, "grad_norm": 0.4375, "learning_rate": 2.9497636149788468e-05, "loss": 2.156, "step": 6180 }, { "epoch": 0.13, "grad_norm": 0.447265625, "learning_rate": 2.949498614998889e-05, "loss": 2.1443, "step": 6190 }, { "epoch": 0.13, "grad_norm": 0.44921875, "learning_rate": 2.9492329298775963e-05, "loss": 2.1783, "step": 6200 }, { "epoch": 0.13, "grad_norm": 0.45703125, "learning_rate": 2.9489665597405508e-05, "loss": 2.1605, "step": 6210 }, { "epoch": 0.13, "grad_norm": 0.435546875, "learning_rate": 2.948699504713659e-05, "loss": 2.1638, "step": 6220 }, { "epoch": 0.13, "grad_norm": 0.455078125, "learning_rate": 2.9484317649231514e-05, "loss": 2.1552, "step": 6230 }, { "epoch": 0.13, "grad_norm": 0.462890625, "learning_rate": 2.9481633404955816e-05, "loss": 2.1483, "step": 6240 }, { "epoch": 0.13, "grad_norm": 0.4453125, "learning_rate": 2.9478942315578274e-05, "loss": 2.1713, "step": 6250 }, { "epoch": 0.13, "grad_norm": 0.451171875, "learning_rate": 2.947624438237089e-05, "loss": 2.1149, "step": 6260 }, { "epoch": 0.13, "grad_norm": 0.4453125, "learning_rate": 2.9473539606608918e-05, "loss": 2.1578, "step": 6270 }, { "epoch": 0.13, "grad_norm": 0.4375, "learning_rate": 2.947082798957083e-05, "loss": 2.156, "step": 6280 }, { "epoch": 0.13, "grad_norm": 0.443359375, "learning_rate": 2.9468109532538346e-05, "loss": 2.1458, "step": 6290 }, { "epoch": 0.13, "grad_norm": 0.462890625, "learning_rate": 2.9465384236796402e-05, "loss": 2.1533, "step": 6300 }, { "epoch": 0.13, "grad_norm": 0.443359375, "learning_rate": 2.9462652103633184e-05, "loss": 2.1582, "step": 6310 }, { "epoch": 0.13, "grad_norm": 0.451171875, "learning_rate": 2.94599131343401e-05, "loss": 2.1627, "step": 6320 }, { "epoch": 0.13, "grad_norm": 0.46484375, "learning_rate": 2.9457167330211793e-05, "loss": 2.152, "step": 6330 }, { "epoch": 0.13, "grad_norm": 0.443359375, "learning_rate": 2.9454414692546137e-05, "loss": 2.1506, "step": 6340 }, { "epoch": 0.13, "grad_norm": 0.44921875, "learning_rate": 2.945165522264422e-05, "loss": 2.1307, "step": 6350 }, { "epoch": 0.13, "grad_norm": 0.453125, "learning_rate": 2.9448888921810394e-05, "loss": 2.1414, "step": 6360 }, { "epoch": 0.13, "grad_norm": 0.412109375, "learning_rate": 2.9446115791352207e-05, "loss": 2.1924, "step": 6370 }, { "epoch": 0.13, "grad_norm": 0.44921875, "learning_rate": 2.9443335832580453e-05, "loss": 2.1812, "step": 6380 }, { "epoch": 0.13, "grad_norm": 0.427734375, "learning_rate": 2.9440549046809146e-05, "loss": 2.1624, "step": 6390 }, { "epoch": 0.13, "grad_norm": 0.42578125, "learning_rate": 2.9437755435355526e-05, "loss": 2.1411, "step": 6400 }, { "epoch": 0.13, "grad_norm": 0.451171875, "learning_rate": 2.943495499954007e-05, "loss": 2.1604, "step": 6410 }, { "epoch": 0.13, "grad_norm": 0.4375, "learning_rate": 2.9432147740686464e-05, "loss": 2.1493, "step": 6420 }, { "epoch": 0.13, "grad_norm": 0.455078125, "learning_rate": 2.9429333660121636e-05, "loss": 2.1437, "step": 6430 }, { "epoch": 0.13, "grad_norm": 0.453125, "learning_rate": 2.9426512759175725e-05, "loss": 2.169, "step": 6440 }, { "epoch": 0.13, "grad_norm": 0.451171875, "learning_rate": 2.9423685039182104e-05, "loss": 2.136, "step": 6450 }, { "epoch": 0.13, "grad_norm": 0.431640625, "learning_rate": 2.9420850501477364e-05, "loss": 2.1452, "step": 6460 }, { "epoch": 0.13, "grad_norm": 0.4765625, "learning_rate": 2.9418009147401317e-05, "loss": 2.1624, "step": 6470 }, { "epoch": 0.13, "grad_norm": 0.4609375, "learning_rate": 2.9415160978297002e-05, "loss": 2.1795, "step": 6480 }, { "epoch": 0.13, "grad_norm": 0.431640625, "learning_rate": 2.941230599551067e-05, "loss": 2.1615, "step": 6490 }, { "epoch": 0.14, "grad_norm": 0.466796875, "learning_rate": 2.9409444200391807e-05, "loss": 2.1526, "step": 6500 }, { "epoch": 0.14, "eval_accuracy": 0.5546777068598701, "eval_loss": 2.014756441116333, "eval_runtime": 16.4559, "eval_samples_per_second": 36.157, "eval_steps_per_second": 1.155, "step": 6500 }, { "epoch": 0.14, "grad_norm": 0.42578125, "learning_rate": 2.940657559429311e-05, "loss": 2.1502, "step": 6510 }, { "epoch": 0.14, "grad_norm": 0.4453125, "learning_rate": 2.9403700178570498e-05, "loss": 2.1632, "step": 6520 }, { "epoch": 0.14, "grad_norm": 0.435546875, "learning_rate": 2.94008179545831e-05, "loss": 2.1565, "step": 6530 }, { "epoch": 0.14, "grad_norm": 0.435546875, "learning_rate": 2.9397928923693273e-05, "loss": 2.1224, "step": 6540 }, { "epoch": 0.14, "grad_norm": 0.423828125, "learning_rate": 2.9395033087266592e-05, "loss": 2.1264, "step": 6550 }, { "epoch": 0.14, "grad_norm": 0.42578125, "learning_rate": 2.9392130446671842e-05, "loss": 2.1165, "step": 6560 }, { "epoch": 0.14, "grad_norm": 0.44921875, "learning_rate": 2.938922100328103e-05, "loss": 2.1481, "step": 6570 }, { "epoch": 0.14, "grad_norm": 0.578125, "learning_rate": 2.9386304758469382e-05, "loss": 2.1674, "step": 6580 }, { "epoch": 0.14, "grad_norm": 0.431640625, "learning_rate": 2.9383381713615323e-05, "loss": 2.1659, "step": 6590 }, { "epoch": 0.14, "grad_norm": 0.490234375, "learning_rate": 2.9380451870100508e-05, "loss": 2.1412, "step": 6600 }, { "epoch": 0.14, "grad_norm": 0.462890625, "learning_rate": 2.9377515229309792e-05, "loss": 2.1342, "step": 6610 }, { "epoch": 0.14, "grad_norm": 0.482421875, "learning_rate": 2.9374571792631266e-05, "loss": 2.132, "step": 6620 }, { "epoch": 0.14, "grad_norm": 0.43359375, "learning_rate": 2.9371621561456205e-05, "loss": 2.1494, "step": 6630 }, { "epoch": 0.14, "grad_norm": 0.439453125, "learning_rate": 2.9368664537179114e-05, "loss": 2.1398, "step": 6640 }, { "epoch": 0.14, "grad_norm": 0.4453125, "learning_rate": 2.9365700721197705e-05, "loss": 2.1788, "step": 6650 }, { "epoch": 0.14, "grad_norm": 0.451171875, "learning_rate": 2.9362730114912892e-05, "loss": 2.1385, "step": 6660 }, { "epoch": 0.14, "grad_norm": 0.4453125, "learning_rate": 2.9359752719728813e-05, "loss": 2.1504, "step": 6670 }, { "epoch": 0.14, "grad_norm": 0.451171875, "learning_rate": 2.9356768537052803e-05, "loss": 2.1316, "step": 6680 }, { "epoch": 0.14, "grad_norm": 0.546875, "learning_rate": 2.935377756829541e-05, "loss": 2.1379, "step": 6690 }, { "epoch": 0.14, "grad_norm": 0.43359375, "learning_rate": 2.9350779814870394e-05, "loss": 2.1564, "step": 6700 }, { "epoch": 0.14, "grad_norm": 0.47265625, "learning_rate": 2.9347775278194712e-05, "loss": 2.1336, "step": 6710 }, { "epoch": 0.14, "grad_norm": 0.51171875, "learning_rate": 2.9344763959688532e-05, "loss": 2.1451, "step": 6720 }, { "epoch": 0.14, "grad_norm": 0.4296875, "learning_rate": 2.9341745860775234e-05, "loss": 2.1514, "step": 6730 }, { "epoch": 0.14, "grad_norm": 0.458984375, "learning_rate": 2.933872098288139e-05, "loss": 2.1128, "step": 6740 }, { "epoch": 0.14, "grad_norm": 0.455078125, "learning_rate": 2.933568932743679e-05, "loss": 2.1736, "step": 6750 }, { "epoch": 0.14, "grad_norm": 0.427734375, "learning_rate": 2.9332650895874413e-05, "loss": 2.1599, "step": 6760 }, { "epoch": 0.14, "grad_norm": 0.435546875, "learning_rate": 2.932960568963046e-05, "loss": 2.1582, "step": 6770 }, { "epoch": 0.14, "grad_norm": 0.4375, "learning_rate": 2.932655371014431e-05, "loss": 2.1563, "step": 6780 }, { "epoch": 0.14, "grad_norm": 0.447265625, "learning_rate": 2.932349495885856e-05, "loss": 2.1311, "step": 6790 }, { "epoch": 0.14, "grad_norm": 0.435546875, "learning_rate": 2.9320429437219014e-05, "loss": 2.1547, "step": 6800 }, { "epoch": 0.14, "grad_norm": 0.439453125, "learning_rate": 2.931735714667466e-05, "loss": 2.1565, "step": 6810 }, { "epoch": 0.14, "grad_norm": 0.443359375, "learning_rate": 2.9314278088677686e-05, "loss": 2.1244, "step": 6820 }, { "epoch": 0.14, "grad_norm": 0.447265625, "learning_rate": 2.931119226468349e-05, "loss": 2.1797, "step": 6830 }, { "epoch": 0.14, "grad_norm": 0.44921875, "learning_rate": 2.9308099676150666e-05, "loss": 2.1404, "step": 6840 }, { "epoch": 0.14, "grad_norm": 0.474609375, "learning_rate": 2.9305000324540997e-05, "loss": 2.1614, "step": 6850 }, { "epoch": 0.14, "grad_norm": 0.421875, "learning_rate": 2.9301894211319475e-05, "loss": 2.1416, "step": 6860 }, { "epoch": 0.14, "grad_norm": 0.455078125, "learning_rate": 2.9298781337954277e-05, "loss": 2.1494, "step": 6870 }, { "epoch": 0.14, "grad_norm": 0.43359375, "learning_rate": 2.929566170591678e-05, "loss": 2.1702, "step": 6880 }, { "epoch": 0.14, "grad_norm": 0.447265625, "learning_rate": 2.929253531668155e-05, "loss": 2.172, "step": 6890 }, { "epoch": 0.14, "grad_norm": 0.435546875, "learning_rate": 2.9289402171726362e-05, "loss": 2.1513, "step": 6900 }, { "epoch": 0.14, "grad_norm": 0.4375, "learning_rate": 2.9286262272532164e-05, "loss": 2.1364, "step": 6910 }, { "epoch": 0.14, "grad_norm": 0.85546875, "learning_rate": 2.9283115620583116e-05, "loss": 2.1401, "step": 6920 }, { "epoch": 0.14, "grad_norm": 0.4296875, "learning_rate": 2.9279962217366557e-05, "loss": 2.1598, "step": 6930 }, { "epoch": 0.14, "grad_norm": 0.443359375, "learning_rate": 2.927680206437302e-05, "loss": 2.1284, "step": 6940 }, { "epoch": 0.14, "grad_norm": 0.4296875, "learning_rate": 2.927363516309623e-05, "loss": 2.1634, "step": 6950 }, { "epoch": 0.14, "grad_norm": 0.51171875, "learning_rate": 2.92704615150331e-05, "loss": 2.1751, "step": 6960 }, { "epoch": 0.14, "grad_norm": 0.453125, "learning_rate": 2.9267281121683738e-05, "loss": 2.1339, "step": 6970 }, { "epoch": 0.15, "grad_norm": 0.4453125, "learning_rate": 2.926409398455143e-05, "loss": 2.1391, "step": 6980 }, { "epoch": 0.15, "grad_norm": 0.42578125, "learning_rate": 2.9260900105142657e-05, "loss": 2.1198, "step": 6990 }, { "epoch": 0.15, "grad_norm": 0.443359375, "learning_rate": 2.9257699484967084e-05, "loss": 2.155, "step": 7000 }, { "epoch": 0.15, "eval_accuracy": 0.5553512900761478, "eval_loss": 2.013082265853882, "eval_runtime": 16.4276, "eval_samples_per_second": 36.22, "eval_steps_per_second": 1.157, "step": 7000 }, { "epoch": 0.15, "grad_norm": 0.4609375, "learning_rate": 2.9254492125537565e-05, "loss": 2.127, "step": 7010 }, { "epoch": 0.15, "grad_norm": 0.419921875, "learning_rate": 2.9251278028370137e-05, "loss": 2.1111, "step": 7020 }, { "epoch": 0.15, "grad_norm": 0.458984375, "learning_rate": 2.9248057194984024e-05, "loss": 2.1568, "step": 7030 }, { "epoch": 0.15, "grad_norm": 0.447265625, "learning_rate": 2.924482962690163e-05, "loss": 2.1332, "step": 7040 }, { "epoch": 0.15, "grad_norm": 0.435546875, "learning_rate": 2.924159532564855e-05, "loss": 2.1805, "step": 7050 }, { "epoch": 0.15, "grad_norm": 0.44921875, "learning_rate": 2.9238354292753545e-05, "loss": 2.1403, "step": 7060 }, { "epoch": 0.15, "grad_norm": 0.453125, "learning_rate": 2.923510652974858e-05, "loss": 2.1518, "step": 7070 }, { "epoch": 0.15, "grad_norm": 0.48828125, "learning_rate": 2.9231852038168784e-05, "loss": 2.1383, "step": 7080 }, { "epoch": 0.15, "grad_norm": 0.41796875, "learning_rate": 2.922859081955248e-05, "loss": 2.1245, "step": 7090 }, { "epoch": 0.15, "grad_norm": 0.421875, "learning_rate": 2.9225322875441158e-05, "loss": 2.1309, "step": 7100 }, { "epoch": 0.15, "grad_norm": 0.431640625, "learning_rate": 2.9222048207379494e-05, "loss": 2.1364, "step": 7110 }, { "epoch": 0.15, "grad_norm": 0.443359375, "learning_rate": 2.921876681691534e-05, "loss": 2.1302, "step": 7120 }, { "epoch": 0.15, "grad_norm": 0.435546875, "learning_rate": 2.921547870559973e-05, "loss": 2.1439, "step": 7130 }, { "epoch": 0.15, "grad_norm": 0.44140625, "learning_rate": 2.9212183874986866e-05, "loss": 2.1611, "step": 7140 }, { "epoch": 0.15, "grad_norm": 0.44140625, "learning_rate": 2.920888232663413e-05, "loss": 2.184, "step": 7150 }, { "epoch": 0.15, "grad_norm": 0.458984375, "learning_rate": 2.920557406210209e-05, "loss": 2.1542, "step": 7160 }, { "epoch": 0.15, "grad_norm": 0.474609375, "learning_rate": 2.9202259082954464e-05, "loss": 2.1725, "step": 7170 }, { "epoch": 0.15, "grad_norm": 0.474609375, "learning_rate": 2.9198937390758176e-05, "loss": 2.1364, "step": 7180 }, { "epoch": 0.15, "grad_norm": 0.423828125, "learning_rate": 2.919560898708329e-05, "loss": 2.1639, "step": 7190 }, { "epoch": 0.15, "grad_norm": 0.439453125, "learning_rate": 2.919227387350307e-05, "loss": 2.1327, "step": 7200 }, { "epoch": 0.15, "grad_norm": 0.453125, "learning_rate": 2.9188932051593934e-05, "loss": 2.1416, "step": 7210 }, { "epoch": 0.15, "grad_norm": 0.4375, "learning_rate": 2.9185583522935476e-05, "loss": 2.1131, "step": 7220 }, { "epoch": 0.15, "grad_norm": 0.451171875, "learning_rate": 2.9182228289110468e-05, "loss": 2.1464, "step": 7230 }, { "epoch": 0.15, "grad_norm": 0.5, "learning_rate": 2.9178866351704834e-05, "loss": 2.1135, "step": 7240 }, { "epoch": 0.15, "grad_norm": 0.421875, "learning_rate": 2.917549771230768e-05, "loss": 2.1434, "step": 7250 }, { "epoch": 0.15, "grad_norm": 0.451171875, "learning_rate": 2.9172122372511282e-05, "loss": 2.1705, "step": 7260 }, { "epoch": 0.15, "grad_norm": 0.578125, "learning_rate": 2.9168740333911074e-05, "loss": 2.1587, "step": 7270 }, { "epoch": 0.15, "grad_norm": 0.50390625, "learning_rate": 2.9165351598105657e-05, "loss": 2.1584, "step": 7280 }, { "epoch": 0.15, "grad_norm": 0.42578125, "learning_rate": 2.9161956166696805e-05, "loss": 2.1481, "step": 7290 }, { "epoch": 0.15, "grad_norm": 0.474609375, "learning_rate": 2.9158554041289454e-05, "loss": 2.144, "step": 7300 }, { "epoch": 0.15, "grad_norm": 0.451171875, "learning_rate": 2.91551452234917e-05, "loss": 2.1433, "step": 7310 }, { "epoch": 0.15, "grad_norm": 0.5078125, "learning_rate": 2.9151729714914805e-05, "loss": 2.1611, "step": 7320 }, { "epoch": 0.15, "grad_norm": 0.466796875, "learning_rate": 2.914830751717319e-05, "loss": 2.1217, "step": 7330 }, { "epoch": 0.15, "grad_norm": 0.5078125, "learning_rate": 2.914487863188445e-05, "loss": 2.123, "step": 7340 }, { "epoch": 0.15, "grad_norm": 0.48828125, "learning_rate": 2.914144306066932e-05, "loss": 2.1235, "step": 7350 }, { "epoch": 0.15, "grad_norm": 0.4765625, "learning_rate": 2.913800080515172e-05, "loss": 2.1306, "step": 7360 }, { "epoch": 0.15, "grad_norm": 0.45703125, "learning_rate": 2.9134551866958713e-05, "loss": 2.1256, "step": 7370 }, { "epoch": 0.15, "grad_norm": 0.43359375, "learning_rate": 2.9131096247720524e-05, "loss": 2.1392, "step": 7380 }, { "epoch": 0.15, "grad_norm": 0.44140625, "learning_rate": 2.9127633949070536e-05, "loss": 2.1704, "step": 7390 }, { "epoch": 0.15, "grad_norm": 0.45703125, "learning_rate": 2.912416497264529e-05, "loss": 2.1692, "step": 7400 }, { "epoch": 0.15, "grad_norm": 0.41796875, "learning_rate": 2.912068932008448e-05, "loss": 2.1582, "step": 7410 }, { "epoch": 0.15, "grad_norm": 0.451171875, "learning_rate": 2.9117206993030967e-05, "loss": 2.1192, "step": 7420 }, { "epoch": 0.15, "grad_norm": 0.4609375, "learning_rate": 2.9113717993130754e-05, "loss": 2.1469, "step": 7430 }, { "epoch": 0.15, "grad_norm": 0.5234375, "learning_rate": 2.911022232203301e-05, "loss": 2.1331, "step": 7440 }, { "epoch": 0.15, "grad_norm": 0.453125, "learning_rate": 2.9106719981390037e-05, "loss": 2.1411, "step": 7450 }, { "epoch": 0.16, "grad_norm": 0.458984375, "learning_rate": 2.910321097285731e-05, "loss": 2.1278, "step": 7460 }, { "epoch": 0.16, "grad_norm": 0.48828125, "learning_rate": 2.9099695298093454e-05, "loss": 2.1665, "step": 7470 }, { "epoch": 0.16, "grad_norm": 0.474609375, "learning_rate": 2.909617295876023e-05, "loss": 2.1481, "step": 7480 }, { "epoch": 0.16, "grad_norm": 0.453125, "learning_rate": 2.9092643956522568e-05, "loss": 2.1107, "step": 7490 }, { "epoch": 0.16, "grad_norm": 0.412109375, "learning_rate": 2.908910829304853e-05, "loss": 2.1594, "step": 7500 }, { "epoch": 0.16, "eval_accuracy": 0.5557669402071679, "eval_loss": 2.011016845703125, "eval_runtime": 16.4416, "eval_samples_per_second": 36.189, "eval_steps_per_second": 1.156, "step": 7500 }, { "epoch": 0.16, "grad_norm": 0.447265625, "learning_rate": 2.9085565970009337e-05, "loss": 2.1623, "step": 7510 }, { "epoch": 0.16, "grad_norm": 0.44140625, "learning_rate": 2.9082016989079357e-05, "loss": 2.1686, "step": 7520 }, { "epoch": 0.16, "grad_norm": 0.46875, "learning_rate": 2.9078461351936104e-05, "loss": 2.1762, "step": 7530 }, { "epoch": 0.16, "grad_norm": 0.4375, "learning_rate": 2.9074899060260237e-05, "loss": 2.1224, "step": 7540 }, { "epoch": 0.16, "grad_norm": 0.490234375, "learning_rate": 2.9071330115735556e-05, "loss": 2.1145, "step": 7550 }, { "epoch": 0.16, "grad_norm": 0.458984375, "learning_rate": 2.906775452004902e-05, "loss": 2.1215, "step": 7560 }, { "epoch": 0.16, "grad_norm": 0.419921875, "learning_rate": 2.9064172274890718e-05, "loss": 2.1391, "step": 7570 }, { "epoch": 0.16, "grad_norm": 0.72265625, "learning_rate": 2.906058338195389e-05, "loss": 2.1384, "step": 7580 }, { "epoch": 0.16, "grad_norm": 0.46875, "learning_rate": 2.9056987842934908e-05, "loss": 2.1288, "step": 7590 }, { "epoch": 0.16, "grad_norm": 0.45703125, "learning_rate": 2.9053385659533295e-05, "loss": 2.1429, "step": 7600 }, { "epoch": 0.16, "grad_norm": 0.54296875, "learning_rate": 2.9049776833451718e-05, "loss": 2.1199, "step": 7610 }, { "epoch": 0.16, "grad_norm": 0.4921875, "learning_rate": 2.9046161366395966e-05, "loss": 2.1551, "step": 7620 }, { "epoch": 0.16, "grad_norm": 0.466796875, "learning_rate": 2.9042539260074994e-05, "loss": 2.1427, "step": 7630 }, { "epoch": 0.16, "grad_norm": 0.470703125, "learning_rate": 2.9038910516200867e-05, "loss": 2.153, "step": 7640 }, { "epoch": 0.16, "grad_norm": 0.435546875, "learning_rate": 2.9035275136488807e-05, "loss": 2.1384, "step": 7650 }, { "epoch": 0.16, "grad_norm": 0.470703125, "learning_rate": 2.9031633122657163e-05, "loss": 2.1234, "step": 7660 }, { "epoch": 0.16, "grad_norm": 0.455078125, "learning_rate": 2.9027984476427427e-05, "loss": 2.1772, "step": 7670 }, { "epoch": 0.16, "grad_norm": 0.435546875, "learning_rate": 2.9024329199524217e-05, "loss": 2.1686, "step": 7680 }, { "epoch": 0.16, "grad_norm": 0.48828125, "learning_rate": 2.9020667293675292e-05, "loss": 2.1469, "step": 7690 }, { "epoch": 0.16, "grad_norm": 0.50390625, "learning_rate": 2.9016998760611542e-05, "loss": 2.1321, "step": 7700 }, { "epoch": 0.16, "grad_norm": 0.474609375, "learning_rate": 2.9013323602066992e-05, "loss": 2.139, "step": 7710 }, { "epoch": 0.16, "grad_norm": 0.4453125, "learning_rate": 2.9009641819778795e-05, "loss": 2.1379, "step": 7720 }, { "epoch": 0.16, "grad_norm": 0.419921875, "learning_rate": 2.9005953415487233e-05, "loss": 2.1558, "step": 7730 }, { "epoch": 0.16, "grad_norm": 0.45703125, "learning_rate": 2.9002258390935724e-05, "loss": 2.1326, "step": 7740 }, { "epoch": 0.16, "grad_norm": 0.447265625, "learning_rate": 2.8998556747870817e-05, "loss": 2.1088, "step": 7750 }, { "epoch": 0.16, "grad_norm": 0.416015625, "learning_rate": 2.8994848488042177e-05, "loss": 2.1503, "step": 7760 }, { "epoch": 0.16, "grad_norm": 0.466796875, "learning_rate": 2.8991133613202615e-05, "loss": 2.1422, "step": 7770 }, { "epoch": 0.16, "grad_norm": 0.470703125, "learning_rate": 2.8987412125108046e-05, "loss": 2.1498, "step": 7780 }, { "epoch": 0.16, "grad_norm": 0.431640625, "learning_rate": 2.898368402551753e-05, "loss": 2.1369, "step": 7790 }, { "epoch": 0.16, "grad_norm": 0.4375, "learning_rate": 2.897994931619325e-05, "loss": 2.1385, "step": 7800 }, { "epoch": 0.16, "grad_norm": 0.546875, "learning_rate": 2.8976207998900498e-05, "loss": 2.1686, "step": 7810 }, { "epoch": 0.16, "grad_norm": 0.478515625, "learning_rate": 2.8972460075407706e-05, "loss": 2.154, "step": 7820 }, { "epoch": 0.16, "grad_norm": 0.470703125, "learning_rate": 2.8968705547486423e-05, "loss": 2.1063, "step": 7830 }, { "epoch": 0.16, "grad_norm": 0.484375, "learning_rate": 2.8964944416911316e-05, "loss": 2.1428, "step": 7840 }, { "epoch": 0.16, "grad_norm": 0.455078125, "learning_rate": 2.8961176685460184e-05, "loss": 2.1558, "step": 7850 }, { "epoch": 0.16, "grad_norm": 0.46875, "learning_rate": 2.8957402354913927e-05, "loss": 2.1536, "step": 7860 }, { "epoch": 0.16, "grad_norm": 0.486328125, "learning_rate": 2.8953621427056588e-05, "loss": 2.1429, "step": 7870 }, { "epoch": 0.16, "grad_norm": 0.455078125, "learning_rate": 2.894983390367531e-05, "loss": 2.1456, "step": 7880 }, { "epoch": 0.16, "grad_norm": 0.5546875, "learning_rate": 2.8946039786560355e-05, "loss": 2.126, "step": 7890 }, { "epoch": 0.16, "grad_norm": 0.42578125, "learning_rate": 2.894223907750511e-05, "loss": 2.1207, "step": 7900 }, { "epoch": 0.16, "grad_norm": 0.44140625, "learning_rate": 2.8938431778306082e-05, "loss": 2.1407, "step": 7910 }, { "epoch": 0.16, "grad_norm": 0.419921875, "learning_rate": 2.893461789076287e-05, "loss": 2.1534, "step": 7920 }, { "epoch": 0.16, "grad_norm": 0.50390625, "learning_rate": 2.893079741667821e-05, "loss": 2.1167, "step": 7930 }, { "epoch": 0.17, "grad_norm": 0.43359375, "learning_rate": 2.8926970357857946e-05, "loss": 2.1525, "step": 7940 }, { "epoch": 0.17, "grad_norm": 0.49609375, "learning_rate": 2.8923136716111028e-05, "loss": 2.1571, "step": 7950 }, { "epoch": 0.17, "grad_norm": 0.47265625, "learning_rate": 2.8919296493249527e-05, "loss": 2.1512, "step": 7960 }, { "epoch": 0.17, "grad_norm": 0.435546875, "learning_rate": 2.8915449691088607e-05, "loss": 2.1659, "step": 7970 }, { "epoch": 0.17, "grad_norm": 0.474609375, "learning_rate": 2.891159631144657e-05, "loss": 2.1307, "step": 7980 }, { "epoch": 0.17, "grad_norm": 0.44921875, "learning_rate": 2.8907736356144794e-05, "loss": 2.094, "step": 7990 }, { "epoch": 0.17, "grad_norm": 0.61328125, "learning_rate": 2.8903869827007796e-05, "loss": 2.1681, "step": 8000 }, { "epoch": 0.17, "eval_accuracy": 0.5559098712798902, "eval_loss": 2.0096688270568848, "eval_runtime": 16.4393, "eval_samples_per_second": 36.194, "eval_steps_per_second": 1.156, "step": 8000 }, { "epoch": 0.17, "grad_norm": 0.4609375, "learning_rate": 2.889999672586318e-05, "loss": 2.1198, "step": 8010 }, { "epoch": 0.17, "grad_norm": 0.44921875, "learning_rate": 2.8896117054541667e-05, "loss": 2.1312, "step": 8020 }, { "epoch": 0.17, "grad_norm": 0.447265625, "learning_rate": 2.889223081487708e-05, "loss": 2.162, "step": 8030 }, { "epoch": 0.17, "grad_norm": 0.5234375, "learning_rate": 2.888833800870634e-05, "loss": 2.1709, "step": 8040 }, { "epoch": 0.17, "grad_norm": 0.50390625, "learning_rate": 2.8884438637869482e-05, "loss": 2.1311, "step": 8050 }, { "epoch": 0.17, "grad_norm": 0.47265625, "learning_rate": 2.8880532704209636e-05, "loss": 2.1262, "step": 8060 }, { "epoch": 0.17, "grad_norm": 0.455078125, "learning_rate": 2.8876620209573046e-05, "loss": 2.136, "step": 8070 }, { "epoch": 0.17, "grad_norm": 0.78125, "learning_rate": 2.8872701155809046e-05, "loss": 2.1387, "step": 8080 }, { "epoch": 0.17, "grad_norm": 0.43359375, "learning_rate": 2.886877554477007e-05, "loss": 2.1656, "step": 8090 }, { "epoch": 0.17, "grad_norm": 0.431640625, "learning_rate": 2.8864843378311653e-05, "loss": 2.1209, "step": 8100 }, { "epoch": 0.17, "grad_norm": 0.4296875, "learning_rate": 2.8860904658292446e-05, "loss": 2.1206, "step": 8110 }, { "epoch": 0.17, "grad_norm": 0.431640625, "learning_rate": 2.885695938657416e-05, "loss": 2.1264, "step": 8120 }, { "epoch": 0.17, "grad_norm": 0.4609375, "learning_rate": 2.8853007565021644e-05, "loss": 2.1118, "step": 8130 }, { "epoch": 0.17, "grad_norm": 0.45703125, "learning_rate": 2.8849049195502812e-05, "loss": 2.1673, "step": 8140 }, { "epoch": 0.17, "grad_norm": 0.41796875, "learning_rate": 2.8845084279888694e-05, "loss": 2.1242, "step": 8150 }, { "epoch": 0.17, "grad_norm": 0.46875, "learning_rate": 2.8841112820053396e-05, "loss": 2.1227, "step": 8160 }, { "epoch": 0.17, "grad_norm": 0.443359375, "learning_rate": 2.8837134817874138e-05, "loss": 2.1518, "step": 8170 }, { "epoch": 0.17, "grad_norm": 0.455078125, "learning_rate": 2.883315027523121e-05, "loss": 2.1387, "step": 8180 }, { "epoch": 0.17, "grad_norm": 0.44140625, "learning_rate": 2.8829159194008014e-05, "loss": 2.1144, "step": 8190 }, { "epoch": 0.17, "grad_norm": 0.47265625, "learning_rate": 2.8825161576091024e-05, "loss": 2.1354, "step": 8200 }, { "epoch": 0.17, "grad_norm": 0.46484375, "learning_rate": 2.882115742336982e-05, "loss": 2.1517, "step": 8210 }, { "epoch": 0.17, "grad_norm": 0.4453125, "learning_rate": 2.881714673773706e-05, "loss": 2.1294, "step": 8220 }, { "epoch": 0.17, "grad_norm": 0.466796875, "learning_rate": 2.8813129521088492e-05, "loss": 2.1437, "step": 8230 }, { "epoch": 0.17, "grad_norm": 0.447265625, "learning_rate": 2.880910577532296e-05, "loss": 2.1625, "step": 8240 }, { "epoch": 0.17, "grad_norm": 0.46484375, "learning_rate": 2.8805075502342377e-05, "loss": 2.1185, "step": 8250 }, { "epoch": 0.17, "grad_norm": 0.435546875, "learning_rate": 2.8801038704051765e-05, "loss": 2.1205, "step": 8260 }, { "epoch": 0.17, "grad_norm": 0.431640625, "learning_rate": 2.87969953823592e-05, "loss": 2.1658, "step": 8270 }, { "epoch": 0.17, "grad_norm": 0.44140625, "learning_rate": 2.879294553917587e-05, "loss": 2.1598, "step": 8280 }, { "epoch": 0.17, "grad_norm": 0.470703125, "learning_rate": 2.8788889176416026e-05, "loss": 2.1324, "step": 8290 }, { "epoch": 0.17, "grad_norm": 0.45703125, "learning_rate": 2.8784826295997012e-05, "loss": 2.1282, "step": 8300 }, { "epoch": 0.17, "grad_norm": 0.4765625, "learning_rate": 2.878075689983925e-05, "loss": 2.157, "step": 8310 }, { "epoch": 0.17, "grad_norm": 0.45703125, "learning_rate": 2.877668098986624e-05, "loss": 2.174, "step": 8320 }, { "epoch": 0.17, "grad_norm": 0.435546875, "learning_rate": 2.8772598568004557e-05, "loss": 2.1525, "step": 8330 }, { "epoch": 0.17, "grad_norm": 0.474609375, "learning_rate": 2.8768509636183863e-05, "loss": 2.1448, "step": 8340 }, { "epoch": 0.17, "grad_norm": 0.5390625, "learning_rate": 2.876441419633689e-05, "loss": 2.1316, "step": 8350 }, { "epoch": 0.17, "grad_norm": 0.43359375, "learning_rate": 2.8760312250399456e-05, "loss": 2.1271, "step": 8360 }, { "epoch": 0.17, "grad_norm": 0.4375, "learning_rate": 2.8756203800310433e-05, "loss": 2.1536, "step": 8370 }, { "epoch": 0.17, "grad_norm": 0.439453125, "learning_rate": 2.875208884801179e-05, "loss": 2.1218, "step": 8380 }, { "epoch": 0.17, "grad_norm": 0.427734375, "learning_rate": 2.874796739544857e-05, "loss": 2.1294, "step": 8390 }, { "epoch": 0.17, "grad_norm": 0.451171875, "learning_rate": 2.8743839444568865e-05, "loss": 2.141, "step": 8400 }, { "epoch": 0.17, "grad_norm": 0.484375, "learning_rate": 2.873970499732386e-05, "loss": 2.1562, "step": 8410 }, { "epoch": 0.18, "grad_norm": 0.515625, "learning_rate": 2.87355640556678e-05, "loss": 2.1623, "step": 8420 }, { "epoch": 0.18, "grad_norm": 0.451171875, "learning_rate": 2.8731416621558006e-05, "loss": 2.1554, "step": 8430 }, { "epoch": 0.18, "grad_norm": 0.4921875, "learning_rate": 2.8727262696954865e-05, "loss": 2.1186, "step": 8440 }, { "epoch": 0.18, "grad_norm": 0.44921875, "learning_rate": 2.8723102283821826e-05, "loss": 2.1369, "step": 8450 }, { "epoch": 0.18, "grad_norm": 0.453125, "learning_rate": 2.8718935384125423e-05, "loss": 2.1446, "step": 8460 }, { "epoch": 0.18, "grad_norm": 0.466796875, "learning_rate": 2.8714761999835236e-05, "loss": 2.1343, "step": 8470 }, { "epoch": 0.18, "grad_norm": 0.53125, "learning_rate": 2.871058213292392e-05, "loss": 2.1591, "step": 8480 }, { "epoch": 0.18, "grad_norm": 0.4296875, "learning_rate": 2.870639578536719e-05, "loss": 2.1227, "step": 8490 }, { "epoch": 0.18, "grad_norm": 0.50390625, "learning_rate": 2.8702202959143833e-05, "loss": 2.1572, "step": 8500 }, { "epoch": 0.18, "eval_accuracy": 0.5561628757074677, "eval_loss": 2.008275270462036, "eval_runtime": 16.4042, "eval_samples_per_second": 36.271, "eval_steps_per_second": 1.158, "step": 8500 }, { "epoch": 0.18, "grad_norm": 0.439453125, "learning_rate": 2.8698003656235686e-05, "loss": 2.0905, "step": 8510 }, { "epoch": 0.18, "grad_norm": 0.466796875, "learning_rate": 2.8693797878627654e-05, "loss": 2.0992, "step": 8520 }, { "epoch": 0.18, "grad_norm": 0.42578125, "learning_rate": 2.86895856283077e-05, "loss": 2.1138, "step": 8530 }, { "epoch": 0.18, "grad_norm": 0.427734375, "learning_rate": 2.8685366907266855e-05, "loss": 2.1292, "step": 8540 }, { "epoch": 0.18, "grad_norm": 0.453125, "learning_rate": 2.8681141717499198e-05, "loss": 2.148, "step": 8550 }, { "epoch": 0.18, "grad_norm": 0.431640625, "learning_rate": 2.867691006100187e-05, "loss": 2.1512, "step": 8560 }, { "epoch": 0.18, "grad_norm": 0.4921875, "learning_rate": 2.867267193977506e-05, "loss": 2.1567, "step": 8570 }, { "epoch": 0.18, "grad_norm": 0.490234375, "learning_rate": 2.8668427355822036e-05, "loss": 2.1394, "step": 8580 }, { "epoch": 0.18, "grad_norm": 0.466796875, "learning_rate": 2.8664176311149097e-05, "loss": 2.1065, "step": 8590 }, { "epoch": 0.18, "grad_norm": 0.44140625, "learning_rate": 2.86599188077656e-05, "loss": 2.1543, "step": 8600 }, { "epoch": 0.18, "grad_norm": 0.478515625, "learning_rate": 2.8655654847683965e-05, "loss": 2.1116, "step": 8610 }, { "epoch": 0.18, "grad_norm": 0.4453125, "learning_rate": 2.8651384432919655e-05, "loss": 2.1389, "step": 8620 }, { "epoch": 0.18, "grad_norm": 0.41796875, "learning_rate": 2.864710756549119e-05, "loss": 2.1154, "step": 8630 }, { "epoch": 0.18, "grad_norm": 0.431640625, "learning_rate": 2.8642824247420142e-05, "loss": 2.1443, "step": 8640 }, { "epoch": 0.18, "grad_norm": 0.466796875, "learning_rate": 2.863853448073111e-05, "loss": 2.1235, "step": 8650 }, { "epoch": 0.18, "grad_norm": 0.478515625, "learning_rate": 2.8634238267451776e-05, "loss": 2.1575, "step": 8660 }, { "epoch": 0.18, "grad_norm": 0.4453125, "learning_rate": 2.8629935609612842e-05, "loss": 2.1711, "step": 8670 }, { "epoch": 0.18, "grad_norm": 0.455078125, "learning_rate": 2.8625626509248073e-05, "loss": 2.1539, "step": 8680 }, { "epoch": 0.18, "grad_norm": 0.447265625, "learning_rate": 2.862131096839426e-05, "loss": 2.1201, "step": 8690 }, { "epoch": 0.18, "grad_norm": 0.451171875, "learning_rate": 2.8616988989091263e-05, "loss": 2.1376, "step": 8700 }, { "epoch": 0.18, "grad_norm": 0.4375, "learning_rate": 2.8612660573381964e-05, "loss": 2.141, "step": 8710 }, { "epoch": 0.18, "grad_norm": 0.423828125, "learning_rate": 2.8608325723312296e-05, "loss": 2.1141, "step": 8720 }, { "epoch": 0.18, "grad_norm": 0.482421875, "learning_rate": 2.860398444093124e-05, "loss": 2.1285, "step": 8730 }, { "epoch": 0.18, "grad_norm": 0.431640625, "learning_rate": 2.85996367282908e-05, "loss": 2.132, "step": 8740 }, { "epoch": 0.18, "grad_norm": 0.44140625, "learning_rate": 2.8595282587446034e-05, "loss": 2.1416, "step": 8750 }, { "epoch": 0.18, "grad_norm": 0.439453125, "learning_rate": 2.859092202045504e-05, "loss": 2.1107, "step": 8760 }, { "epoch": 0.18, "grad_norm": 0.443359375, "learning_rate": 2.858655502937894e-05, "loss": 2.1308, "step": 8770 }, { "epoch": 0.18, "grad_norm": 0.439453125, "learning_rate": 2.8582181616281904e-05, "loss": 2.1294, "step": 8780 }, { "epoch": 0.18, "grad_norm": 0.458984375, "learning_rate": 2.857780178323113e-05, "loss": 2.1616, "step": 8790 }, { "epoch": 0.18, "grad_norm": 0.44921875, "learning_rate": 2.8573415532296864e-05, "loss": 2.1197, "step": 8800 }, { "epoch": 0.18, "grad_norm": 0.42578125, "learning_rate": 2.8569022865552368e-05, "loss": 2.1422, "step": 8810 }, { "epoch": 0.18, "grad_norm": 0.439453125, "learning_rate": 2.856462378507394e-05, "loss": 2.1022, "step": 8820 }, { "epoch": 0.18, "grad_norm": 0.478515625, "learning_rate": 2.8560218292940924e-05, "loss": 2.1064, "step": 8830 }, { "epoch": 0.18, "grad_norm": 0.43359375, "learning_rate": 2.855580639123568e-05, "loss": 2.1418, "step": 8840 }, { "epoch": 0.18, "grad_norm": 0.44921875, "learning_rate": 2.85513880820436e-05, "loss": 2.1461, "step": 8850 }, { "epoch": 0.18, "grad_norm": 0.43359375, "learning_rate": 2.854696336745312e-05, "loss": 2.1522, "step": 8860 }, { "epoch": 0.18, "grad_norm": 0.416015625, "learning_rate": 2.854253224955567e-05, "loss": 2.1539, "step": 8870 }, { "epoch": 0.18, "grad_norm": 0.421875, "learning_rate": 2.853809473044574e-05, "loss": 2.1322, "step": 8880 }, { "epoch": 0.18, "grad_norm": 0.447265625, "learning_rate": 2.853365081222083e-05, "loss": 2.0938, "step": 8890 }, { "epoch": 0.19, "grad_norm": 0.453125, "learning_rate": 2.8529200496981472e-05, "loss": 2.1325, "step": 8900 }, { "epoch": 0.19, "grad_norm": 0.51171875, "learning_rate": 2.852474378683121e-05, "loss": 2.127, "step": 8910 }, { "epoch": 0.19, "grad_norm": 0.64453125, "learning_rate": 2.8520280683876625e-05, "loss": 2.1291, "step": 8920 }, { "epoch": 0.19, "grad_norm": 0.494140625, "learning_rate": 2.8515811190227312e-05, "loss": 2.1468, "step": 8930 }, { "epoch": 0.19, "grad_norm": 0.482421875, "learning_rate": 2.8511335307995882e-05, "loss": 2.1555, "step": 8940 }, { "epoch": 0.19, "grad_norm": 0.421875, "learning_rate": 2.8506853039297976e-05, "loss": 2.0734, "step": 8950 }, { "epoch": 0.19, "grad_norm": 0.44921875, "learning_rate": 2.850236438625225e-05, "loss": 2.1333, "step": 8960 }, { "epoch": 0.19, "grad_norm": 0.43359375, "learning_rate": 2.8497869350980376e-05, "loss": 2.1369, "step": 8970 }, { "epoch": 0.19, "grad_norm": 0.443359375, "learning_rate": 2.8493367935607037e-05, "loss": 2.1502, "step": 8980 }, { "epoch": 0.19, "grad_norm": 0.484375, "learning_rate": 2.848886014225995e-05, "loss": 2.1398, "step": 8990 }, { "epoch": 0.19, "grad_norm": 0.45703125, "learning_rate": 2.8484345973069824e-05, "loss": 2.0943, "step": 9000 }, { "epoch": 0.19, "eval_accuracy": 0.5566261695293954, "eval_loss": 2.0073742866516113, "eval_runtime": 16.4366, "eval_samples_per_second": 36.2, "eval_steps_per_second": 1.156, "step": 9000 }, { "epoch": 0.19, "grad_norm": 0.47265625, "learning_rate": 2.8479825430170398e-05, "loss": 2.1152, "step": 9010 }, { "epoch": 0.19, "grad_norm": 0.455078125, "learning_rate": 2.8475298515698418e-05, "loss": 2.1292, "step": 9020 }, { "epoch": 0.19, "grad_norm": 0.43359375, "learning_rate": 2.8470765231793642e-05, "loss": 2.1321, "step": 9030 }, { "epoch": 0.19, "grad_norm": 0.431640625, "learning_rate": 2.8466225580598836e-05, "loss": 2.1557, "step": 9040 }, { "epoch": 0.19, "grad_norm": 0.474609375, "learning_rate": 2.8461679564259785e-05, "loss": 2.1168, "step": 9050 }, { "epoch": 0.19, "grad_norm": 0.474609375, "learning_rate": 2.845712718492527e-05, "loss": 2.1334, "step": 9060 }, { "epoch": 0.19, "grad_norm": 0.470703125, "learning_rate": 2.8452568444747085e-05, "loss": 2.1103, "step": 9070 }, { "epoch": 0.19, "grad_norm": 0.458984375, "learning_rate": 2.844800334588003e-05, "loss": 2.17, "step": 9080 }, { "epoch": 0.19, "grad_norm": 0.431640625, "learning_rate": 2.844343189048192e-05, "loss": 2.1498, "step": 9090 }, { "epoch": 0.19, "grad_norm": 0.44921875, "learning_rate": 2.843885408071356e-05, "loss": 2.173, "step": 9100 }, { "epoch": 0.19, "grad_norm": 0.439453125, "learning_rate": 2.8434269918738764e-05, "loss": 2.1362, "step": 9110 }, { "epoch": 0.19, "grad_norm": 0.51953125, "learning_rate": 2.8429679406724353e-05, "loss": 2.1659, "step": 9120 }, { "epoch": 0.19, "grad_norm": 0.45703125, "learning_rate": 2.8425082546840145e-05, "loss": 2.141, "step": 9130 }, { "epoch": 0.19, "grad_norm": 0.44140625, "learning_rate": 2.8420479341258955e-05, "loss": 2.1376, "step": 9140 }, { "epoch": 0.19, "grad_norm": 0.466796875, "learning_rate": 2.8415869792156612e-05, "loss": 2.1429, "step": 9150 }, { "epoch": 0.19, "grad_norm": 0.435546875, "learning_rate": 2.8411253901711924e-05, "loss": 2.1491, "step": 9160 }, { "epoch": 0.19, "grad_norm": 0.4375, "learning_rate": 2.8406631672106706e-05, "loss": 2.1016, "step": 9170 }, { "epoch": 0.19, "grad_norm": 0.44140625, "learning_rate": 2.8402003105525775e-05, "loss": 2.1172, "step": 9180 }, { "epoch": 0.19, "grad_norm": 0.42578125, "learning_rate": 2.8397368204156935e-05, "loss": 2.1402, "step": 9190 }, { "epoch": 0.19, "grad_norm": 0.427734375, "learning_rate": 2.8392726970190983e-05, "loss": 2.1305, "step": 9200 }, { "epoch": 0.19, "grad_norm": 0.490234375, "learning_rate": 2.8388079405821718e-05, "loss": 2.1218, "step": 9210 }, { "epoch": 0.19, "grad_norm": 0.439453125, "learning_rate": 2.838342551324592e-05, "loss": 2.1405, "step": 9220 }, { "epoch": 0.19, "grad_norm": 0.427734375, "learning_rate": 2.8378765294663373e-05, "loss": 2.1289, "step": 9230 }, { "epoch": 0.19, "grad_norm": 0.46875, "learning_rate": 2.8374098752276843e-05, "loss": 2.1442, "step": 9240 }, { "epoch": 0.19, "grad_norm": 0.455078125, "learning_rate": 2.836942588829208e-05, "loss": 2.1256, "step": 9250 }, { "epoch": 0.19, "grad_norm": 0.490234375, "learning_rate": 2.836474670491784e-05, "loss": 2.1373, "step": 9260 }, { "epoch": 0.19, "grad_norm": 0.46875, "learning_rate": 2.8360061204365845e-05, "loss": 2.1321, "step": 9270 }, { "epoch": 0.19, "grad_norm": 0.44921875, "learning_rate": 2.8355369388850815e-05, "loss": 2.1388, "step": 9280 }, { "epoch": 0.19, "grad_norm": 0.431640625, "learning_rate": 2.8350671260590455e-05, "loss": 2.1369, "step": 9290 }, { "epoch": 0.19, "grad_norm": 0.431640625, "learning_rate": 2.834596682180545e-05, "loss": 2.1429, "step": 9300 }, { "epoch": 0.19, "grad_norm": 0.423828125, "learning_rate": 2.8341256074719465e-05, "loss": 2.1402, "step": 9310 }, { "epoch": 0.19, "grad_norm": 0.87890625, "learning_rate": 2.833653902155916e-05, "loss": 2.1267, "step": 9320 }, { "epoch": 0.19, "grad_norm": 0.4296875, "learning_rate": 2.833181566455416e-05, "loss": 2.1391, "step": 9330 }, { "epoch": 0.19, "grad_norm": 0.42578125, "learning_rate": 2.8327086005937078e-05, "loss": 2.1407, "step": 9340 }, { "epoch": 0.19, "grad_norm": 0.43359375, "learning_rate": 2.8322350047943503e-05, "loss": 2.1258, "step": 9350 }, { "epoch": 0.19, "grad_norm": 0.45703125, "learning_rate": 2.831760779281201e-05, "loss": 2.1461, "step": 9360 }, { "epoch": 0.19, "grad_norm": 0.431640625, "learning_rate": 2.831285924278413e-05, "loss": 2.1264, "step": 9370 }, { "epoch": 0.2, "grad_norm": 0.451171875, "learning_rate": 2.8308104400104397e-05, "loss": 2.1298, "step": 9380 }, { "epoch": 0.2, "grad_norm": 0.453125, "learning_rate": 2.830334326702029e-05, "loss": 2.1558, "step": 9390 }, { "epoch": 0.2, "grad_norm": 0.451171875, "learning_rate": 2.8298575845782285e-05, "loss": 2.1233, "step": 9400 }, { "epoch": 0.2, "grad_norm": 0.431640625, "learning_rate": 2.829380213864382e-05, "loss": 2.1063, "step": 9410 }, { "epoch": 0.2, "grad_norm": 0.46484375, "learning_rate": 2.8289022147861307e-05, "loss": 2.1103, "step": 9420 }, { "epoch": 0.2, "grad_norm": 0.515625, "learning_rate": 2.8284235875694125e-05, "loss": 2.1359, "step": 9430 }, { "epoch": 0.2, "grad_norm": 0.435546875, "learning_rate": 2.8279443324404623e-05, "loss": 2.1452, "step": 9440 }, { "epoch": 0.2, "grad_norm": 0.44140625, "learning_rate": 2.827464449625812e-05, "loss": 2.1253, "step": 9450 }, { "epoch": 0.2, "grad_norm": 0.42578125, "learning_rate": 2.82698393935229e-05, "loss": 2.1038, "step": 9460 }, { "epoch": 0.2, "grad_norm": 0.435546875, "learning_rate": 2.8265028018470215e-05, "loss": 2.1573, "step": 9470 }, { "epoch": 0.2, "grad_norm": 0.423828125, "learning_rate": 2.826021037337428e-05, "loss": 2.1392, "step": 9480 }, { "epoch": 0.2, "grad_norm": 0.4375, "learning_rate": 2.8255386460512273e-05, "loss": 2.0994, "step": 9490 }, { "epoch": 0.2, "grad_norm": 0.423828125, "learning_rate": 2.8250556282164334e-05, "loss": 2.1421, "step": 9500 }, { "epoch": 0.2, "eval_accuracy": 0.556583454496168, "eval_loss": 2.0063412189483643, "eval_runtime": 16.4451, "eval_samples_per_second": 36.181, "eval_steps_per_second": 1.155, "step": 9500 }, { "epoch": 0.2, "grad_norm": 0.431640625, "learning_rate": 2.824571984061357e-05, "loss": 2.1158, "step": 9510 }, { "epoch": 0.2, "grad_norm": 0.419921875, "learning_rate": 2.8240877138146045e-05, "loss": 2.1322, "step": 9520 }, { "epoch": 0.2, "grad_norm": 0.408203125, "learning_rate": 2.8236028177050776e-05, "loss": 2.1359, "step": 9530 }, { "epoch": 0.2, "grad_norm": 0.462890625, "learning_rate": 2.823117295961975e-05, "loss": 2.1402, "step": 9540 }, { "epoch": 0.2, "grad_norm": 0.427734375, "learning_rate": 2.8226311488147897e-05, "loss": 2.1369, "step": 9550 }, { "epoch": 0.2, "grad_norm": 0.44140625, "learning_rate": 2.8221443764933125e-05, "loss": 2.1434, "step": 9560 }, { "epoch": 0.2, "grad_norm": 0.443359375, "learning_rate": 2.8216569792276274e-05, "loss": 2.1244, "step": 9570 }, { "epoch": 0.2, "grad_norm": 0.423828125, "learning_rate": 2.8211689572481156e-05, "loss": 2.1241, "step": 9580 }, { "epoch": 0.2, "grad_norm": 0.439453125, "learning_rate": 2.820680310785452e-05, "loss": 2.1161, "step": 9590 }, { "epoch": 0.2, "grad_norm": 0.43359375, "learning_rate": 2.820191040070607e-05, "loss": 2.1528, "step": 9600 }, { "epoch": 0.2, "grad_norm": 0.453125, "learning_rate": 2.8197011453348474e-05, "loss": 2.1271, "step": 9610 }, { "epoch": 0.2, "grad_norm": 0.443359375, "learning_rate": 2.8192106268097336e-05, "loss": 2.1448, "step": 9620 }, { "epoch": 0.2, "grad_norm": 0.470703125, "learning_rate": 2.8187194847271218e-05, "loss": 2.1266, "step": 9630 }, { "epoch": 0.2, "grad_norm": 0.462890625, "learning_rate": 2.8182277193191617e-05, "loss": 2.1396, "step": 9640 }, { "epoch": 0.2, "grad_norm": 0.53125, "learning_rate": 2.817735330818299e-05, "loss": 2.1245, "step": 9650 }, { "epoch": 0.2, "grad_norm": 0.453125, "learning_rate": 2.8172423194572736e-05, "loss": 2.0962, "step": 9660 }, { "epoch": 0.2, "grad_norm": 0.44140625, "learning_rate": 2.8167486854691178e-05, "loss": 2.1343, "step": 9670 }, { "epoch": 0.2, "grad_norm": 0.451171875, "learning_rate": 2.8162544290871616e-05, "loss": 2.1278, "step": 9680 }, { "epoch": 0.2, "grad_norm": 0.4375, "learning_rate": 2.815759550545027e-05, "loss": 2.1441, "step": 9690 }, { "epoch": 0.2, "grad_norm": 0.435546875, "learning_rate": 2.8152640500766307e-05, "loss": 2.1313, "step": 9700 }, { "epoch": 0.2, "grad_norm": 0.42578125, "learning_rate": 2.8147679279161826e-05, "loss": 2.1243, "step": 9710 }, { "epoch": 0.2, "grad_norm": 0.52734375, "learning_rate": 2.8142711842981878e-05, "loss": 2.1317, "step": 9720 }, { "epoch": 0.2, "grad_norm": 0.42578125, "learning_rate": 2.8137738194574443e-05, "loss": 2.1158, "step": 9730 }, { "epoch": 0.2, "grad_norm": 0.44921875, "learning_rate": 2.8132758336290435e-05, "loss": 2.1255, "step": 9740 }, { "epoch": 0.2, "grad_norm": 0.42578125, "learning_rate": 2.8127772270483714e-05, "loss": 2.1453, "step": 9750 }, { "epoch": 0.2, "grad_norm": 0.4296875, "learning_rate": 2.8122779999511062e-05, "loss": 2.1354, "step": 9760 }, { "epoch": 0.2, "grad_norm": 0.447265625, "learning_rate": 2.8117781525732197e-05, "loss": 2.1403, "step": 9770 }, { "epoch": 0.2, "grad_norm": 0.4296875, "learning_rate": 2.8112776851509774e-05, "loss": 2.1206, "step": 9780 }, { "epoch": 0.2, "grad_norm": 0.451171875, "learning_rate": 2.810776597920938e-05, "loss": 2.1437, "step": 9790 }, { "epoch": 0.2, "grad_norm": 0.41796875, "learning_rate": 2.8102748911199524e-05, "loss": 2.1329, "step": 9800 }, { "epoch": 0.2, "grad_norm": 0.416015625, "learning_rate": 2.8097725649851645e-05, "loss": 2.1559, "step": 9810 }, { "epoch": 0.2, "grad_norm": 0.462890625, "learning_rate": 2.8092696197540117e-05, "loss": 2.1592, "step": 9820 }, { "epoch": 0.2, "grad_norm": 0.451171875, "learning_rate": 2.8087660556642222e-05, "loss": 2.1137, "step": 9830 }, { "epoch": 0.2, "grad_norm": 0.41796875, "learning_rate": 2.80826187295382e-05, "loss": 2.1521, "step": 9840 }, { "epoch": 0.2, "grad_norm": 0.498046875, "learning_rate": 2.8077570718611184e-05, "loss": 2.0977, "step": 9850 }, { "epoch": 0.2, "grad_norm": 0.478515625, "learning_rate": 2.8072516526247238e-05, "loss": 2.1278, "step": 9860 }, { "epoch": 0.21, "grad_norm": 0.4375, "learning_rate": 2.8067456154835354e-05, "loss": 2.1321, "step": 9870 }, { "epoch": 0.21, "grad_norm": 0.4375, "learning_rate": 2.8062389606767445e-05, "loss": 2.0997, "step": 9880 }, { "epoch": 0.21, "grad_norm": 0.431640625, "learning_rate": 2.805731688443834e-05, "loss": 2.1169, "step": 9890 }, { "epoch": 0.21, "grad_norm": 0.48046875, "learning_rate": 2.8052237990245778e-05, "loss": 2.1393, "step": 9900 }, { "epoch": 0.21, "grad_norm": 0.435546875, "learning_rate": 2.8047152926590432e-05, "loss": 2.1279, "step": 9910 }, { "epoch": 0.21, "grad_norm": 0.4296875, "learning_rate": 2.8042061695875883e-05, "loss": 2.1392, "step": 9920 }, { "epoch": 0.21, "grad_norm": 0.46875, "learning_rate": 2.8036964300508625e-05, "loss": 2.1144, "step": 9930 }, { "epoch": 0.21, "grad_norm": 0.443359375, "learning_rate": 2.803186074289807e-05, "loss": 2.1193, "step": 9940 }, { "epoch": 0.21, "grad_norm": 0.4296875, "learning_rate": 2.802675102545653e-05, "loss": 2.1541, "step": 9950 }, { "epoch": 0.21, "grad_norm": 0.43359375, "learning_rate": 2.8021635150599255e-05, "loss": 2.1294, "step": 9960 }, { "epoch": 0.21, "grad_norm": 0.462890625, "learning_rate": 2.801651312074438e-05, "loss": 2.1278, "step": 9970 }, { "epoch": 0.21, "grad_norm": 0.46875, "learning_rate": 2.801138493831296e-05, "loss": 2.1371, "step": 9980 }, { "epoch": 0.21, "grad_norm": 0.443359375, "learning_rate": 2.8006250605728954e-05, "loss": 2.1129, "step": 9990 }, { "epoch": 0.21, "grad_norm": 0.453125, "learning_rate": 2.8001110125419246e-05, "loss": 2.1196, "step": 10000 }, { "epoch": 0.21, "eval_accuracy": 0.5567691006021177, "eval_loss": 2.0049471855163574, "eval_runtime": 16.4359, "eval_samples_per_second": 36.201, "eval_steps_per_second": 1.156, "step": 10000 }, { "epoch": 0.21, "grad_norm": 0.42578125, "learning_rate": 2.7995963499813596e-05, "loss": 2.1338, "step": 10010 }, { "epoch": 0.21, "grad_norm": 0.43359375, "learning_rate": 2.7990810731344686e-05, "loss": 2.1141, "step": 10020 }, { "epoch": 0.21, "grad_norm": 0.44921875, "learning_rate": 2.7985651822448102e-05, "loss": 2.1257, "step": 10030 }, { "epoch": 0.21, "grad_norm": 0.44140625, "learning_rate": 2.7980486775562332e-05, "loss": 2.1507, "step": 10040 }, { "epoch": 0.21, "grad_norm": 0.435546875, "learning_rate": 2.797531559312876e-05, "loss": 2.1074, "step": 10050 }, { "epoch": 0.21, "grad_norm": 0.455078125, "learning_rate": 2.797013827759167e-05, "loss": 2.1374, "step": 10060 }, { "epoch": 0.21, "grad_norm": 0.431640625, "learning_rate": 2.7964954831398252e-05, "loss": 2.1046, "step": 10070 }, { "epoch": 0.21, "grad_norm": 0.447265625, "learning_rate": 2.7959765256998596e-05, "loss": 2.1433, "step": 10080 }, { "epoch": 0.21, "grad_norm": 0.4453125, "learning_rate": 2.795456955684567e-05, "loss": 2.1355, "step": 10090 }, { "epoch": 0.21, "grad_norm": 0.470703125, "learning_rate": 2.794936773339536e-05, "loss": 2.112, "step": 10100 }, { "epoch": 0.21, "grad_norm": 0.4375, "learning_rate": 2.794415978910643e-05, "loss": 2.132, "step": 10110 }, { "epoch": 0.21, "grad_norm": 0.40625, "learning_rate": 2.793894572644055e-05, "loss": 2.1753, "step": 10120 }, { "epoch": 0.21, "grad_norm": 0.43359375, "learning_rate": 2.7933725547862267e-05, "loss": 2.1266, "step": 10130 }, { "epoch": 0.21, "grad_norm": 0.4609375, "learning_rate": 2.7928499255839033e-05, "loss": 2.1375, "step": 10140 }, { "epoch": 0.21, "grad_norm": 0.44921875, "learning_rate": 2.7923266852841182e-05, "loss": 2.1419, "step": 10150 }, { "epoch": 0.21, "grad_norm": 0.49609375, "learning_rate": 2.7918028341341944e-05, "loss": 2.1547, "step": 10160 }, { "epoch": 0.21, "grad_norm": 0.423828125, "learning_rate": 2.791278372381742e-05, "loss": 2.1191, "step": 10170 }, { "epoch": 0.21, "grad_norm": 0.54296875, "learning_rate": 2.7907533002746616e-05, "loss": 2.1251, "step": 10180 }, { "epoch": 0.21, "grad_norm": 0.4453125, "learning_rate": 2.790227618061142e-05, "loss": 2.1291, "step": 10190 }, { "epoch": 0.21, "grad_norm": 0.43359375, "learning_rate": 2.7897013259896586e-05, "loss": 2.1493, "step": 10200 }, { "epoch": 0.21, "grad_norm": 0.421875, "learning_rate": 2.7891744243089775e-05, "loss": 2.1669, "step": 10210 }, { "epoch": 0.21, "grad_norm": 0.4296875, "learning_rate": 2.7886469132681507e-05, "loss": 2.1352, "step": 10220 }, { "epoch": 0.21, "grad_norm": 0.486328125, "learning_rate": 2.7881187931165207e-05, "loss": 2.1588, "step": 10230 }, { "epoch": 0.21, "grad_norm": 0.435546875, "learning_rate": 2.7875900641037155e-05, "loss": 2.1518, "step": 10240 }, { "epoch": 0.21, "grad_norm": 0.419921875, "learning_rate": 2.787060726479652e-05, "loss": 2.1527, "step": 10250 }, { "epoch": 0.21, "grad_norm": 0.443359375, "learning_rate": 2.7865307804945352e-05, "loss": 2.1195, "step": 10260 }, { "epoch": 0.21, "grad_norm": 0.435546875, "learning_rate": 2.7860002263988565e-05, "loss": 2.1271, "step": 10270 }, { "epoch": 0.21, "grad_norm": 0.462890625, "learning_rate": 2.785469064443396e-05, "loss": 2.1163, "step": 10280 }, { "epoch": 0.21, "grad_norm": 0.46875, "learning_rate": 2.7849372948792204e-05, "loss": 2.1123, "step": 10290 }, { "epoch": 0.21, "grad_norm": 0.43359375, "learning_rate": 2.7844049179576837e-05, "loss": 2.1576, "step": 10300 }, { "epoch": 0.21, "grad_norm": 0.59765625, "learning_rate": 2.7838719339304265e-05, "loss": 2.0895, "step": 10310 }, { "epoch": 0.21, "grad_norm": 0.7734375, "learning_rate": 2.7833383430493776e-05, "loss": 2.1207, "step": 10320 }, { "epoch": 0.21, "grad_norm": 0.435546875, "learning_rate": 2.782804145566751e-05, "loss": 2.1284, "step": 10330 }, { "epoch": 0.21, "grad_norm": 0.51171875, "learning_rate": 2.78226934173505e-05, "loss": 2.1896, "step": 10340 }, { "epoch": 0.22, "grad_norm": 0.4453125, "learning_rate": 2.781733931807061e-05, "loss": 2.1414, "step": 10350 }, { "epoch": 0.22, "grad_norm": 0.453125, "learning_rate": 2.78119791603586e-05, "loss": 2.1309, "step": 10360 }, { "epoch": 0.22, "grad_norm": 0.423828125, "learning_rate": 2.7806612946748073e-05, "loss": 2.13, "step": 10370 }, { "epoch": 0.22, "grad_norm": 0.45703125, "learning_rate": 2.7801240679775508e-05, "loss": 2.1399, "step": 10380 }, { "epoch": 0.22, "grad_norm": 0.42578125, "learning_rate": 2.7795862361980237e-05, "loss": 2.0951, "step": 10390 }, { "epoch": 0.22, "grad_norm": 0.431640625, "learning_rate": 2.7790477995904454e-05, "loss": 2.1012, "step": 10400 }, { "epoch": 0.22, "grad_norm": 0.412109375, "learning_rate": 2.7785087584093216e-05, "loss": 2.1295, "step": 10410 }, { "epoch": 0.22, "grad_norm": 0.458984375, "learning_rate": 2.7779691129094435e-05, "loss": 2.1594, "step": 10420 }, { "epoch": 0.22, "grad_norm": 0.45703125, "learning_rate": 2.7774288633458865e-05, "loss": 2.1037, "step": 10430 }, { "epoch": 0.22, "grad_norm": 0.416015625, "learning_rate": 2.776888009974015e-05, "loss": 2.1035, "step": 10440 }, { "epoch": 0.22, "grad_norm": 0.4453125, "learning_rate": 2.7763465530494753e-05, "loss": 2.1344, "step": 10450 }, { "epoch": 0.22, "grad_norm": 0.453125, "learning_rate": 2.775804492828201e-05, "loss": 2.1294, "step": 10460 }, { "epoch": 0.22, "grad_norm": 0.43359375, "learning_rate": 2.7752618295664103e-05, "loss": 2.1379, "step": 10470 }, { "epoch": 0.22, "grad_norm": 0.443359375, "learning_rate": 2.7747185635206057e-05, "loss": 2.1426, "step": 10480 }, { "epoch": 0.22, "grad_norm": 0.443359375, "learning_rate": 2.7741746949475763e-05, "loss": 2.1441, "step": 10490 }, { "epoch": 0.22, "grad_norm": 0.43359375, "learning_rate": 2.773630224104394e-05, "loss": 2.1634, "step": 10500 }, { "epoch": 0.22, "eval_accuracy": 0.5568348160378521, "eval_loss": 2.0042004585266113, "eval_runtime": 16.4274, "eval_samples_per_second": 36.22, "eval_steps_per_second": 1.157, "step": 10500 }, { "epoch": 0.22, "grad_norm": 0.4609375, "learning_rate": 2.7730851512484168e-05, "loss": 2.1206, "step": 10510 }, { "epoch": 0.22, "grad_norm": 0.451171875, "learning_rate": 2.7725394766372874e-05, "loss": 2.1319, "step": 10520 }, { "epoch": 0.22, "grad_norm": 0.4453125, "learning_rate": 2.7719932005289317e-05, "loss": 2.1057, "step": 10530 }, { "epoch": 0.22, "grad_norm": 0.4375, "learning_rate": 2.7714463231815614e-05, "loss": 2.1158, "step": 10540 }, { "epoch": 0.22, "grad_norm": 0.435546875, "learning_rate": 2.77089884485367e-05, "loss": 2.145, "step": 10550 }, { "epoch": 0.22, "grad_norm": 0.4296875, "learning_rate": 2.7703507658040387e-05, "loss": 2.1266, "step": 10560 }, { "epoch": 0.22, "grad_norm": 0.46875, "learning_rate": 2.7698020862917288e-05, "loss": 2.1457, "step": 10570 }, { "epoch": 0.22, "grad_norm": 0.462890625, "learning_rate": 2.7692528065760886e-05, "loss": 2.1206, "step": 10580 }, { "epoch": 0.22, "grad_norm": 0.4375, "learning_rate": 2.7687029269167477e-05, "loss": 2.1058, "step": 10590 }, { "epoch": 0.22, "grad_norm": 0.44921875, "learning_rate": 2.7681524475736212e-05, "loss": 2.1258, "step": 10600 }, { "epoch": 0.22, "grad_norm": 0.494140625, "learning_rate": 2.767601368806906e-05, "loss": 2.1204, "step": 10610 }, { "epoch": 0.22, "grad_norm": 0.4375, "learning_rate": 2.767049690877084e-05, "loss": 2.1271, "step": 10620 }, { "epoch": 0.22, "grad_norm": 0.58203125, "learning_rate": 2.766497414044918e-05, "loss": 2.1059, "step": 10630 }, { "epoch": 0.22, "grad_norm": 0.44921875, "learning_rate": 2.7659445385714567e-05, "loss": 2.1184, "step": 10640 }, { "epoch": 0.22, "grad_norm": 0.435546875, "learning_rate": 2.7653910647180298e-05, "loss": 2.1299, "step": 10650 }, { "epoch": 0.22, "grad_norm": 0.416015625, "learning_rate": 2.7648369927462503e-05, "loss": 2.1336, "step": 10660 }, { "epoch": 0.22, "grad_norm": 0.41015625, "learning_rate": 2.7642823229180138e-05, "loss": 2.113, "step": 10670 }, { "epoch": 0.22, "grad_norm": 0.4296875, "learning_rate": 2.763727055495499e-05, "loss": 2.1126, "step": 10680 }, { "epoch": 0.22, "grad_norm": 0.44140625, "learning_rate": 2.763171190741167e-05, "loss": 2.1323, "step": 10690 }, { "epoch": 0.22, "grad_norm": 0.43359375, "learning_rate": 2.7626147289177602e-05, "loss": 2.1229, "step": 10700 }, { "epoch": 0.22, "grad_norm": 0.4296875, "learning_rate": 2.7620576702883053e-05, "loss": 2.1137, "step": 10710 }, { "epoch": 0.22, "grad_norm": 0.44140625, "learning_rate": 2.761500015116109e-05, "loss": 2.1521, "step": 10720 }, { "epoch": 0.22, "grad_norm": 0.4296875, "learning_rate": 2.7609417636647602e-05, "loss": 2.1017, "step": 10730 }, { "epoch": 0.22, "grad_norm": 0.4375, "learning_rate": 2.7603829161981317e-05, "loss": 2.1455, "step": 10740 }, { "epoch": 0.22, "grad_norm": 0.4453125, "learning_rate": 2.7598234729803753e-05, "loss": 2.1034, "step": 10750 }, { "epoch": 0.22, "grad_norm": 0.4453125, "learning_rate": 2.7592634342759265e-05, "loss": 2.1238, "step": 10760 }, { "epoch": 0.22, "grad_norm": 0.4375, "learning_rate": 2.758702800349501e-05, "loss": 2.1025, "step": 10770 }, { "epoch": 0.22, "grad_norm": 0.458984375, "learning_rate": 2.7581415714660963e-05, "loss": 2.1239, "step": 10780 }, { "epoch": 0.22, "grad_norm": 0.443359375, "learning_rate": 2.7575797478909914e-05, "loss": 2.0961, "step": 10790 }, { "epoch": 0.22, "grad_norm": 0.47265625, "learning_rate": 2.7570173298897458e-05, "loss": 2.1189, "step": 10800 }, { "epoch": 0.22, "grad_norm": 0.44140625, "learning_rate": 2.7564543177282e-05, "loss": 2.1192, "step": 10810 }, { "epoch": 0.22, "grad_norm": 0.447265625, "learning_rate": 2.7558907116724766e-05, "loss": 2.0905, "step": 10820 }, { "epoch": 0.23, "grad_norm": 0.443359375, "learning_rate": 2.7553265119889765e-05, "loss": 2.1482, "step": 10830 }, { "epoch": 0.23, "grad_norm": 0.431640625, "learning_rate": 2.7547617189443835e-05, "loss": 2.0997, "step": 10840 }, { "epoch": 0.23, "grad_norm": 0.4453125, "learning_rate": 2.7541963328056616e-05, "loss": 2.1088, "step": 10850 }, { "epoch": 0.23, "grad_norm": 0.4296875, "learning_rate": 2.7536303538400532e-05, "loss": 2.1213, "step": 10860 }, { "epoch": 0.23, "grad_norm": 0.46484375, "learning_rate": 2.753063782315083e-05, "loss": 2.1114, "step": 10870 }, { "epoch": 0.23, "grad_norm": 0.447265625, "learning_rate": 2.752496618498555e-05, "loss": 2.1292, "step": 10880 }, { "epoch": 0.23, "grad_norm": 0.447265625, "learning_rate": 2.7519288626585528e-05, "loss": 2.1307, "step": 10890 }, { "epoch": 0.23, "grad_norm": 0.447265625, "learning_rate": 2.751360515063441e-05, "loss": 2.13, "step": 10900 }, { "epoch": 0.23, "grad_norm": 0.42578125, "learning_rate": 2.7507915759818622e-05, "loss": 2.14, "step": 10910 }, { "epoch": 0.23, "grad_norm": 0.48046875, "learning_rate": 2.75022204568274e-05, "loss": 2.1275, "step": 10920 }, { "epoch": 0.23, "grad_norm": 0.447265625, "learning_rate": 2.749651924435277e-05, "loss": 2.1041, "step": 10930 }, { "epoch": 0.23, "grad_norm": 0.421875, "learning_rate": 2.749081212508955e-05, "loss": 2.1419, "step": 10940 }, { "epoch": 0.23, "grad_norm": 0.435546875, "learning_rate": 2.7485099101735348e-05, "loss": 2.1523, "step": 10950 }, { "epoch": 0.23, "grad_norm": 0.421875, "learning_rate": 2.7479380176990575e-05, "loss": 2.1015, "step": 10960 }, { "epoch": 0.23, "grad_norm": 0.44140625, "learning_rate": 2.747365535355841e-05, "loss": 2.1573, "step": 10970 }, { "epoch": 0.23, "grad_norm": 0.427734375, "learning_rate": 2.746792463414484e-05, "loss": 2.1187, "step": 10980 }, { "epoch": 0.23, "grad_norm": 0.4296875, "learning_rate": 2.746218802145863e-05, "loss": 2.1564, "step": 10990 }, { "epoch": 0.23, "grad_norm": 0.4375, "learning_rate": 2.7456445518211332e-05, "loss": 2.1361, "step": 11000 }, { "epoch": 0.23, "eval_accuracy": 0.5572570377124457, "eval_loss": 2.003464937210083, "eval_runtime": 16.4509, "eval_samples_per_second": 36.168, "eval_steps_per_second": 1.155, "step": 11000 }, { "epoch": 0.23, "grad_norm": 0.5234375, "learning_rate": 2.7450697127117273e-05, "loss": 2.1047, "step": 11010 }, { "epoch": 0.23, "grad_norm": 0.46484375, "learning_rate": 2.7444942850893588e-05, "loss": 2.1343, "step": 11020 }, { "epoch": 0.23, "grad_norm": 0.423828125, "learning_rate": 2.743918269226017e-05, "loss": 2.1454, "step": 11030 }, { "epoch": 0.23, "grad_norm": 0.4296875, "learning_rate": 2.7433416653939696e-05, "loss": 2.1369, "step": 11040 }, { "epoch": 0.23, "grad_norm": 0.435546875, "learning_rate": 2.7427644738657635e-05, "loss": 2.1412, "step": 11050 }, { "epoch": 0.23, "grad_norm": 0.4453125, "learning_rate": 2.7421866949142216e-05, "loss": 2.0932, "step": 11060 }, { "epoch": 0.23, "grad_norm": 0.4609375, "learning_rate": 2.7416083288124464e-05, "loss": 2.1111, "step": 11070 }, { "epoch": 0.23, "grad_norm": 0.43359375, "learning_rate": 2.741029375833816e-05, "loss": 2.142, "step": 11080 }, { "epoch": 0.23, "grad_norm": 0.4375, "learning_rate": 2.7404498362519872e-05, "loss": 2.1368, "step": 11090 }, { "epoch": 0.23, "grad_norm": 0.44140625, "learning_rate": 2.739869710340894e-05, "loss": 2.1066, "step": 11100 }, { "epoch": 0.23, "grad_norm": 0.431640625, "learning_rate": 2.7392889983747473e-05, "loss": 2.1737, "step": 11110 }, { "epoch": 0.23, "grad_norm": 0.443359375, "learning_rate": 2.738707700628034e-05, "loss": 2.1517, "step": 11120 }, { "epoch": 0.23, "grad_norm": 0.443359375, "learning_rate": 2.73812581737552e-05, "loss": 2.1411, "step": 11130 }, { "epoch": 0.23, "grad_norm": 0.43359375, "learning_rate": 2.737543348892246e-05, "loss": 2.1118, "step": 11140 }, { "epoch": 0.23, "grad_norm": 0.427734375, "learning_rate": 2.7369602954535308e-05, "loss": 2.1332, "step": 11150 }, { "epoch": 0.23, "grad_norm": 0.43359375, "learning_rate": 2.736376657334968e-05, "loss": 2.1295, "step": 11160 }, { "epoch": 0.23, "grad_norm": 0.494140625, "learning_rate": 2.73579243481243e-05, "loss": 2.0897, "step": 11170 }, { "epoch": 0.23, "grad_norm": 0.419921875, "learning_rate": 2.735207628162063e-05, "loss": 2.1324, "step": 11180 }, { "epoch": 0.23, "grad_norm": 0.462890625, "learning_rate": 2.7346222376602906e-05, "loss": 2.1254, "step": 11190 }, { "epoch": 0.23, "grad_norm": 0.4296875, "learning_rate": 2.7340362635838115e-05, "loss": 2.1349, "step": 11200 }, { "epoch": 0.23, "grad_norm": 0.470703125, "learning_rate": 2.7334497062096018e-05, "loss": 2.1281, "step": 11210 }, { "epoch": 0.23, "grad_norm": 0.458984375, "learning_rate": 2.732862565814912e-05, "loss": 2.1122, "step": 11220 }, { "epoch": 0.23, "grad_norm": 0.447265625, "learning_rate": 2.7322748426772678e-05, "loss": 2.1334, "step": 11230 }, { "epoch": 0.23, "grad_norm": 0.46875, "learning_rate": 2.731686537074472e-05, "loss": 2.1648, "step": 11240 }, { "epoch": 0.23, "grad_norm": 0.45703125, "learning_rate": 2.7310976492846013e-05, "loss": 2.1501, "step": 11250 }, { "epoch": 0.23, "grad_norm": 0.455078125, "learning_rate": 2.7305081795860084e-05, "loss": 2.139, "step": 11260 }, { "epoch": 0.23, "grad_norm": 0.474609375, "learning_rate": 2.7299181282573206e-05, "loss": 2.134, "step": 11270 }, { "epoch": 0.23, "grad_norm": 0.427734375, "learning_rate": 2.72932749557744e-05, "loss": 2.0959, "step": 11280 }, { "epoch": 0.23, "grad_norm": 0.421875, "learning_rate": 2.7287362818255438e-05, "loss": 2.0825, "step": 11290 }, { "epoch": 0.23, "grad_norm": 0.4296875, "learning_rate": 2.7281444872810843e-05, "loss": 2.1234, "step": 11300 }, { "epoch": 0.24, "grad_norm": 0.478515625, "learning_rate": 2.727552112223787e-05, "loss": 2.1073, "step": 11310 }, { "epoch": 0.24, "grad_norm": 0.427734375, "learning_rate": 2.7269591569336532e-05, "loss": 2.1179, "step": 11320 }, { "epoch": 0.24, "grad_norm": 0.43359375, "learning_rate": 2.7263656216909574e-05, "loss": 2.1269, "step": 11330 }, { "epoch": 0.24, "grad_norm": 0.447265625, "learning_rate": 2.7257715067762493e-05, "loss": 2.1363, "step": 11340 }, { "epoch": 0.24, "grad_norm": 0.46484375, "learning_rate": 2.7251768124703512e-05, "loss": 2.1344, "step": 11350 }, { "epoch": 0.24, "grad_norm": 0.5, "learning_rate": 2.7245815390543612e-05, "loss": 2.0938, "step": 11360 }, { "epoch": 0.24, "grad_norm": 0.44921875, "learning_rate": 2.7239856868096488e-05, "loss": 2.138, "step": 11370 }, { "epoch": 0.24, "grad_norm": 0.5625, "learning_rate": 2.7233892560178594e-05, "loss": 2.094, "step": 11380 }, { "epoch": 0.24, "grad_norm": 0.42578125, "learning_rate": 2.7227922469609094e-05, "loss": 2.1032, "step": 11390 }, { "epoch": 0.24, "grad_norm": 0.447265625, "learning_rate": 2.722194659920991e-05, "loss": 2.1207, "step": 11400 }, { "epoch": 0.24, "grad_norm": 0.4375, "learning_rate": 2.721596495180568e-05, "loss": 2.1311, "step": 11410 }, { "epoch": 0.24, "grad_norm": 0.427734375, "learning_rate": 2.720997753022378e-05, "loss": 2.1437, "step": 11420 }, { "epoch": 0.24, "grad_norm": 0.4375, "learning_rate": 2.720398433729431e-05, "loss": 2.1223, "step": 11430 }, { "epoch": 0.24, "grad_norm": 0.419921875, "learning_rate": 2.71979853758501e-05, "loss": 2.1518, "step": 11440 }, { "epoch": 0.24, "grad_norm": 0.427734375, "learning_rate": 2.7191980648726714e-05, "loss": 2.118, "step": 11450 }, { "epoch": 0.24, "grad_norm": 0.41796875, "learning_rate": 2.7185970158762423e-05, "loss": 2.1094, "step": 11460 }, { "epoch": 0.24, "grad_norm": 0.427734375, "learning_rate": 2.7179953908798246e-05, "loss": 2.1038, "step": 11470 }, { "epoch": 0.24, "grad_norm": 0.443359375, "learning_rate": 2.7173931901677908e-05, "loss": 2.1237, "step": 11480 }, { "epoch": 0.24, "grad_norm": 0.447265625, "learning_rate": 2.7167904140247855e-05, "loss": 2.0998, "step": 11490 }, { "epoch": 0.24, "grad_norm": 0.462890625, "learning_rate": 2.7161870627357263e-05, "loss": 2.1614, "step": 11500 }, { "epoch": 0.24, "eval_accuracy": 0.5572373230817254, "eval_loss": 2.002742052078247, "eval_runtime": 16.4495, "eval_samples_per_second": 36.171, "eval_steps_per_second": 1.155, "step": 11500 }, { "epoch": 0.24, "grad_norm": 0.421875, "learning_rate": 2.7155831365858016e-05, "loss": 2.123, "step": 11510 }, { "epoch": 0.24, "grad_norm": 0.4609375, "learning_rate": 2.714978635860472e-05, "loss": 2.1166, "step": 11520 }, { "epoch": 0.24, "grad_norm": 0.435546875, "learning_rate": 2.7143735608454704e-05, "loss": 2.1447, "step": 11530 }, { "epoch": 0.24, "grad_norm": 0.466796875, "learning_rate": 2.7137679118268004e-05, "loss": 2.1581, "step": 11540 }, { "epoch": 0.24, "grad_norm": 0.427734375, "learning_rate": 2.713161689090736e-05, "loss": 2.1443, "step": 11550 }, { "epoch": 0.24, "grad_norm": 0.423828125, "learning_rate": 2.712554892923824e-05, "loss": 2.1002, "step": 11560 }, { "epoch": 0.24, "grad_norm": 0.431640625, "learning_rate": 2.7119475236128816e-05, "loss": 2.1158, "step": 11570 }, { "epoch": 0.24, "grad_norm": 0.421875, "learning_rate": 2.711339581444997e-05, "loss": 2.1226, "step": 11580 }, { "epoch": 0.24, "grad_norm": 0.431640625, "learning_rate": 2.710731066707529e-05, "loss": 2.1133, "step": 11590 }, { "epoch": 0.24, "grad_norm": 0.419921875, "learning_rate": 2.7101219796881068e-05, "loss": 2.136, "step": 11600 }, { "epoch": 0.24, "grad_norm": 0.462890625, "learning_rate": 2.7095123206746313e-05, "loss": 2.1554, "step": 11610 }, { "epoch": 0.24, "grad_norm": 0.46484375, "learning_rate": 2.708902089955272e-05, "loss": 2.1598, "step": 11620 }, { "epoch": 0.24, "grad_norm": 0.55078125, "learning_rate": 2.70829128781847e-05, "loss": 2.1231, "step": 11630 }, { "epoch": 0.24, "grad_norm": 0.43359375, "learning_rate": 2.7076799145529353e-05, "loss": 2.1169, "step": 11640 }, { "epoch": 0.24, "grad_norm": 0.48046875, "learning_rate": 2.7070679704476497e-05, "loss": 2.1103, "step": 11650 }, { "epoch": 0.24, "grad_norm": 0.43359375, "learning_rate": 2.706455455791863e-05, "loss": 2.1337, "step": 11660 }, { "epoch": 0.24, "grad_norm": 0.43359375, "learning_rate": 2.7058423708750954e-05, "loss": 2.1526, "step": 11670 }, { "epoch": 0.24, "grad_norm": 0.4375, "learning_rate": 2.7052287159871366e-05, "loss": 2.1409, "step": 11680 }, { "epoch": 0.24, "grad_norm": 0.44140625, "learning_rate": 2.7046144914180463e-05, "loss": 2.126, "step": 11690 }, { "epoch": 0.24, "grad_norm": 0.453125, "learning_rate": 2.7039996974581524e-05, "loss": 2.1366, "step": 11700 }, { "epoch": 0.24, "grad_norm": 0.44140625, "learning_rate": 2.7033843343980523e-05, "loss": 2.1041, "step": 11710 }, { "epoch": 0.24, "grad_norm": 0.453125, "learning_rate": 2.7027684025286134e-05, "loss": 2.1407, "step": 11720 }, { "epoch": 0.24, "grad_norm": 0.427734375, "learning_rate": 2.7021519021409704e-05, "loss": 2.1085, "step": 11730 }, { "epoch": 0.24, "grad_norm": 0.427734375, "learning_rate": 2.7015348335265276e-05, "loss": 2.1546, "step": 11740 }, { "epoch": 0.24, "grad_norm": 0.4609375, "learning_rate": 2.7009171969769576e-05, "loss": 2.1217, "step": 11750 }, { "epoch": 0.24, "grad_norm": 0.4453125, "learning_rate": 2.7002989927842013e-05, "loss": 2.1162, "step": 11760 }, { "epoch": 0.24, "grad_norm": 0.435546875, "learning_rate": 2.699680221240469e-05, "loss": 2.1309, "step": 11770 }, { "epoch": 0.24, "grad_norm": 0.443359375, "learning_rate": 2.699060882638238e-05, "loss": 2.0804, "step": 11780 }, { "epoch": 0.25, "grad_norm": 0.451171875, "learning_rate": 2.6984409772702537e-05, "loss": 2.1226, "step": 11790 }, { "epoch": 0.25, "grad_norm": 0.455078125, "learning_rate": 2.6978205054295302e-05, "loss": 2.1519, "step": 11800 }, { "epoch": 0.25, "grad_norm": 0.5078125, "learning_rate": 2.6971994674093484e-05, "loss": 2.1112, "step": 11810 }, { "epoch": 0.25, "grad_norm": 0.443359375, "learning_rate": 2.6965778635032573e-05, "loss": 2.0919, "step": 11820 }, { "epoch": 0.25, "grad_norm": 0.431640625, "learning_rate": 2.6959556940050736e-05, "loss": 2.1574, "step": 11830 }, { "epoch": 0.25, "grad_norm": 0.431640625, "learning_rate": 2.695332959208881e-05, "loss": 2.121, "step": 11840 }, { "epoch": 0.25, "grad_norm": 0.45703125, "learning_rate": 2.6947096594090306e-05, "loss": 2.0938, "step": 11850 }, { "epoch": 0.25, "grad_norm": 0.47265625, "learning_rate": 2.6940857949001403e-05, "loss": 2.115, "step": 11860 }, { "epoch": 0.25, "grad_norm": 0.447265625, "learning_rate": 2.693461365977095e-05, "loss": 2.141, "step": 11870 }, { "epoch": 0.25, "grad_norm": 0.470703125, "learning_rate": 2.6928363729350465e-05, "loss": 2.1445, "step": 11880 }, { "epoch": 0.25, "grad_norm": 0.421875, "learning_rate": 2.6922108160694133e-05, "loss": 2.1487, "step": 11890 }, { "epoch": 0.25, "grad_norm": 0.419921875, "learning_rate": 2.69158469567588e-05, "loss": 2.1153, "step": 11900 }, { "epoch": 0.25, "grad_norm": 0.439453125, "learning_rate": 2.6909580120503985e-05, "loss": 2.1072, "step": 11910 }, { "epoch": 0.25, "grad_norm": 0.455078125, "learning_rate": 2.6903307654891858e-05, "loss": 2.0888, "step": 11920 }, { "epoch": 0.25, "grad_norm": 0.431640625, "learning_rate": 2.6897029562887257e-05, "loss": 2.12, "step": 11930 }, { "epoch": 0.25, "grad_norm": 0.443359375, "learning_rate": 2.6890745847457676e-05, "loss": 2.1288, "step": 11940 }, { "epoch": 0.25, "grad_norm": 0.42578125, "learning_rate": 2.6884456511573266e-05, "loss": 2.0738, "step": 11950 }, { "epoch": 0.25, "grad_norm": 0.4140625, "learning_rate": 2.6878161558206845e-05, "loss": 2.1162, "step": 11960 }, { "epoch": 0.25, "grad_norm": 0.458984375, "learning_rate": 2.6871860990333866e-05, "loss": 2.1009, "step": 11970 }, { "epoch": 0.25, "grad_norm": 0.427734375, "learning_rate": 2.6865554810932464e-05, "loss": 2.147, "step": 11980 }, { "epoch": 0.25, "grad_norm": 0.53125, "learning_rate": 2.68592430229834e-05, "loss": 2.1533, "step": 11990 }, { "epoch": 0.25, "grad_norm": 0.42578125, "learning_rate": 2.6852925629470092e-05, "loss": 2.1205, "step": 12000 }, { "epoch": 0.25, "eval_accuracy": 0.5575905435487978, "eval_loss": 2.0020804405212402, "eval_runtime": 16.4541, "eval_samples_per_second": 36.161, "eval_steps_per_second": 1.155, "step": 12000 }, { "epoch": 0.25, "grad_norm": 0.4296875, "learning_rate": 2.684660263337862e-05, "loss": 2.1284, "step": 12010 }, { "epoch": 0.25, "grad_norm": 0.427734375, "learning_rate": 2.6840274037697706e-05, "loss": 2.096, "step": 12020 }, { "epoch": 0.25, "grad_norm": 0.41796875, "learning_rate": 2.6833939845418713e-05, "loss": 2.1219, "step": 12030 }, { "epoch": 0.25, "grad_norm": 0.431640625, "learning_rate": 2.682760005953565e-05, "loss": 2.1337, "step": 12040 }, { "epoch": 0.25, "grad_norm": 0.439453125, "learning_rate": 2.6821254683045184e-05, "loss": 2.17, "step": 12050 }, { "epoch": 0.25, "grad_norm": 0.443359375, "learning_rate": 2.6814903718946605e-05, "loss": 2.1433, "step": 12060 }, { "epoch": 0.25, "grad_norm": 0.451171875, "learning_rate": 2.6808547170241857e-05, "loss": 2.1263, "step": 12070 }, { "epoch": 0.25, "grad_norm": 0.44140625, "learning_rate": 2.6802185039935514e-05, "loss": 2.0722, "step": 12080 }, { "epoch": 0.25, "grad_norm": 0.4921875, "learning_rate": 2.6795817331034803e-05, "loss": 2.1319, "step": 12090 }, { "epoch": 0.25, "grad_norm": 0.421875, "learning_rate": 2.678944404654957e-05, "loss": 2.1281, "step": 12100 }, { "epoch": 0.25, "grad_norm": 0.41796875, "learning_rate": 2.678306518949232e-05, "loss": 2.1321, "step": 12110 }, { "epoch": 0.25, "grad_norm": 0.4375, "learning_rate": 2.677668076287816e-05, "loss": 2.0867, "step": 12120 }, { "epoch": 0.25, "grad_norm": 0.443359375, "learning_rate": 2.6770290769724856e-05, "loss": 2.0957, "step": 12130 }, { "epoch": 0.25, "grad_norm": 0.427734375, "learning_rate": 2.676389521305279e-05, "loss": 2.0962, "step": 12140 }, { "epoch": 0.25, "grad_norm": 0.4453125, "learning_rate": 2.675749409588499e-05, "loss": 2.0884, "step": 12150 }, { "epoch": 0.25, "grad_norm": 0.435546875, "learning_rate": 2.6751087421247096e-05, "loss": 2.0776, "step": 12160 }, { "epoch": 0.25, "grad_norm": 0.42578125, "learning_rate": 2.674467519216738e-05, "loss": 2.1515, "step": 12170 }, { "epoch": 0.25, "grad_norm": 0.453125, "learning_rate": 2.6738257411676746e-05, "loss": 2.1342, "step": 12180 }, { "epoch": 0.25, "grad_norm": 0.439453125, "learning_rate": 2.673183408280871e-05, "loss": 2.1562, "step": 12190 }, { "epoch": 0.25, "grad_norm": 0.439453125, "learning_rate": 2.6725405208599417e-05, "loss": 2.1546, "step": 12200 }, { "epoch": 0.25, "grad_norm": 0.42578125, "learning_rate": 2.6718970792087642e-05, "loss": 2.1857, "step": 12210 }, { "epoch": 0.25, "grad_norm": 0.45703125, "learning_rate": 2.6712530836314763e-05, "loss": 2.1436, "step": 12220 }, { "epoch": 0.25, "grad_norm": 0.42578125, "learning_rate": 2.6706085344324786e-05, "loss": 2.1, "step": 12230 }, { "epoch": 0.25, "grad_norm": 0.455078125, "learning_rate": 2.669963431916433e-05, "loss": 2.1346, "step": 12240 }, { "epoch": 0.25, "grad_norm": 0.494140625, "learning_rate": 2.669317776388263e-05, "loss": 2.1038, "step": 12250 }, { "epoch": 0.25, "grad_norm": 0.4140625, "learning_rate": 2.6686715681531546e-05, "loss": 2.1231, "step": 12260 }, { "epoch": 0.26, "grad_norm": 0.435546875, "learning_rate": 2.6680248075165535e-05, "loss": 2.1126, "step": 12270 }, { "epoch": 0.26, "grad_norm": 0.419921875, "learning_rate": 2.6673774947841673e-05, "loss": 2.1341, "step": 12280 }, { "epoch": 0.26, "grad_norm": 0.44140625, "learning_rate": 2.6667296302619634e-05, "loss": 2.0758, "step": 12290 }, { "epoch": 0.26, "grad_norm": 0.431640625, "learning_rate": 2.6660812142561723e-05, "loss": 2.1322, "step": 12300 }, { "epoch": 0.26, "grad_norm": 0.451171875, "learning_rate": 2.665432247073283e-05, "loss": 2.1306, "step": 12310 }, { "epoch": 0.26, "grad_norm": 0.44921875, "learning_rate": 2.6647827290200467e-05, "loss": 2.1324, "step": 12320 }, { "epoch": 0.26, "grad_norm": 0.435546875, "learning_rate": 2.6641326604034736e-05, "loss": 2.1077, "step": 12330 }, { "epoch": 0.26, "grad_norm": 1.0, "learning_rate": 2.663482041530834e-05, "loss": 2.1343, "step": 12340 }, { "epoch": 0.26, "grad_norm": 0.451171875, "learning_rate": 2.66283087270966e-05, "loss": 2.1316, "step": 12350 }, { "epoch": 0.26, "grad_norm": 0.46875, "learning_rate": 2.662179154247742e-05, "loss": 2.1421, "step": 12360 }, { "epoch": 0.26, "grad_norm": 0.43359375, "learning_rate": 2.6615268864531318e-05, "loss": 2.1153, "step": 12370 }, { "epoch": 0.26, "grad_norm": 0.44140625, "learning_rate": 2.660874069634139e-05, "loss": 2.1173, "step": 12380 }, { "epoch": 0.26, "grad_norm": 0.45703125, "learning_rate": 2.660220704099334e-05, "loss": 2.1345, "step": 12390 }, { "epoch": 0.26, "grad_norm": 0.447265625, "learning_rate": 2.6595667901575455e-05, "loss": 2.1061, "step": 12400 }, { "epoch": 0.26, "grad_norm": 0.458984375, "learning_rate": 2.658912328117863e-05, "loss": 2.1338, "step": 12410 }, { "epoch": 0.26, "grad_norm": 0.423828125, "learning_rate": 2.6582573182896336e-05, "loss": 2.1018, "step": 12420 }, { "epoch": 0.26, "grad_norm": 0.44140625, "learning_rate": 2.657601760982464e-05, "loss": 2.121, "step": 12430 }, { "epoch": 0.26, "grad_norm": 0.4453125, "learning_rate": 2.6569456565062194e-05, "loss": 2.1161, "step": 12440 }, { "epoch": 0.26, "grad_norm": 0.423828125, "learning_rate": 2.656289005171024e-05, "loss": 2.1494, "step": 12450 }, { "epoch": 0.26, "grad_norm": 0.455078125, "learning_rate": 2.6556318072872607e-05, "loss": 2.1486, "step": 12460 }, { "epoch": 0.26, "grad_norm": 0.41796875, "learning_rate": 2.6549740631655695e-05, "loss": 2.1253, "step": 12470 }, { "epoch": 0.26, "grad_norm": 0.42578125, "learning_rate": 2.6543157731168502e-05, "loss": 2.1218, "step": 12480 }, { "epoch": 0.26, "grad_norm": 0.447265625, "learning_rate": 2.6536569374522585e-05, "loss": 2.1214, "step": 12490 }, { "epoch": 0.26, "grad_norm": 0.44140625, "learning_rate": 2.6529975564832113e-05, "loss": 2.0984, "step": 12500 }, { "epoch": 0.26, "eval_accuracy": 0.5575626144886107, "eval_loss": 2.0010862350463867, "eval_runtime": 16.4543, "eval_samples_per_second": 36.161, "eval_steps_per_second": 1.155, "step": 12500 }, { "epoch": 0.26, "grad_norm": 0.4375, "learning_rate": 2.65233763052138e-05, "loss": 2.1521, "step": 12510 }, { "epoch": 0.26, "grad_norm": 0.46875, "learning_rate": 2.6516771598786947e-05, "loss": 2.1079, "step": 12520 }, { "epoch": 0.26, "grad_norm": 0.435546875, "learning_rate": 2.6510161448673443e-05, "loss": 2.1547, "step": 12530 }, { "epoch": 0.26, "grad_norm": 0.462890625, "learning_rate": 2.650354585799773e-05, "loss": 2.1408, "step": 12540 }, { "epoch": 0.26, "grad_norm": 0.4296875, "learning_rate": 2.649692482988683e-05, "loss": 2.1057, "step": 12550 }, { "epoch": 0.26, "grad_norm": 0.447265625, "learning_rate": 2.6490298367470344e-05, "loss": 2.1718, "step": 12560 }, { "epoch": 0.26, "grad_norm": 0.453125, "learning_rate": 2.6483666473880425e-05, "loss": 2.1259, "step": 12570 }, { "epoch": 0.26, "grad_norm": 0.435546875, "learning_rate": 2.6477029152251804e-05, "loss": 2.1342, "step": 12580 }, { "epoch": 0.26, "grad_norm": 0.44140625, "learning_rate": 2.6470386405721777e-05, "loss": 2.1192, "step": 12590 }, { "epoch": 0.26, "grad_norm": 0.447265625, "learning_rate": 2.64637382374302e-05, "loss": 2.0874, "step": 12600 }, { "epoch": 0.26, "grad_norm": 0.439453125, "learning_rate": 2.645708465051949e-05, "loss": 2.1217, "step": 12610 }, { "epoch": 0.26, "grad_norm": 0.55859375, "learning_rate": 2.6450425648134637e-05, "loss": 2.1198, "step": 12620 }, { "epoch": 0.26, "grad_norm": 0.42578125, "learning_rate": 2.644376123342318e-05, "loss": 2.1479, "step": 12630 }, { "epoch": 0.26, "grad_norm": 0.447265625, "learning_rate": 2.643709140953522e-05, "loss": 2.1281, "step": 12640 }, { "epoch": 0.26, "grad_norm": 0.451171875, "learning_rate": 2.643041617962341e-05, "loss": 2.1134, "step": 12650 }, { "epoch": 0.26, "grad_norm": 0.4765625, "learning_rate": 2.6423735546842965e-05, "loss": 2.1187, "step": 12660 }, { "epoch": 0.26, "grad_norm": 0.44140625, "learning_rate": 2.6417049514351655e-05, "loss": 2.1359, "step": 12670 }, { "epoch": 0.26, "grad_norm": 0.42578125, "learning_rate": 2.6410358085309793e-05, "loss": 2.1364, "step": 12680 }, { "epoch": 0.26, "grad_norm": 0.42578125, "learning_rate": 2.640366126288025e-05, "loss": 2.135, "step": 12690 }, { "epoch": 0.26, "grad_norm": 0.443359375, "learning_rate": 2.6396959050228443e-05, "loss": 2.1092, "step": 12700 }, { "epoch": 0.26, "grad_norm": 0.439453125, "learning_rate": 2.6390251450522343e-05, "loss": 2.1422, "step": 12710 }, { "epoch": 0.26, "grad_norm": 0.421875, "learning_rate": 2.638353846693246e-05, "loss": 2.1304, "step": 12720 }, { "epoch": 0.26, "grad_norm": 0.4453125, "learning_rate": 2.637682010263185e-05, "loss": 2.1503, "step": 12730 }, { "epoch": 0.26, "grad_norm": 0.46484375, "learning_rate": 2.6370096360796116e-05, "loss": 2.1212, "step": 12740 }, { "epoch": 0.27, "grad_norm": 0.50390625, "learning_rate": 2.6363367244603402e-05, "loss": 2.1071, "step": 12750 }, { "epoch": 0.27, "grad_norm": 0.43359375, "learning_rate": 2.635663275723439e-05, "loss": 2.1121, "step": 12760 }, { "epoch": 0.27, "grad_norm": 0.447265625, "learning_rate": 2.6349892901872298e-05, "loss": 2.1268, "step": 12770 }, { "epoch": 0.27, "grad_norm": 0.4296875, "learning_rate": 2.6343147681702887e-05, "loss": 2.1267, "step": 12780 }, { "epoch": 0.27, "grad_norm": 0.423828125, "learning_rate": 2.6336397099914454e-05, "loss": 2.1344, "step": 12790 }, { "epoch": 0.27, "grad_norm": 0.451171875, "learning_rate": 2.6329641159697828e-05, "loss": 2.1353, "step": 12800 }, { "epoch": 0.27, "grad_norm": 0.458984375, "learning_rate": 2.6322879864246375e-05, "loss": 2.1403, "step": 12810 }, { "epoch": 0.27, "grad_norm": 0.486328125, "learning_rate": 2.6316113216755978e-05, "loss": 2.102, "step": 12820 }, { "epoch": 0.27, "grad_norm": 0.439453125, "learning_rate": 2.6309341220425066e-05, "loss": 2.1618, "step": 12830 }, { "epoch": 0.27, "grad_norm": 0.43359375, "learning_rate": 2.630256387845459e-05, "loss": 2.1364, "step": 12840 }, { "epoch": 0.27, "grad_norm": 0.57421875, "learning_rate": 2.6295781194048033e-05, "loss": 2.1393, "step": 12850 }, { "epoch": 0.27, "grad_norm": 0.439453125, "learning_rate": 2.628899317041139e-05, "loss": 2.1186, "step": 12860 }, { "epoch": 0.27, "grad_norm": 0.421875, "learning_rate": 2.6282199810753192e-05, "loss": 2.1323, "step": 12870 }, { "epoch": 0.27, "grad_norm": 0.4296875, "learning_rate": 2.6275401118284494e-05, "loss": 2.1235, "step": 12880 }, { "epoch": 0.27, "grad_norm": 0.4296875, "learning_rate": 2.6268597096218853e-05, "loss": 2.1143, "step": 12890 }, { "epoch": 0.27, "grad_norm": 0.4453125, "learning_rate": 2.626178774777237e-05, "loss": 2.1171, "step": 12900 }, { "epoch": 0.27, "grad_norm": 0.4453125, "learning_rate": 2.6254973076163645e-05, "loss": 2.1114, "step": 12910 }, { "epoch": 0.27, "grad_norm": 0.4453125, "learning_rate": 2.6248153084613803e-05, "loss": 2.125, "step": 12920 }, { "epoch": 0.27, "grad_norm": 0.421875, "learning_rate": 2.6241327776346482e-05, "loss": 2.1128, "step": 12930 }, { "epoch": 0.27, "grad_norm": 0.443359375, "learning_rate": 2.6234497154587832e-05, "loss": 2.1439, "step": 12940 }, { "epoch": 0.27, "grad_norm": 0.4296875, "learning_rate": 2.6227661222566516e-05, "loss": 2.1513, "step": 12950 }, { "epoch": 0.27, "grad_norm": 0.427734375, "learning_rate": 2.6220819983513706e-05, "loss": 2.1626, "step": 12960 }, { "epoch": 0.27, "grad_norm": 0.427734375, "learning_rate": 2.6213973440663083e-05, "loss": 2.1124, "step": 12970 }, { "epoch": 0.27, "grad_norm": 0.4453125, "learning_rate": 2.6207121597250835e-05, "loss": 2.1114, "step": 12980 }, { "epoch": 0.27, "grad_norm": 0.4453125, "learning_rate": 2.620026445651565e-05, "loss": 2.105, "step": 12990 }, { "epoch": 0.27, "grad_norm": 0.44140625, "learning_rate": 2.6193402021698732e-05, "loss": 2.1226, "step": 13000 }, { "epoch": 0.27, "eval_accuracy": 0.5575166136835966, "eval_loss": 2.0006489753723145, "eval_runtime": 16.4376, "eval_samples_per_second": 36.198, "eval_steps_per_second": 1.156, "step": 13000 }, { "epoch": 0.27, "grad_norm": 0.458984375, "learning_rate": 2.6186534296043772e-05, "loss": 2.1257, "step": 13010 }, { "epoch": 0.27, "grad_norm": 0.41796875, "learning_rate": 2.617966128279698e-05, "loss": 2.0916, "step": 13020 }, { "epoch": 0.27, "grad_norm": 0.427734375, "learning_rate": 2.617278298520705e-05, "loss": 2.0715, "step": 13030 }, { "epoch": 0.27, "grad_norm": 0.435546875, "learning_rate": 2.616589940652518e-05, "loss": 2.1366, "step": 13040 }, { "epoch": 0.27, "grad_norm": 0.42578125, "learning_rate": 2.6159010550005068e-05, "loss": 2.0964, "step": 13050 }, { "epoch": 0.27, "grad_norm": 0.447265625, "learning_rate": 2.6152116418902892e-05, "loss": 2.0973, "step": 13060 }, { "epoch": 0.27, "grad_norm": 0.46875, "learning_rate": 2.6145217016477344e-05, "loss": 2.1366, "step": 13070 }, { "epoch": 0.27, "grad_norm": 0.46875, "learning_rate": 2.6138312345989594e-05, "loss": 2.1442, "step": 13080 }, { "epoch": 0.27, "grad_norm": 0.44921875, "learning_rate": 2.6131402410703295e-05, "loss": 2.1418, "step": 13090 }, { "epoch": 0.27, "grad_norm": 0.4375, "learning_rate": 2.6124487213884616e-05, "loss": 2.1205, "step": 13100 }, { "epoch": 0.27, "grad_norm": 0.478515625, "learning_rate": 2.6117566758802186e-05, "loss": 2.1157, "step": 13110 }, { "epoch": 0.27, "grad_norm": 0.47265625, "learning_rate": 2.6110641048727125e-05, "loss": 2.1568, "step": 13120 }, { "epoch": 0.27, "grad_norm": 0.4453125, "learning_rate": 2.610371008693305e-05, "loss": 2.1396, "step": 13130 }, { "epoch": 0.27, "grad_norm": 0.46875, "learning_rate": 2.6096773876696045e-05, "loss": 2.1056, "step": 13140 }, { "epoch": 0.27, "grad_norm": 0.435546875, "learning_rate": 2.6089832421294686e-05, "loss": 2.116, "step": 13150 }, { "epoch": 0.27, "grad_norm": 0.470703125, "learning_rate": 2.608288572401002e-05, "loss": 2.1278, "step": 13160 }, { "epoch": 0.27, "grad_norm": 0.423828125, "learning_rate": 2.6075933788125576e-05, "loss": 2.1327, "step": 13170 }, { "epoch": 0.27, "grad_norm": 0.44140625, "learning_rate": 2.6068976616927362e-05, "loss": 2.1458, "step": 13180 }, { "epoch": 0.27, "grad_norm": 0.41796875, "learning_rate": 2.6062014213703848e-05, "loss": 2.1489, "step": 13190 }, { "epoch": 0.27, "grad_norm": 0.447265625, "learning_rate": 2.6055046581745995e-05, "loss": 2.1528, "step": 13200 }, { "epoch": 0.27, "grad_norm": 0.43359375, "learning_rate": 2.604807372434722e-05, "loss": 2.1142, "step": 13210 }, { "epoch": 0.27, "grad_norm": 0.462890625, "learning_rate": 2.604109564480342e-05, "loss": 2.1181, "step": 13220 }, { "epoch": 0.28, "grad_norm": 0.4375, "learning_rate": 2.6034112346412957e-05, "loss": 2.1306, "step": 13230 }, { "epoch": 0.28, "grad_norm": 0.44921875, "learning_rate": 2.602712383247666e-05, "loss": 2.1209, "step": 13240 }, { "epoch": 0.28, "grad_norm": 0.443359375, "learning_rate": 2.6020130106297814e-05, "loss": 2.1028, "step": 13250 }, { "epoch": 0.28, "grad_norm": 0.43359375, "learning_rate": 2.6013131171182185e-05, "loss": 2.0995, "step": 13260 }, { "epoch": 0.28, "grad_norm": 0.45703125, "learning_rate": 2.6006127030437996e-05, "loss": 2.109, "step": 13270 }, { "epoch": 0.28, "grad_norm": 0.45703125, "learning_rate": 2.5999117687375917e-05, "loss": 2.1417, "step": 13280 }, { "epoch": 0.28, "grad_norm": 0.4453125, "learning_rate": 2.5992103145309095e-05, "loss": 2.1016, "step": 13290 }, { "epoch": 0.28, "grad_norm": 0.431640625, "learning_rate": 2.5985083407553124e-05, "loss": 2.0963, "step": 13300 }, { "epoch": 0.28, "grad_norm": 0.470703125, "learning_rate": 2.5978058477426055e-05, "loss": 2.1199, "step": 13310 }, { "epoch": 0.28, "grad_norm": 0.453125, "learning_rate": 2.5971028358248396e-05, "loss": 2.1211, "step": 13320 }, { "epoch": 0.28, "grad_norm": 0.421875, "learning_rate": 2.5963993053343108e-05, "loss": 2.0817, "step": 13330 }, { "epoch": 0.28, "grad_norm": 0.55078125, "learning_rate": 2.5956952566035602e-05, "loss": 2.0825, "step": 13340 }, { "epoch": 0.28, "grad_norm": 0.451171875, "learning_rate": 2.5949906899653734e-05, "loss": 2.0881, "step": 13350 }, { "epoch": 0.28, "grad_norm": 0.4296875, "learning_rate": 2.594285605752782e-05, "loss": 2.1466, "step": 13360 }, { "epoch": 0.28, "grad_norm": 0.439453125, "learning_rate": 2.5935800042990605e-05, "loss": 2.1196, "step": 13370 }, { "epoch": 0.28, "grad_norm": 0.447265625, "learning_rate": 2.5928738859377296e-05, "loss": 2.1781, "step": 13380 }, { "epoch": 0.28, "grad_norm": 0.421875, "learning_rate": 2.592167251002553e-05, "loss": 2.1188, "step": 13390 }, { "epoch": 0.28, "grad_norm": 0.44140625, "learning_rate": 2.59146009982754e-05, "loss": 2.1374, "step": 13400 }, { "epoch": 0.28, "grad_norm": 0.4453125, "learning_rate": 2.5907524327469424e-05, "loss": 2.1316, "step": 13410 }, { "epoch": 0.28, "grad_norm": 0.44921875, "learning_rate": 2.5900442500952566e-05, "loss": 2.1396, "step": 13420 }, { "epoch": 0.28, "grad_norm": 0.421875, "learning_rate": 2.589335552207223e-05, "loss": 2.1401, "step": 13430 }, { "epoch": 0.28, "grad_norm": 0.431640625, "learning_rate": 2.5886263394178246e-05, "loss": 2.1438, "step": 13440 }, { "epoch": 0.28, "grad_norm": 0.43359375, "learning_rate": 2.5879166120622887e-05, "loss": 2.1369, "step": 13450 }, { "epoch": 0.28, "grad_norm": 0.4296875, "learning_rate": 2.5872063704760856e-05, "loss": 2.1011, "step": 13460 }, { "epoch": 0.28, "grad_norm": 0.46875, "learning_rate": 2.586495614994928e-05, "loss": 2.1311, "step": 13470 }, { "epoch": 0.28, "grad_norm": 0.439453125, "learning_rate": 2.585784345954773e-05, "loss": 2.1379, "step": 13480 }, { "epoch": 0.28, "grad_norm": 0.4140625, "learning_rate": 2.5850725636918183e-05, "loss": 2.1109, "step": 13490 }, { "epoch": 0.28, "grad_norm": 0.451171875, "learning_rate": 2.584360268542506e-05, "loss": 2.1054, "step": 13500 }, { "epoch": 0.28, "eval_accuracy": 0.5577351175074136, "eval_loss": 2.000054121017456, "eval_runtime": 16.4541, "eval_samples_per_second": 36.161, "eval_steps_per_second": 1.155, "step": 13500 }, { "epoch": 0.28, "grad_norm": 0.4609375, "learning_rate": 2.5836474608435202e-05, "loss": 2.0967, "step": 13510 }, { "epoch": 0.28, "grad_norm": 0.43359375, "learning_rate": 2.5829341409317866e-05, "loss": 2.1492, "step": 13520 }, { "epoch": 0.28, "grad_norm": 0.421875, "learning_rate": 2.582220309144474e-05, "loss": 2.1162, "step": 13530 }, { "epoch": 0.28, "grad_norm": 0.423828125, "learning_rate": 2.5815059658189924e-05, "loss": 2.0795, "step": 13540 }, { "epoch": 0.28, "grad_norm": 0.490234375, "learning_rate": 2.5807911112929937e-05, "loss": 2.1174, "step": 13550 }, { "epoch": 0.28, "grad_norm": 0.421875, "learning_rate": 2.580075745904371e-05, "loss": 2.1442, "step": 13560 }, { "epoch": 0.28, "grad_norm": 0.447265625, "learning_rate": 2.5793598699912612e-05, "loss": 2.1377, "step": 13570 }, { "epoch": 0.28, "grad_norm": 0.416015625, "learning_rate": 2.5786434838920385e-05, "loss": 2.1311, "step": 13580 }, { "epoch": 0.28, "grad_norm": 0.8515625, "learning_rate": 2.577926587945322e-05, "loss": 2.1132, "step": 13590 }, { "epoch": 0.28, "grad_norm": 0.435546875, "learning_rate": 2.5772091824899697e-05, "loss": 2.1114, "step": 13600 }, { "epoch": 0.28, "grad_norm": 0.439453125, "learning_rate": 2.5764912678650814e-05, "loss": 2.1312, "step": 13610 }, { "epoch": 0.28, "grad_norm": 0.5234375, "learning_rate": 2.5757728444099973e-05, "loss": 2.1173, "step": 13620 }, { "epoch": 0.28, "grad_norm": 0.40625, "learning_rate": 2.5750539124642972e-05, "loss": 2.135, "step": 13630 }, { "epoch": 0.28, "grad_norm": 0.44140625, "learning_rate": 2.5743344723678026e-05, "loss": 2.1151, "step": 13640 }, { "epoch": 0.28, "grad_norm": 0.4453125, "learning_rate": 2.5736145244605746e-05, "loss": 2.125, "step": 13650 }, { "epoch": 0.28, "grad_norm": 0.4375, "learning_rate": 2.572894069082914e-05, "loss": 2.1157, "step": 13660 }, { "epoch": 0.28, "grad_norm": 0.49609375, "learning_rate": 2.5721731065753626e-05, "loss": 2.1441, "step": 13670 }, { "epoch": 0.28, "grad_norm": 0.431640625, "learning_rate": 2.5714516372787003e-05, "loss": 2.1452, "step": 13680 }, { "epoch": 0.28, "grad_norm": 0.443359375, "learning_rate": 2.570729661533948e-05, "loss": 2.1182, "step": 13690 }, { "epoch": 0.28, "grad_norm": 0.43359375, "learning_rate": 2.5700071796823652e-05, "loss": 2.1234, "step": 13700 }, { "epoch": 0.29, "grad_norm": 0.4375, "learning_rate": 2.5692841920654504e-05, "loss": 2.1208, "step": 13710 }, { "epoch": 0.29, "grad_norm": 0.439453125, "learning_rate": 2.5685606990249424e-05, "loss": 2.0994, "step": 13720 }, { "epoch": 0.29, "grad_norm": 0.43359375, "learning_rate": 2.5678367009028173e-05, "loss": 2.1448, "step": 13730 }, { "epoch": 0.29, "grad_norm": 0.423828125, "learning_rate": 2.5671121980412908e-05, "loss": 2.1197, "step": 13740 }, { "epoch": 0.29, "grad_norm": 0.4609375, "learning_rate": 2.5663871907828175e-05, "loss": 2.1325, "step": 13750 }, { "epoch": 0.29, "grad_norm": 0.462890625, "learning_rate": 2.56566167947009e-05, "loss": 2.1228, "step": 13760 }, { "epoch": 0.29, "grad_norm": 0.44921875, "learning_rate": 2.5649356644460385e-05, "loss": 2.1461, "step": 13770 }, { "epoch": 0.29, "grad_norm": 0.455078125, "learning_rate": 2.5642091460538324e-05, "loss": 2.1437, "step": 13780 }, { "epoch": 0.29, "grad_norm": 0.470703125, "learning_rate": 2.5634821246368784e-05, "loss": 2.1147, "step": 13790 }, { "epoch": 0.29, "grad_norm": 0.427734375, "learning_rate": 2.5627546005388218e-05, "loss": 2.1283, "step": 13800 }, { "epoch": 0.29, "grad_norm": 0.490234375, "learning_rate": 2.5620265741035438e-05, "loss": 2.1616, "step": 13810 }, { "epoch": 0.29, "grad_norm": 0.458984375, "learning_rate": 2.5612980456751643e-05, "loss": 2.1316, "step": 13820 }, { "epoch": 0.29, "grad_norm": 0.455078125, "learning_rate": 2.560569015598041e-05, "loss": 2.1151, "step": 13830 }, { "epoch": 0.29, "grad_norm": 0.447265625, "learning_rate": 2.5598394842167674e-05, "loss": 2.0983, "step": 13840 }, { "epoch": 0.29, "grad_norm": 1.0390625, "learning_rate": 2.5591094518761747e-05, "loss": 2.1351, "step": 13850 }, { "epoch": 0.29, "grad_norm": 0.4375, "learning_rate": 2.5583789189213307e-05, "loss": 2.103, "step": 13860 }, { "epoch": 0.29, "grad_norm": 0.4296875, "learning_rate": 2.5576478856975394e-05, "loss": 2.116, "step": 13870 }, { "epoch": 0.29, "grad_norm": 0.443359375, "learning_rate": 2.5569163525503426e-05, "loss": 2.1208, "step": 13880 }, { "epoch": 0.29, "grad_norm": 0.435546875, "learning_rate": 2.556184319825517e-05, "loss": 2.1323, "step": 13890 }, { "epoch": 0.29, "grad_norm": 0.4296875, "learning_rate": 2.555451787869076e-05, "loss": 2.1104, "step": 13900 }, { "epoch": 0.29, "grad_norm": 0.439453125, "learning_rate": 2.5547187570272687e-05, "loss": 2.107, "step": 13910 }, { "epoch": 0.29, "grad_norm": 0.43359375, "learning_rate": 2.5539852276465806e-05, "loss": 2.134, "step": 13920 }, { "epoch": 0.29, "grad_norm": 0.435546875, "learning_rate": 2.5532512000737325e-05, "loss": 2.1666, "step": 13930 }, { "epoch": 0.29, "grad_norm": 0.44921875, "learning_rate": 2.5525166746556802e-05, "loss": 2.1158, "step": 13940 }, { "epoch": 0.29, "grad_norm": 0.443359375, "learning_rate": 2.5517816517396156e-05, "loss": 2.1199, "step": 13950 }, { "epoch": 0.29, "grad_norm": 0.484375, "learning_rate": 2.5510461316729657e-05, "loss": 2.1216, "step": 13960 }, { "epoch": 0.29, "grad_norm": 0.421875, "learning_rate": 2.5503101148033925e-05, "loss": 2.0959, "step": 13970 }, { "epoch": 0.29, "grad_norm": 0.455078125, "learning_rate": 2.5495736014787914e-05, "loss": 2.1081, "step": 13980 }, { "epoch": 0.29, "grad_norm": 0.474609375, "learning_rate": 2.548836592047294e-05, "loss": 2.1296, "step": 13990 }, { "epoch": 0.29, "grad_norm": 0.421875, "learning_rate": 2.548099086857267e-05, "loss": 2.1297, "step": 14000 }, { "epoch": 0.29, "eval_accuracy": 0.5578221904597617, "eval_loss": 1.9996856451034546, "eval_runtime": 16.446, "eval_samples_per_second": 36.179, "eval_steps_per_second": 1.155, "step": 14000 }, { "epoch": 0.29, "grad_norm": 0.439453125, "learning_rate": 2.547361086257309e-05, "loss": 2.1268, "step": 14010 }, { "epoch": 0.29, "grad_norm": 0.439453125, "learning_rate": 2.5466225905962556e-05, "loss": 2.0814, "step": 14020 }, { "epoch": 0.29, "grad_norm": 0.515625, "learning_rate": 2.545883600223174e-05, "loss": 2.1518, "step": 14030 }, { "epoch": 0.29, "grad_norm": 0.443359375, "learning_rate": 2.5451441154873666e-05, "loss": 2.1365, "step": 14040 }, { "epoch": 0.29, "grad_norm": 0.435546875, "learning_rate": 2.5444041367383696e-05, "loss": 2.1052, "step": 14050 }, { "epoch": 0.29, "grad_norm": 0.421875, "learning_rate": 2.5436636643259515e-05, "loss": 2.0966, "step": 14060 }, { "epoch": 0.29, "grad_norm": 0.439453125, "learning_rate": 2.5429226986001152e-05, "loss": 2.1351, "step": 14070 }, { "epoch": 0.29, "grad_norm": 0.423828125, "learning_rate": 2.5421812399110968e-05, "loss": 2.0983, "step": 14080 }, { "epoch": 0.29, "grad_norm": 0.439453125, "learning_rate": 2.5414392886093645e-05, "loss": 2.1336, "step": 14090 }, { "epoch": 0.29, "grad_norm": 0.4296875, "learning_rate": 2.54069684504562e-05, "loss": 2.1183, "step": 14100 }, { "epoch": 0.29, "grad_norm": 0.43359375, "learning_rate": 2.5399539095707977e-05, "loss": 2.1162, "step": 14110 }, { "epoch": 0.29, "grad_norm": 0.451171875, "learning_rate": 2.539210482536065e-05, "loss": 2.1401, "step": 14120 }, { "epoch": 0.29, "grad_norm": 0.51171875, "learning_rate": 2.5384665642928197e-05, "loss": 2.1168, "step": 14130 }, { "epoch": 0.29, "grad_norm": 0.431640625, "learning_rate": 2.5377221551926942e-05, "loss": 2.1489, "step": 14140 }, { "epoch": 0.29, "grad_norm": 0.458984375, "learning_rate": 2.5369772555875518e-05, "loss": 2.1357, "step": 14150 }, { "epoch": 0.29, "grad_norm": 0.4140625, "learning_rate": 2.536231865829487e-05, "loss": 2.0854, "step": 14160 }, { "epoch": 0.29, "grad_norm": 0.423828125, "learning_rate": 2.5354859862708276e-05, "loss": 2.1236, "step": 14170 }, { "epoch": 0.29, "grad_norm": 0.470703125, "learning_rate": 2.5347396172641313e-05, "loss": 2.1173, "step": 14180 }, { "epoch": 0.3, "grad_norm": 0.439453125, "learning_rate": 2.5339927591621887e-05, "loss": 2.1211, "step": 14190 }, { "epoch": 0.3, "grad_norm": 0.439453125, "learning_rate": 2.5332454123180196e-05, "loss": 2.0856, "step": 14200 }, { "epoch": 0.3, "grad_norm": 0.439453125, "learning_rate": 2.5324975770848768e-05, "loss": 2.1184, "step": 14210 }, { "epoch": 0.3, "grad_norm": 0.4375, "learning_rate": 2.5317492538162425e-05, "loss": 2.107, "step": 14220 }, { "epoch": 0.3, "grad_norm": 0.447265625, "learning_rate": 2.5310004428658307e-05, "loss": 2.1646, "step": 14230 }, { "epoch": 0.3, "grad_norm": 0.439453125, "learning_rate": 2.5302511445875857e-05, "loss": 2.1197, "step": 14240 }, { "epoch": 0.3, "grad_norm": 0.431640625, "learning_rate": 2.5295013593356805e-05, "loss": 2.1103, "step": 14250 }, { "epoch": 0.3, "grad_norm": 0.431640625, "learning_rate": 2.5287510874645207e-05, "loss": 2.1076, "step": 14260 }, { "epoch": 0.3, "grad_norm": 0.478515625, "learning_rate": 2.528000329328741e-05, "loss": 2.1217, "step": 14270 }, { "epoch": 0.3, "grad_norm": 0.427734375, "learning_rate": 2.527249085283205e-05, "loss": 2.1151, "step": 14280 }, { "epoch": 0.3, "grad_norm": 0.435546875, "learning_rate": 2.5264973556830077e-05, "loss": 2.1274, "step": 14290 }, { "epoch": 0.3, "grad_norm": 0.416015625, "learning_rate": 2.5257451408834715e-05, "loss": 2.1284, "step": 14300 }, { "epoch": 0.3, "grad_norm": 0.427734375, "learning_rate": 2.5249924412401503e-05, "loss": 2.0981, "step": 14310 }, { "epoch": 0.3, "grad_norm": 0.4921875, "learning_rate": 2.5242392571088257e-05, "loss": 2.1233, "step": 14320 }, { "epoch": 0.3, "grad_norm": 0.427734375, "learning_rate": 2.523485588845509e-05, "loss": 2.1225, "step": 14330 }, { "epoch": 0.3, "grad_norm": 0.4375, "learning_rate": 2.52273143680644e-05, "loss": 2.1173, "step": 14340 }, { "epoch": 0.3, "grad_norm": 0.4296875, "learning_rate": 2.5219768013480873e-05, "loss": 2.1039, "step": 14350 }, { "epoch": 0.3, "grad_norm": 0.42578125, "learning_rate": 2.521221682827148e-05, "loss": 2.1242, "step": 14360 }, { "epoch": 0.3, "grad_norm": 0.484375, "learning_rate": 2.5204660816005473e-05, "loss": 2.1307, "step": 14370 }, { "epoch": 0.3, "grad_norm": 0.419921875, "learning_rate": 2.5197099980254395e-05, "loss": 2.0909, "step": 14380 }, { "epoch": 0.3, "grad_norm": 0.447265625, "learning_rate": 2.5189534324592054e-05, "loss": 2.1161, "step": 14390 }, { "epoch": 0.3, "grad_norm": 0.4140625, "learning_rate": 2.5181963852594548e-05, "loss": 2.0909, "step": 14400 }, { "epoch": 0.3, "grad_norm": 0.4375, "learning_rate": 2.5174388567840247e-05, "loss": 2.1651, "step": 14410 }, { "epoch": 0.3, "grad_norm": 0.474609375, "learning_rate": 2.51668084739098e-05, "loss": 2.1234, "step": 14420 }, { "epoch": 0.3, "grad_norm": 0.4296875, "learning_rate": 2.5159223574386117e-05, "loss": 2.1284, "step": 14430 }, { "epoch": 0.3, "grad_norm": 0.43359375, "learning_rate": 2.5151633872854393e-05, "loss": 2.1327, "step": 14440 }, { "epoch": 0.3, "grad_norm": 0.42578125, "learning_rate": 2.5144039372902093e-05, "loss": 2.1369, "step": 14450 }, { "epoch": 0.3, "grad_norm": 0.48828125, "learning_rate": 2.513644007811894e-05, "loss": 2.1351, "step": 14460 }, { "epoch": 0.3, "grad_norm": 0.458984375, "learning_rate": 2.5128835992096926e-05, "loss": 2.1129, "step": 14470 }, { "epoch": 0.3, "grad_norm": 0.443359375, "learning_rate": 2.5121227118430314e-05, "loss": 2.1323, "step": 14480 }, { "epoch": 0.3, "grad_norm": 0.47265625, "learning_rate": 2.5113613460715625e-05, "loss": 2.1137, "step": 14490 }, { "epoch": 0.3, "grad_norm": 0.431640625, "learning_rate": 2.510599502255164e-05, "loss": 2.1233, "step": 14500 }, { "epoch": 0.3, "eval_accuracy": 0.5581014810616328, "eval_loss": 1.9988162517547607, "eval_runtime": 16.4202, "eval_samples_per_second": 36.236, "eval_steps_per_second": 1.157, "step": 14500 }, { "epoch": 0.3, "grad_norm": 0.4609375, "learning_rate": 2.509837180753941e-05, "loss": 2.1153, "step": 14510 }, { "epoch": 0.3, "grad_norm": 0.42578125, "learning_rate": 2.5090743819282225e-05, "loss": 2.124, "step": 14520 }, { "epoch": 0.3, "grad_norm": 0.486328125, "learning_rate": 2.508311106138565e-05, "loss": 2.1196, "step": 14530 }, { "epoch": 0.3, "grad_norm": 0.431640625, "learning_rate": 2.5075473537457496e-05, "loss": 2.1181, "step": 14540 }, { "epoch": 0.3, "grad_norm": 0.478515625, "learning_rate": 2.5067831251107826e-05, "loss": 2.1276, "step": 14550 }, { "epoch": 0.3, "grad_norm": 0.474609375, "learning_rate": 2.5060184205948953e-05, "loss": 2.1338, "step": 14560 }, { "epoch": 0.3, "grad_norm": 0.4296875, "learning_rate": 2.5052532405595444e-05, "loss": 2.0974, "step": 14570 }, { "epoch": 0.3, "grad_norm": 0.427734375, "learning_rate": 2.504487585366411e-05, "loss": 2.1171, "step": 14580 }, { "epoch": 0.3, "grad_norm": 0.42578125, "learning_rate": 2.5037214553774017e-05, "loss": 2.1265, "step": 14590 }, { "epoch": 0.3, "grad_norm": 0.435546875, "learning_rate": 2.5029548509546452e-05, "loss": 2.1231, "step": 14600 }, { "epoch": 0.3, "grad_norm": 0.423828125, "learning_rate": 2.5021877724604976e-05, "loss": 2.1272, "step": 14610 }, { "epoch": 0.3, "grad_norm": 0.453125, "learning_rate": 2.5014202202575367e-05, "loss": 2.1321, "step": 14620 }, { "epoch": 0.3, "grad_norm": 0.43359375, "learning_rate": 2.5006521947085654e-05, "loss": 2.1254, "step": 14630 }, { "epoch": 0.3, "grad_norm": 0.44921875, "learning_rate": 2.4998836961766098e-05, "loss": 2.1157, "step": 14640 }, { "epoch": 0.3, "grad_norm": 0.421875, "learning_rate": 2.4991147250249193e-05, "loss": 2.1514, "step": 14650 }, { "epoch": 0.3, "grad_norm": 0.4296875, "learning_rate": 2.4983452816169676e-05, "loss": 2.1128, "step": 14660 }, { "epoch": 0.3, "grad_norm": 0.490234375, "learning_rate": 2.497575366316451e-05, "loss": 2.1163, "step": 14670 }, { "epoch": 0.31, "grad_norm": 0.435546875, "learning_rate": 2.4968049794872895e-05, "loss": 2.1011, "step": 14680 }, { "epoch": 0.31, "grad_norm": 0.453125, "learning_rate": 2.4960341214936244e-05, "loss": 2.1491, "step": 14690 }, { "epoch": 0.31, "grad_norm": 0.439453125, "learning_rate": 2.4952627926998214e-05, "loss": 2.1284, "step": 14700 }, { "epoch": 0.31, "grad_norm": 0.439453125, "learning_rate": 2.4944909934704683e-05, "loss": 2.1329, "step": 14710 }, { "epoch": 0.31, "grad_norm": 0.470703125, "learning_rate": 2.4937187241703752e-05, "loss": 2.089, "step": 14720 }, { "epoch": 0.31, "grad_norm": 0.443359375, "learning_rate": 2.4929459851645736e-05, "loss": 2.1106, "step": 14730 }, { "epoch": 0.31, "grad_norm": 0.421875, "learning_rate": 2.4921727768183178e-05, "loss": 2.1323, "step": 14740 }, { "epoch": 0.31, "grad_norm": 0.416015625, "learning_rate": 2.4913990994970838e-05, "loss": 2.1156, "step": 14750 }, { "epoch": 0.31, "grad_norm": 0.423828125, "learning_rate": 2.49062495356657e-05, "loss": 2.1578, "step": 14760 }, { "epoch": 0.31, "grad_norm": 0.427734375, "learning_rate": 2.489850339392695e-05, "loss": 2.1294, "step": 14770 }, { "epoch": 0.31, "grad_norm": 0.4375, "learning_rate": 2.4890752573415997e-05, "loss": 2.1327, "step": 14780 }, { "epoch": 0.31, "grad_norm": 0.443359375, "learning_rate": 2.4882997077796453e-05, "loss": 2.1374, "step": 14790 }, { "epoch": 0.31, "grad_norm": 0.447265625, "learning_rate": 2.4875236910734145e-05, "loss": 2.0998, "step": 14800 }, { "epoch": 0.31, "grad_norm": 0.431640625, "learning_rate": 2.4867472075897112e-05, "loss": 2.1303, "step": 14810 }, { "epoch": 0.31, "grad_norm": 0.44921875, "learning_rate": 2.4859702576955595e-05, "loss": 2.1342, "step": 14820 }, { "epoch": 0.31, "grad_norm": 0.431640625, "learning_rate": 2.4851928417582034e-05, "loss": 2.1316, "step": 14830 }, { "epoch": 0.31, "grad_norm": 0.427734375, "learning_rate": 2.484414960145108e-05, "loss": 2.1225, "step": 14840 }, { "epoch": 0.31, "grad_norm": 0.431640625, "learning_rate": 2.4836366132239585e-05, "loss": 2.1203, "step": 14850 }, { "epoch": 0.31, "grad_norm": 0.439453125, "learning_rate": 2.48285780136266e-05, "loss": 2.1207, "step": 14860 }, { "epoch": 0.31, "grad_norm": 0.4296875, "learning_rate": 2.4820785249293367e-05, "loss": 2.0991, "step": 14870 }, { "epoch": 0.31, "grad_norm": 0.43359375, "learning_rate": 2.481298784292333e-05, "loss": 2.1467, "step": 14880 }, { "epoch": 0.31, "grad_norm": 0.447265625, "learning_rate": 2.4805185798202125e-05, "loss": 2.1217, "step": 14890 }, { "epoch": 0.31, "grad_norm": 0.4765625, "learning_rate": 2.4797379118817577e-05, "loss": 2.1357, "step": 14900 }, { "epoch": 0.31, "grad_norm": 0.43359375, "learning_rate": 2.4789567808459718e-05, "loss": 2.1433, "step": 14910 }, { "epoch": 0.31, "grad_norm": 0.44921875, "learning_rate": 2.4781751870820746e-05, "loss": 2.1387, "step": 14920 }, { "epoch": 0.31, "grad_norm": 0.4296875, "learning_rate": 2.4773931309595064e-05, "loss": 2.1119, "step": 14930 }, { "epoch": 0.31, "grad_norm": 0.4296875, "learning_rate": 2.4766106128479248e-05, "loss": 2.1469, "step": 14940 }, { "epoch": 0.31, "grad_norm": 0.458984375, "learning_rate": 2.4758276331172065e-05, "loss": 2.1553, "step": 14950 }, { "epoch": 0.31, "grad_norm": 0.486328125, "learning_rate": 2.475044192137446e-05, "loss": 2.1087, "step": 14960 }, { "epoch": 0.31, "grad_norm": 0.423828125, "learning_rate": 2.474260290278957e-05, "loss": 2.1025, "step": 14970 }, { "epoch": 0.31, "grad_norm": 0.455078125, "learning_rate": 2.473475927912269e-05, "loss": 2.1313, "step": 14980 }, { "epoch": 0.31, "grad_norm": 0.439453125, "learning_rate": 2.4726911054081304e-05, "loss": 2.1499, "step": 14990 }, { "epoch": 0.31, "grad_norm": 0.46484375, "learning_rate": 2.4719058231375076e-05, "loss": 2.1348, "step": 15000 }, { "epoch": 0.31, "eval_accuracy": 0.5581129812628863, "eval_loss": 1.9984461069107056, "eval_runtime": 16.4389, "eval_samples_per_second": 36.195, "eval_steps_per_second": 1.156, "step": 15000 }, { "epoch": 0.31, "grad_norm": 0.5234375, "learning_rate": 2.471120081471583e-05, "loss": 2.1141, "step": 15010 }, { "epoch": 0.31, "grad_norm": 0.4375, "learning_rate": 2.4703338807817573e-05, "loss": 2.1094, "step": 15020 }, { "epoch": 0.31, "grad_norm": 0.455078125, "learning_rate": 2.469547221439647e-05, "loss": 2.0645, "step": 15030 }, { "epoch": 0.31, "grad_norm": 0.4375, "learning_rate": 2.468760103817087e-05, "loss": 2.0682, "step": 15040 }, { "epoch": 0.31, "grad_norm": 0.458984375, "learning_rate": 2.467972528286127e-05, "loss": 2.1166, "step": 15050 }, { "epoch": 0.31, "grad_norm": 0.412109375, "learning_rate": 2.467184495219035e-05, "loss": 2.1059, "step": 15060 }, { "epoch": 0.31, "grad_norm": 0.421875, "learning_rate": 2.4663960049882933e-05, "loss": 2.1045, "step": 15070 }, { "epoch": 0.31, "grad_norm": 0.435546875, "learning_rate": 2.4656070579666022e-05, "loss": 2.1114, "step": 15080 }, { "epoch": 0.31, "grad_norm": 0.482421875, "learning_rate": 2.464817654526876e-05, "loss": 2.1392, "step": 15090 }, { "epoch": 0.31, "grad_norm": 0.5078125, "learning_rate": 2.4640277950422462e-05, "loss": 2.1055, "step": 15100 }, { "epoch": 0.31, "grad_norm": 0.431640625, "learning_rate": 2.4632374798860598e-05, "loss": 2.1051, "step": 15110 }, { "epoch": 0.31, "grad_norm": 0.55078125, "learning_rate": 2.4624467094318783e-05, "loss": 2.1752, "step": 15120 }, { "epoch": 0.31, "grad_norm": 0.416015625, "learning_rate": 2.4616554840534797e-05, "loss": 2.0669, "step": 15130 }, { "epoch": 0.31, "grad_norm": 0.431640625, "learning_rate": 2.460863804124855e-05, "loss": 2.1496, "step": 15140 }, { "epoch": 0.31, "grad_norm": 0.435546875, "learning_rate": 2.4600716700202114e-05, "loss": 2.1215, "step": 15150 }, { "epoch": 0.32, "grad_norm": 0.427734375, "learning_rate": 2.459279082113972e-05, "loss": 2.0839, "step": 15160 }, { "epoch": 0.32, "grad_norm": 0.455078125, "learning_rate": 2.458486040780772e-05, "loss": 2.1379, "step": 15170 }, { "epoch": 0.32, "grad_norm": 0.43359375, "learning_rate": 2.457692546395462e-05, "loss": 2.1164, "step": 15180 }, { "epoch": 0.32, "grad_norm": 0.44140625, "learning_rate": 2.456898599333107e-05, "loss": 2.1049, "step": 15190 }, { "epoch": 0.32, "grad_norm": 0.43359375, "learning_rate": 2.4561041999689853e-05, "loss": 2.117, "step": 15200 }, { "epoch": 0.32, "grad_norm": 0.4375, "learning_rate": 2.45530934867859e-05, "loss": 2.1007, "step": 15210 }, { "epoch": 0.32, "grad_norm": 0.419921875, "learning_rate": 2.454514045837627e-05, "loss": 2.1464, "step": 15220 }, { "epoch": 0.32, "grad_norm": 0.451171875, "learning_rate": 2.453718291822015e-05, "loss": 2.1302, "step": 15230 }, { "epoch": 0.32, "grad_norm": 0.419921875, "learning_rate": 2.4529220870078884e-05, "loss": 2.1042, "step": 15240 }, { "epoch": 0.32, "grad_norm": 0.451171875, "learning_rate": 2.452125431771592e-05, "loss": 2.1049, "step": 15250 }, { "epoch": 0.32, "grad_norm": 0.490234375, "learning_rate": 2.451328326489684e-05, "loss": 2.0955, "step": 15260 }, { "epoch": 0.32, "grad_norm": 0.46484375, "learning_rate": 2.4505307715389377e-05, "loss": 2.1172, "step": 15270 }, { "epoch": 0.32, "grad_norm": 0.443359375, "learning_rate": 2.449732767296335e-05, "loss": 2.1699, "step": 15280 }, { "epoch": 0.32, "grad_norm": 0.435546875, "learning_rate": 2.4489343141390745e-05, "loss": 2.1195, "step": 15290 }, { "epoch": 0.32, "grad_norm": 0.462890625, "learning_rate": 2.4481354124445633e-05, "loss": 2.1466, "step": 15300 }, { "epoch": 0.32, "grad_norm": 0.43359375, "learning_rate": 2.447336062590422e-05, "loss": 2.1435, "step": 15310 }, { "epoch": 0.32, "grad_norm": 0.46484375, "learning_rate": 2.4465362649544845e-05, "loss": 2.1078, "step": 15320 }, { "epoch": 0.32, "grad_norm": 0.4375, "learning_rate": 2.445736019914794e-05, "loss": 2.1435, "step": 15330 }, { "epoch": 0.32, "grad_norm": 0.64453125, "learning_rate": 2.4449353278496052e-05, "loss": 2.138, "step": 15340 }, { "epoch": 0.32, "grad_norm": 0.44921875, "learning_rate": 2.4441341891373866e-05, "loss": 2.105, "step": 15350 }, { "epoch": 0.32, "grad_norm": 0.451171875, "learning_rate": 2.4433326041568152e-05, "loss": 2.088, "step": 15360 }, { "epoch": 0.32, "grad_norm": 0.46484375, "learning_rate": 2.44253057328678e-05, "loss": 2.1287, "step": 15370 }, { "epoch": 0.32, "grad_norm": 0.427734375, "learning_rate": 2.441728096906381e-05, "loss": 2.1336, "step": 15380 }, { "epoch": 0.32, "grad_norm": 0.447265625, "learning_rate": 2.4409251753949284e-05, "loss": 2.1116, "step": 15390 }, { "epoch": 0.32, "grad_norm": 0.419921875, "learning_rate": 2.4401218091319427e-05, "loss": 2.1251, "step": 15400 }, { "epoch": 0.32, "grad_norm": 0.431640625, "learning_rate": 2.4393179984971556e-05, "loss": 2.1223, "step": 15410 }, { "epoch": 0.32, "grad_norm": 0.439453125, "learning_rate": 2.438513743870507e-05, "loss": 2.1083, "step": 15420 }, { "epoch": 0.32, "grad_norm": 0.439453125, "learning_rate": 2.437709045632148e-05, "loss": 2.1026, "step": 15430 }, { "epoch": 0.32, "grad_norm": 2.25, "learning_rate": 2.4369039041624402e-05, "loss": 2.1047, "step": 15440 }, { "epoch": 0.32, "grad_norm": 0.41796875, "learning_rate": 2.4360983198419514e-05, "loss": 2.1191, "step": 15450 }, { "epoch": 0.32, "grad_norm": 0.4375, "learning_rate": 2.4352922930514633e-05, "loss": 2.1407, "step": 15460 }, { "epoch": 0.32, "grad_norm": 0.4609375, "learning_rate": 2.4344858241719625e-05, "loss": 2.1377, "step": 15470 }, { "epoch": 0.32, "grad_norm": 0.458984375, "learning_rate": 2.433678913584648e-05, "loss": 2.1688, "step": 15480 }, { "epoch": 0.32, "grad_norm": 0.416015625, "learning_rate": 2.432871561670925e-05, "loss": 2.0989, "step": 15490 }, { "epoch": 0.32, "grad_norm": 0.4609375, "learning_rate": 2.4320637688124084e-05, "loss": 2.1494, "step": 15500 }, { "epoch": 0.32, "eval_accuracy": 0.5582411263625685, "eval_loss": 1.9979814291000366, "eval_runtime": 16.4549, "eval_samples_per_second": 36.159, "eval_steps_per_second": 1.155, "step": 15500 }, { "epoch": 0.32, "grad_norm": 0.439453125, "learning_rate": 2.431255535390922e-05, "loss": 2.0885, "step": 15510 }, { "epoch": 0.32, "grad_norm": 0.451171875, "learning_rate": 2.4304468617884965e-05, "loss": 2.1309, "step": 15520 }, { "epoch": 0.32, "grad_norm": 0.4453125, "learning_rate": 2.429637748387372e-05, "loss": 2.1513, "step": 15530 }, { "epoch": 0.32, "grad_norm": 0.447265625, "learning_rate": 2.4288281955699963e-05, "loss": 2.126, "step": 15540 }, { "epoch": 0.32, "grad_norm": 0.447265625, "learning_rate": 2.428018203719024e-05, "loss": 2.1299, "step": 15550 }, { "epoch": 0.32, "grad_norm": 0.43359375, "learning_rate": 2.4272077732173177e-05, "loss": 2.1302, "step": 15560 }, { "epoch": 0.32, "grad_norm": 0.439453125, "learning_rate": 2.4263969044479477e-05, "loss": 2.1572, "step": 15570 }, { "epoch": 0.32, "grad_norm": 0.447265625, "learning_rate": 2.425585597794191e-05, "loss": 2.1321, "step": 15580 }, { "epoch": 0.32, "grad_norm": 0.435546875, "learning_rate": 2.4247738536395315e-05, "loss": 2.1122, "step": 15590 }, { "epoch": 0.32, "grad_norm": 0.416015625, "learning_rate": 2.4239616723676603e-05, "loss": 2.0932, "step": 15600 }, { "epoch": 0.32, "grad_norm": 0.44140625, "learning_rate": 2.4231490543624752e-05, "loss": 2.1365, "step": 15610 }, { "epoch": 0.32, "grad_norm": 0.421875, "learning_rate": 2.4223360000080803e-05, "loss": 2.1153, "step": 15620 }, { "epoch": 0.32, "grad_norm": 0.455078125, "learning_rate": 2.4215225096887853e-05, "loss": 2.1355, "step": 15630 }, { "epoch": 0.33, "grad_norm": 0.451171875, "learning_rate": 2.4207085837891066e-05, "loss": 2.1666, "step": 15640 }, { "epoch": 0.33, "grad_norm": 0.4453125, "learning_rate": 2.4198942226937665e-05, "loss": 2.1014, "step": 15650 }, { "epoch": 0.33, "grad_norm": 0.470703125, "learning_rate": 2.419079426787693e-05, "loss": 2.1422, "step": 15660 }, { "epoch": 0.33, "grad_norm": 0.4375, "learning_rate": 2.41826419645602e-05, "loss": 2.1147, "step": 15670 }, { "epoch": 0.33, "grad_norm": 0.43359375, "learning_rate": 2.417448532084085e-05, "loss": 2.1315, "step": 15680 }, { "epoch": 0.33, "grad_norm": 0.4453125, "learning_rate": 2.4166324340574328e-05, "loss": 2.1433, "step": 15690 }, { "epoch": 0.33, "grad_norm": 0.4140625, "learning_rate": 2.4158159027618126e-05, "loss": 2.1467, "step": 15700 }, { "epoch": 0.33, "grad_norm": 0.427734375, "learning_rate": 2.4149989385831775e-05, "loss": 2.1164, "step": 15710 }, { "epoch": 0.33, "grad_norm": 0.43359375, "learning_rate": 2.4141815419076853e-05, "loss": 2.0902, "step": 15720 }, { "epoch": 0.33, "grad_norm": 0.439453125, "learning_rate": 2.4133637131216994e-05, "loss": 2.116, "step": 15730 }, { "epoch": 0.33, "grad_norm": 0.466796875, "learning_rate": 2.412545452611787e-05, "loss": 2.1461, "step": 15740 }, { "epoch": 0.33, "grad_norm": 0.458984375, "learning_rate": 2.411726760764719e-05, "loss": 2.1146, "step": 15750 }, { "epoch": 0.33, "grad_norm": 0.421875, "learning_rate": 2.4109076379674702e-05, "loss": 2.1436, "step": 15760 }, { "epoch": 0.33, "grad_norm": 0.455078125, "learning_rate": 2.4100880846072192e-05, "loss": 2.0966, "step": 15770 }, { "epoch": 0.33, "grad_norm": 0.419921875, "learning_rate": 2.409268101071348e-05, "loss": 2.1472, "step": 15780 }, { "epoch": 0.33, "grad_norm": 0.43359375, "learning_rate": 2.4084476877474424e-05, "loss": 2.1254, "step": 15790 }, { "epoch": 0.33, "grad_norm": 0.4375, "learning_rate": 2.407626845023291e-05, "loss": 2.1058, "step": 15800 }, { "epoch": 0.33, "grad_norm": 0.451171875, "learning_rate": 2.4068055732868855e-05, "loss": 2.1425, "step": 15810 }, { "epoch": 0.33, "grad_norm": 0.42578125, "learning_rate": 2.4059838729264197e-05, "loss": 2.1306, "step": 15820 }, { "epoch": 0.33, "grad_norm": 0.54296875, "learning_rate": 2.4051617443302904e-05, "loss": 2.0765, "step": 15830 }, { "epoch": 0.33, "grad_norm": 0.439453125, "learning_rate": 2.4043391878870984e-05, "loss": 2.1367, "step": 15840 }, { "epoch": 0.33, "grad_norm": 0.4296875, "learning_rate": 2.4035162039856437e-05, "loss": 2.1235, "step": 15850 }, { "epoch": 0.33, "grad_norm": 0.427734375, "learning_rate": 2.4026927930149318e-05, "loss": 2.1253, "step": 15860 }, { "epoch": 0.33, "grad_norm": 0.427734375, "learning_rate": 2.4018689553641667e-05, "loss": 2.1068, "step": 15870 }, { "epoch": 0.33, "grad_norm": 0.439453125, "learning_rate": 2.4010446914227558e-05, "loss": 2.1235, "step": 15880 }, { "epoch": 0.33, "grad_norm": 0.4140625, "learning_rate": 2.4002200015803082e-05, "loss": 2.094, "step": 15890 }, { "epoch": 0.33, "grad_norm": 0.447265625, "learning_rate": 2.3993948862266342e-05, "loss": 2.101, "step": 15900 }, { "epoch": 0.33, "grad_norm": 0.455078125, "learning_rate": 2.3985693457517444e-05, "loss": 2.1197, "step": 15910 }, { "epoch": 0.33, "grad_norm": 0.44921875, "learning_rate": 2.397743380545851e-05, "loss": 2.1242, "step": 15920 }, { "epoch": 0.33, "grad_norm": 0.45703125, "learning_rate": 2.3969169909993675e-05, "loss": 2.1115, "step": 15930 }, { "epoch": 0.33, "grad_norm": 0.431640625, "learning_rate": 2.396090177502906e-05, "loss": 2.1099, "step": 15940 }, { "epoch": 0.33, "grad_norm": 0.431640625, "learning_rate": 2.3952629404472816e-05, "loss": 2.1553, "step": 15950 }, { "epoch": 0.33, "grad_norm": 0.43359375, "learning_rate": 2.3944352802235075e-05, "loss": 2.1397, "step": 15960 }, { "epoch": 0.33, "grad_norm": 0.43359375, "learning_rate": 2.3936071972227988e-05, "loss": 2.1019, "step": 15970 }, { "epoch": 0.33, "grad_norm": 0.458984375, "learning_rate": 2.392778691836568e-05, "loss": 2.1409, "step": 15980 }, { "epoch": 0.33, "grad_norm": 0.427734375, "learning_rate": 2.3919497644564302e-05, "loss": 2.0796, "step": 15990 }, { "epoch": 0.33, "grad_norm": 0.462890625, "learning_rate": 2.391120415474197e-05, "loss": 2.0827, "step": 16000 }, { "epoch": 0.33, "eval_accuracy": 0.5584021291801178, "eval_loss": 1.9976391792297363, "eval_runtime": 16.4349, "eval_samples_per_second": 36.203, "eval_steps_per_second": 1.156, "step": 16000 }, { "epoch": 0.33, "grad_norm": 0.43359375, "learning_rate": 2.3902906452818814e-05, "loss": 2.1198, "step": 16010 }, { "epoch": 0.33, "grad_norm": 0.4296875, "learning_rate": 2.3894604542716948e-05, "loss": 2.1231, "step": 16020 }, { "epoch": 0.33, "grad_norm": 0.466796875, "learning_rate": 2.3886298428360472e-05, "loss": 2.1199, "step": 16030 }, { "epoch": 0.33, "grad_norm": 0.4140625, "learning_rate": 2.3877988113675472e-05, "loss": 2.1104, "step": 16040 }, { "epoch": 0.33, "grad_norm": 0.447265625, "learning_rate": 2.3869673602590036e-05, "loss": 2.115, "step": 16050 }, { "epoch": 0.33, "grad_norm": 0.48046875, "learning_rate": 2.3861354899034213e-05, "loss": 2.1439, "step": 16060 }, { "epoch": 0.33, "grad_norm": 0.435546875, "learning_rate": 2.385303200694005e-05, "loss": 2.1313, "step": 16070 }, { "epoch": 0.33, "grad_norm": 0.435546875, "learning_rate": 2.3844704930241564e-05, "loss": 2.0941, "step": 16080 }, { "epoch": 0.33, "grad_norm": 0.43359375, "learning_rate": 2.3836373672874758e-05, "loss": 2.147, "step": 16090 }, { "epoch": 0.33, "grad_norm": 0.41015625, "learning_rate": 2.3828038238777592e-05, "loss": 2.1247, "step": 16100 }, { "epoch": 0.33, "grad_norm": 0.65625, "learning_rate": 2.3819698631890034e-05, "loss": 2.1169, "step": 16110 }, { "epoch": 0.34, "grad_norm": 0.427734375, "learning_rate": 2.3811354856153996e-05, "loss": 2.1007, "step": 16120 }, { "epoch": 0.34, "grad_norm": 0.412109375, "learning_rate": 2.3803006915513364e-05, "loss": 2.0962, "step": 16130 }, { "epoch": 0.34, "grad_norm": 0.427734375, "learning_rate": 2.3794654813914015e-05, "loss": 2.0955, "step": 16140 }, { "epoch": 0.34, "grad_norm": 0.41796875, "learning_rate": 2.378629855530376e-05, "loss": 2.107, "step": 16150 }, { "epoch": 0.34, "grad_norm": 0.43359375, "learning_rate": 2.37779381436324e-05, "loss": 2.1207, "step": 16160 }, { "epoch": 0.34, "grad_norm": 0.4453125, "learning_rate": 2.376957358285169e-05, "loss": 2.1022, "step": 16170 }, { "epoch": 0.34, "grad_norm": 0.427734375, "learning_rate": 2.3761204876915338e-05, "loss": 2.1603, "step": 16180 }, { "epoch": 0.34, "grad_norm": 0.46484375, "learning_rate": 2.375283202977903e-05, "loss": 2.1225, "step": 16190 }, { "epoch": 0.34, "grad_norm": 0.4375, "learning_rate": 2.3744455045400393e-05, "loss": 2.1115, "step": 16200 }, { "epoch": 0.34, "grad_norm": 0.421875, "learning_rate": 2.3736073927739016e-05, "loss": 2.1236, "step": 16210 }, { "epoch": 0.34, "grad_norm": 0.44140625, "learning_rate": 2.3727688680756443e-05, "loss": 2.1275, "step": 16220 }, { "epoch": 0.34, "grad_norm": 0.431640625, "learning_rate": 2.3719299308416167e-05, "loss": 2.1374, "step": 16230 }, { "epoch": 0.34, "grad_norm": 0.44140625, "learning_rate": 2.3710905814683635e-05, "loss": 2.1422, "step": 16240 }, { "epoch": 0.34, "grad_norm": 0.44921875, "learning_rate": 2.3702508203526236e-05, "loss": 2.122, "step": 16250 }, { "epoch": 0.34, "grad_norm": 0.470703125, "learning_rate": 2.3694106478913313e-05, "loss": 2.1379, "step": 16260 }, { "epoch": 0.34, "grad_norm": 0.42578125, "learning_rate": 2.3685700644816143e-05, "loss": 2.1167, "step": 16270 }, { "epoch": 0.34, "grad_norm": 0.435546875, "learning_rate": 2.367729070520795e-05, "loss": 2.1103, "step": 16280 }, { "epoch": 0.34, "grad_norm": 0.423828125, "learning_rate": 2.3668876664063908e-05, "loss": 2.1135, "step": 16290 }, { "epoch": 0.34, "grad_norm": 0.4296875, "learning_rate": 2.3660458525361113e-05, "loss": 2.1097, "step": 16300 }, { "epoch": 0.34, "grad_norm": 0.451171875, "learning_rate": 2.3652036293078612e-05, "loss": 2.1381, "step": 16310 }, { "epoch": 0.34, "grad_norm": 0.435546875, "learning_rate": 2.364360997119738e-05, "loss": 2.1099, "step": 16320 }, { "epoch": 0.34, "grad_norm": 0.44921875, "learning_rate": 2.3635179563700324e-05, "loss": 2.1221, "step": 16330 }, { "epoch": 0.34, "grad_norm": 0.435546875, "learning_rate": 2.362674507457228e-05, "loss": 2.0826, "step": 16340 }, { "epoch": 0.34, "grad_norm": 0.5703125, "learning_rate": 2.361830650780003e-05, "loss": 2.1204, "step": 16350 }, { "epoch": 0.34, "grad_norm": 0.4296875, "learning_rate": 2.3609863867372266e-05, "loss": 2.121, "step": 16360 }, { "epoch": 0.34, "grad_norm": 0.462890625, "learning_rate": 2.3601417157279606e-05, "loss": 2.1185, "step": 16370 }, { "epoch": 0.34, "grad_norm": 0.46484375, "learning_rate": 2.35929663815146e-05, "loss": 2.1422, "step": 16380 }, { "epoch": 0.34, "grad_norm": 0.458984375, "learning_rate": 2.3584511544071716e-05, "loss": 2.1114, "step": 16390 }, { "epoch": 0.34, "grad_norm": 0.443359375, "learning_rate": 2.3576052648947338e-05, "loss": 2.1555, "step": 16400 }, { "epoch": 0.34, "grad_norm": 0.439453125, "learning_rate": 2.3567589700139776e-05, "loss": 2.1213, "step": 16410 }, { "epoch": 0.34, "grad_norm": 0.4765625, "learning_rate": 2.3559122701649252e-05, "loss": 2.117, "step": 16420 }, { "epoch": 0.34, "grad_norm": 0.474609375, "learning_rate": 2.35506516574779e-05, "loss": 2.0975, "step": 16430 }, { "epoch": 0.34, "grad_norm": 0.42578125, "learning_rate": 2.354217657162977e-05, "loss": 2.1092, "step": 16440 }, { "epoch": 0.34, "grad_norm": 0.44921875, "learning_rate": 2.3533697448110817e-05, "loss": 2.1054, "step": 16450 }, { "epoch": 0.34, "grad_norm": 0.443359375, "learning_rate": 2.3525214290928917e-05, "loss": 2.0694, "step": 16460 }, { "epoch": 0.34, "grad_norm": 0.421875, "learning_rate": 2.3516727104093836e-05, "loss": 2.1213, "step": 16470 }, { "epoch": 0.34, "grad_norm": 0.43359375, "learning_rate": 2.350823589161725e-05, "loss": 2.1395, "step": 16480 }, { "epoch": 0.34, "grad_norm": 0.447265625, "learning_rate": 2.349974065751275e-05, "loss": 2.1251, "step": 16490 }, { "epoch": 0.34, "grad_norm": 0.447265625, "learning_rate": 2.349124140579581e-05, "loss": 2.0991, "step": 16500 }, { "epoch": 0.34, "eval_accuracy": 0.5582000542152344, "eval_loss": 1.997467041015625, "eval_runtime": 16.4503, "eval_samples_per_second": 36.17, "eval_steps_per_second": 1.155, "step": 16500 }, { "epoch": 0.34, "grad_norm": 0.4296875, "learning_rate": 2.3482738140483815e-05, "loss": 2.1377, "step": 16510 }, { "epoch": 0.34, "grad_norm": 0.435546875, "learning_rate": 2.3474230865596045e-05, "loss": 2.1417, "step": 16520 }, { "epoch": 0.34, "grad_norm": 0.4375, "learning_rate": 2.346571958515367e-05, "loss": 2.1126, "step": 16530 }, { "epoch": 0.34, "grad_norm": 0.458984375, "learning_rate": 2.345720430317976e-05, "loss": 2.0925, "step": 16540 }, { "epoch": 0.34, "grad_norm": 0.466796875, "learning_rate": 2.3448685023699267e-05, "loss": 2.1063, "step": 16550 }, { "epoch": 0.34, "grad_norm": 0.421875, "learning_rate": 2.344016175073905e-05, "loss": 2.0826, "step": 16560 }, { "epoch": 0.34, "grad_norm": 0.439453125, "learning_rate": 2.3431634488327834e-05, "loss": 2.1062, "step": 16570 }, { "epoch": 0.34, "grad_norm": 0.423828125, "learning_rate": 2.342310324049625e-05, "loss": 2.1128, "step": 16580 }, { "epoch": 0.34, "grad_norm": 0.458984375, "learning_rate": 2.3414568011276794e-05, "loss": 2.1029, "step": 16590 }, { "epoch": 0.35, "grad_norm": 0.451171875, "learning_rate": 2.340602880470386e-05, "loss": 2.1128, "step": 16600 }, { "epoch": 0.35, "grad_norm": 0.439453125, "learning_rate": 2.339748562481371e-05, "loss": 2.1209, "step": 16610 }, { "epoch": 0.35, "grad_norm": 0.453125, "learning_rate": 2.3388938475644494e-05, "loss": 2.14, "step": 16620 }, { "epoch": 0.35, "grad_norm": 0.435546875, "learning_rate": 2.3380387361236223e-05, "loss": 2.1274, "step": 16630 }, { "epoch": 0.35, "grad_norm": 0.423828125, "learning_rate": 2.3371832285630805e-05, "loss": 2.0816, "step": 16640 }, { "epoch": 0.35, "grad_norm": 0.416015625, "learning_rate": 2.3363273252872003e-05, "loss": 2.1338, "step": 16650 }, { "epoch": 0.35, "grad_norm": 0.4375, "learning_rate": 2.3354710267005452e-05, "loss": 2.1182, "step": 16660 }, { "epoch": 0.35, "grad_norm": 0.43359375, "learning_rate": 2.334614333207866e-05, "loss": 2.1068, "step": 16670 }, { "epoch": 0.35, "grad_norm": 0.470703125, "learning_rate": 2.3337572452141005e-05, "loss": 2.1285, "step": 16680 }, { "epoch": 0.35, "grad_norm": 0.44921875, "learning_rate": 2.3328997631243726e-05, "loss": 2.1612, "step": 16690 }, { "epoch": 0.35, "grad_norm": 0.470703125, "learning_rate": 2.3320418873439918e-05, "loss": 2.1015, "step": 16700 }, { "epoch": 0.35, "grad_norm": 0.443359375, "learning_rate": 2.3311836182784544e-05, "loss": 2.1444, "step": 16710 }, { "epoch": 0.35, "grad_norm": 0.439453125, "learning_rate": 2.3303249563334425e-05, "loss": 2.0945, "step": 16720 }, { "epoch": 0.35, "grad_norm": 0.494140625, "learning_rate": 2.329465901914824e-05, "loss": 2.1494, "step": 16730 }, { "epoch": 0.35, "grad_norm": 0.4609375, "learning_rate": 2.3286064554286525e-05, "loss": 2.1233, "step": 16740 }, { "epoch": 0.35, "grad_norm": 0.498046875, "learning_rate": 2.327746617281166e-05, "loss": 2.1219, "step": 16750 }, { "epoch": 0.35, "grad_norm": 0.451171875, "learning_rate": 2.3268863878787886e-05, "loss": 2.1231, "step": 16760 }, { "epoch": 0.35, "grad_norm": 0.42578125, "learning_rate": 2.326025767628129e-05, "loss": 2.1019, "step": 16770 }, { "epoch": 0.35, "grad_norm": 0.43359375, "learning_rate": 2.3251647569359804e-05, "loss": 2.1066, "step": 16780 }, { "epoch": 0.35, "grad_norm": 0.423828125, "learning_rate": 2.324303356209321e-05, "loss": 2.1257, "step": 16790 }, { "epoch": 0.35, "grad_norm": 0.46484375, "learning_rate": 2.323441565855313e-05, "loss": 2.1128, "step": 16800 }, { "epoch": 0.35, "grad_norm": 0.419921875, "learning_rate": 2.3225793862813033e-05, "loss": 2.1295, "step": 16810 }, { "epoch": 0.35, "grad_norm": 0.431640625, "learning_rate": 2.3217168178948217e-05, "loss": 2.0857, "step": 16820 }, { "epoch": 0.35, "grad_norm": 0.435546875, "learning_rate": 2.3208538611035826e-05, "loss": 2.1211, "step": 16830 }, { "epoch": 0.35, "grad_norm": 0.455078125, "learning_rate": 2.3199905163154848e-05, "loss": 2.1346, "step": 16840 }, { "epoch": 0.35, "grad_norm": 0.431640625, "learning_rate": 2.3191267839386082e-05, "loss": 2.115, "step": 16850 }, { "epoch": 0.35, "grad_norm": 0.451171875, "learning_rate": 2.318262664381218e-05, "loss": 2.1395, "step": 16860 }, { "epoch": 0.35, "grad_norm": 0.43359375, "learning_rate": 2.3173981580517615e-05, "loss": 2.1148, "step": 16870 }, { "epoch": 0.35, "grad_norm": 0.443359375, "learning_rate": 2.3165332653588692e-05, "loss": 2.1701, "step": 16880 }, { "epoch": 0.35, "grad_norm": 0.44140625, "learning_rate": 2.3156679867113534e-05, "loss": 2.1025, "step": 16890 }, { "epoch": 0.35, "grad_norm": 0.4375, "learning_rate": 2.3148023225182102e-05, "loss": 2.1646, "step": 16900 }, { "epoch": 0.35, "grad_norm": 0.443359375, "learning_rate": 2.3139362731886164e-05, "loss": 2.1109, "step": 16910 }, { "epoch": 0.35, "grad_norm": 0.439453125, "learning_rate": 2.3130698391319328e-05, "loss": 2.0948, "step": 16920 }, { "epoch": 0.35, "grad_norm": 0.44140625, "learning_rate": 2.3122030207576993e-05, "loss": 2.1279, "step": 16930 }, { "epoch": 0.35, "grad_norm": 0.443359375, "learning_rate": 2.31133581847564e-05, "loss": 2.1231, "step": 16940 }, { "epoch": 0.35, "grad_norm": 0.4375, "learning_rate": 2.3104682326956598e-05, "loss": 2.1146, "step": 16950 }, { "epoch": 0.35, "grad_norm": 0.44140625, "learning_rate": 2.309600263827843e-05, "loss": 2.1351, "step": 16960 }, { "epoch": 0.35, "grad_norm": 0.447265625, "learning_rate": 2.3087319122824582e-05, "loss": 2.124, "step": 16970 }, { "epoch": 0.35, "grad_norm": 0.51171875, "learning_rate": 2.3078631784699526e-05, "loss": 2.1753, "step": 16980 }, { "epoch": 0.35, "grad_norm": 0.7265625, "learning_rate": 2.3069940628009543e-05, "loss": 2.1136, "step": 16990 }, { "epoch": 0.35, "grad_norm": 0.443359375, "learning_rate": 2.306124565686273e-05, "loss": 2.1108, "step": 17000 }, { "epoch": 0.35, "eval_accuracy": 0.5581786966986207, "eval_loss": 1.997156023979187, "eval_runtime": 16.4385, "eval_samples_per_second": 36.196, "eval_steps_per_second": 1.156, "step": 17000 }, { "epoch": 0.35, "grad_norm": 0.4453125, "learning_rate": 2.305254687536898e-05, "loss": 2.1053, "step": 17010 }, { "epoch": 0.35, "grad_norm": 0.439453125, "learning_rate": 2.304384428763998e-05, "loss": 2.0957, "step": 17020 }, { "epoch": 0.35, "grad_norm": 0.4375, "learning_rate": 2.303513789778923e-05, "loss": 2.1267, "step": 17030 }, { "epoch": 0.35, "grad_norm": 0.458984375, "learning_rate": 2.3026427709932016e-05, "loss": 2.0924, "step": 17040 }, { "epoch": 0.35, "grad_norm": 0.447265625, "learning_rate": 2.3017713728185416e-05, "loss": 2.1457, "step": 17050 }, { "epoch": 0.35, "grad_norm": 0.43359375, "learning_rate": 2.3008995956668327e-05, "loss": 2.1346, "step": 17060 }, { "epoch": 0.35, "grad_norm": 0.453125, "learning_rate": 2.300027439950141e-05, "loss": 2.1404, "step": 17070 }, { "epoch": 0.36, "grad_norm": 0.455078125, "learning_rate": 2.299154906080712e-05, "loss": 2.107, "step": 17080 }, { "epoch": 0.36, "grad_norm": 0.4453125, "learning_rate": 2.298281994470971e-05, "loss": 2.1133, "step": 17090 }, { "epoch": 0.36, "grad_norm": 0.4375, "learning_rate": 2.2974087055335204e-05, "loss": 2.1116, "step": 17100 }, { "epoch": 0.36, "grad_norm": 0.447265625, "learning_rate": 2.296535039681143e-05, "loss": 2.1429, "step": 17110 }, { "epoch": 0.36, "grad_norm": 0.421875, "learning_rate": 2.2956609973267983e-05, "loss": 2.1368, "step": 17120 }, { "epoch": 0.36, "grad_norm": 0.439453125, "learning_rate": 2.294786578883623e-05, "loss": 2.1275, "step": 17130 }, { "epoch": 0.36, "grad_norm": 0.45703125, "learning_rate": 2.293911784764934e-05, "loss": 2.1393, "step": 17140 }, { "epoch": 0.36, "grad_norm": 0.42578125, "learning_rate": 2.2930366153842234e-05, "loss": 2.1127, "step": 17150 }, { "epoch": 0.36, "grad_norm": 0.46484375, "learning_rate": 2.2921610711551618e-05, "loss": 2.1061, "step": 17160 }, { "epoch": 0.36, "grad_norm": 0.451171875, "learning_rate": 2.2912851524915968e-05, "loss": 2.1509, "step": 17170 }, { "epoch": 0.36, "grad_norm": 0.427734375, "learning_rate": 2.2904088598075528e-05, "loss": 2.1013, "step": 17180 }, { "epoch": 0.36, "grad_norm": 0.43359375, "learning_rate": 2.2895321935172317e-05, "loss": 2.1362, "step": 17190 }, { "epoch": 0.36, "grad_norm": 0.44921875, "learning_rate": 2.2886551540350115e-05, "loss": 2.1365, "step": 17200 }, { "epoch": 0.36, "grad_norm": 0.419921875, "learning_rate": 2.287777741775446e-05, "loss": 2.1467, "step": 17210 }, { "epoch": 0.36, "grad_norm": 0.44140625, "learning_rate": 2.2868999571532664e-05, "loss": 2.0935, "step": 17220 }, { "epoch": 0.36, "grad_norm": 0.451171875, "learning_rate": 2.2860218005833788e-05, "loss": 2.1297, "step": 17230 }, { "epoch": 0.36, "grad_norm": 0.4375, "learning_rate": 2.285143272480866e-05, "loss": 2.144, "step": 17240 }, { "epoch": 0.36, "grad_norm": 0.431640625, "learning_rate": 2.2842643732609856e-05, "loss": 2.147, "step": 17250 }, { "epoch": 0.36, "grad_norm": 0.4296875, "learning_rate": 2.2833851033391714e-05, "loss": 2.1434, "step": 17260 }, { "epoch": 0.36, "grad_norm": 0.443359375, "learning_rate": 2.282505463131032e-05, "loss": 2.1556, "step": 17270 }, { "epoch": 0.36, "grad_norm": 0.435546875, "learning_rate": 2.281625453052351e-05, "loss": 2.0865, "step": 17280 }, { "epoch": 0.36, "grad_norm": 0.458984375, "learning_rate": 2.2807450735190864e-05, "loss": 2.1126, "step": 17290 }, { "epoch": 0.36, "grad_norm": 0.55859375, "learning_rate": 2.2798643249473727e-05, "loss": 2.13, "step": 17300 }, { "epoch": 0.36, "grad_norm": 0.458984375, "learning_rate": 2.278983207753516e-05, "loss": 2.0904, "step": 17310 }, { "epoch": 0.36, "grad_norm": 0.41796875, "learning_rate": 2.2781017223539988e-05, "loss": 2.1025, "step": 17320 }, { "epoch": 0.36, "grad_norm": 0.4296875, "learning_rate": 2.2772198691654768e-05, "loss": 2.1126, "step": 17330 }, { "epoch": 0.36, "grad_norm": 0.431640625, "learning_rate": 2.27633764860478e-05, "loss": 2.1331, "step": 17340 }, { "epoch": 0.36, "grad_norm": 0.439453125, "learning_rate": 2.2754550610889117e-05, "loss": 2.1165, "step": 17350 }, { "epoch": 0.36, "grad_norm": 0.451171875, "learning_rate": 2.274572107035048e-05, "loss": 2.14, "step": 17360 }, { "epoch": 0.36, "grad_norm": 0.4296875, "learning_rate": 2.2736887868605396e-05, "loss": 2.0895, "step": 17370 }, { "epoch": 0.36, "grad_norm": 0.4375, "learning_rate": 2.2728051009829095e-05, "loss": 2.1234, "step": 17380 }, { "epoch": 0.36, "grad_norm": 0.453125, "learning_rate": 2.2719210498198537e-05, "loss": 2.1233, "step": 17390 }, { "epoch": 0.36, "grad_norm": 0.4296875, "learning_rate": 2.271036633789241e-05, "loss": 2.1033, "step": 17400 }, { "epoch": 0.36, "grad_norm": 0.6875, "learning_rate": 2.270151853309112e-05, "loss": 2.1471, "step": 17410 }, { "epoch": 0.36, "grad_norm": 0.46484375, "learning_rate": 2.2692667087976803e-05, "loss": 2.1319, "step": 17420 }, { "epoch": 0.36, "grad_norm": 0.439453125, "learning_rate": 2.2683812006733313e-05, "loss": 2.091, "step": 17430 }, { "epoch": 0.36, "grad_norm": 0.41796875, "learning_rate": 2.2674953293546224e-05, "loss": 2.0865, "step": 17440 }, { "epoch": 0.36, "grad_norm": 0.453125, "learning_rate": 2.2666090952602826e-05, "loss": 2.1314, "step": 17450 }, { "epoch": 0.36, "grad_norm": 0.451171875, "learning_rate": 2.2657224988092122e-05, "loss": 2.1015, "step": 17460 }, { "epoch": 0.36, "grad_norm": 0.44140625, "learning_rate": 2.264835540420483e-05, "loss": 2.1318, "step": 17470 }, { "epoch": 0.36, "grad_norm": 0.44140625, "learning_rate": 2.2639482205133375e-05, "loss": 2.1215, "step": 17480 }, { "epoch": 0.36, "grad_norm": 0.451171875, "learning_rate": 2.2630605395071894e-05, "loss": 2.1075, "step": 17490 }, { "epoch": 0.36, "grad_norm": 0.421875, "learning_rate": 2.262172497821624e-05, "loss": 2.1209, "step": 17500 }, { "epoch": 0.36, "eval_accuracy": 0.5582739840804357, "eval_loss": 1.9968290328979492, "eval_runtime": 16.4403, "eval_samples_per_second": 36.192, "eval_steps_per_second": 1.156, "step": 17500 }, { "epoch": 0.36, "grad_norm": 0.435546875, "learning_rate": 2.2612840958763946e-05, "loss": 2.1393, "step": 17510 }, { "epoch": 0.36, "grad_norm": 0.42578125, "learning_rate": 2.2603953340914282e-05, "loss": 2.1351, "step": 17520 }, { "epoch": 0.36, "grad_norm": 0.439453125, "learning_rate": 2.2595062128868185e-05, "loss": 2.1133, "step": 17530 }, { "epoch": 0.36, "grad_norm": 0.4296875, "learning_rate": 2.258616732682831e-05, "loss": 2.1398, "step": 17540 }, { "epoch": 0.36, "grad_norm": 0.55859375, "learning_rate": 2.2577268938999008e-05, "loss": 2.122, "step": 17550 }, { "epoch": 0.37, "grad_norm": 0.43359375, "learning_rate": 2.2568366969586315e-05, "loss": 2.0974, "step": 17560 }, { "epoch": 0.37, "grad_norm": 0.478515625, "learning_rate": 2.255946142279798e-05, "loss": 2.1085, "step": 17570 }, { "epoch": 0.37, "grad_norm": 0.431640625, "learning_rate": 2.2550552302843416e-05, "loss": 2.1024, "step": 17580 }, { "epoch": 0.37, "grad_norm": 0.44140625, "learning_rate": 2.254163961393374e-05, "loss": 2.1257, "step": 17590 }, { "epoch": 0.37, "grad_norm": 0.44921875, "learning_rate": 2.2532723360281762e-05, "loss": 2.1274, "step": 17600 }, { "epoch": 0.37, "grad_norm": 0.4375, "learning_rate": 2.252380354610197e-05, "loss": 2.1184, "step": 17610 }, { "epoch": 0.37, "grad_norm": 0.5703125, "learning_rate": 2.2514880175610522e-05, "loss": 2.1198, "step": 17620 }, { "epoch": 0.37, "grad_norm": 0.45703125, "learning_rate": 2.2505953253025285e-05, "loss": 2.1069, "step": 17630 }, { "epoch": 0.37, "grad_norm": 0.45703125, "learning_rate": 2.2497022782565783e-05, "loss": 2.1095, "step": 17640 }, { "epoch": 0.37, "grad_norm": 0.453125, "learning_rate": 2.248808876845322e-05, "loss": 2.1438, "step": 17650 }, { "epoch": 0.37, "grad_norm": 0.43359375, "learning_rate": 2.2479151214910483e-05, "loss": 2.1282, "step": 17660 }, { "epoch": 0.37, "grad_norm": 0.4453125, "learning_rate": 2.2470210126162126e-05, "loss": 2.1368, "step": 17670 }, { "epoch": 0.37, "grad_norm": 0.447265625, "learning_rate": 2.246126550643438e-05, "loss": 2.1384, "step": 17680 }, { "epoch": 0.37, "grad_norm": 0.41796875, "learning_rate": 2.2452317359955133e-05, "loss": 2.1358, "step": 17690 }, { "epoch": 0.37, "grad_norm": 0.42578125, "learning_rate": 2.244336569095395e-05, "loss": 2.1071, "step": 17700 }, { "epoch": 0.37, "grad_norm": 0.42578125, "learning_rate": 2.2434410503662064e-05, "loss": 2.1211, "step": 17710 }, { "epoch": 0.37, "grad_norm": 0.490234375, "learning_rate": 2.242545180231236e-05, "loss": 2.1031, "step": 17720 }, { "epoch": 0.37, "grad_norm": 0.427734375, "learning_rate": 2.2416489591139382e-05, "loss": 2.1204, "step": 17730 }, { "epoch": 0.37, "grad_norm": 0.453125, "learning_rate": 2.2407523874379357e-05, "loss": 2.1133, "step": 17740 }, { "epoch": 0.37, "grad_norm": 0.423828125, "learning_rate": 2.239855465627014e-05, "loss": 2.0785, "step": 17750 }, { "epoch": 0.37, "grad_norm": 0.4296875, "learning_rate": 2.2389581941051264e-05, "loss": 2.1482, "step": 17760 }, { "epoch": 0.37, "grad_norm": 0.4296875, "learning_rate": 2.23806057329639e-05, "loss": 2.1116, "step": 17770 }, { "epoch": 0.37, "grad_norm": 0.435546875, "learning_rate": 2.2371626036250868e-05, "loss": 2.1437, "step": 17780 }, { "epoch": 0.37, "grad_norm": 0.439453125, "learning_rate": 2.236264285515665e-05, "loss": 2.1247, "step": 17790 }, { "epoch": 0.37, "grad_norm": 0.4296875, "learning_rate": 2.235365619392737e-05, "loss": 2.0982, "step": 17800 }, { "epoch": 0.37, "grad_norm": 0.431640625, "learning_rate": 2.2344666056810788e-05, "loss": 2.1155, "step": 17810 }, { "epoch": 0.37, "grad_norm": 0.4375, "learning_rate": 2.2335672448056323e-05, "loss": 2.1112, "step": 17820 }, { "epoch": 0.37, "grad_norm": 0.42578125, "learning_rate": 2.232667537191502e-05, "loss": 2.11, "step": 17830 }, { "epoch": 0.37, "grad_norm": 0.474609375, "learning_rate": 2.2317674832639574e-05, "loss": 2.085, "step": 17840 }, { "epoch": 0.37, "grad_norm": 0.435546875, "learning_rate": 2.230867083448431e-05, "loss": 2.1234, "step": 17850 }, { "epoch": 0.37, "grad_norm": 0.41796875, "learning_rate": 2.2299663381705187e-05, "loss": 2.1444, "step": 17860 }, { "epoch": 0.37, "grad_norm": 0.4453125, "learning_rate": 2.2290652478559807e-05, "loss": 2.0778, "step": 17870 }, { "epoch": 0.37, "grad_norm": 0.4453125, "learning_rate": 2.2281638129307393e-05, "loss": 2.1029, "step": 17880 }, { "epoch": 0.37, "grad_norm": 0.44140625, "learning_rate": 2.2272620338208796e-05, "loss": 2.1198, "step": 17890 }, { "epoch": 0.37, "grad_norm": 0.44140625, "learning_rate": 2.2263599109526505e-05, "loss": 2.126, "step": 17900 }, { "epoch": 0.37, "grad_norm": 0.447265625, "learning_rate": 2.2254574447524624e-05, "loss": 2.0921, "step": 17910 }, { "epoch": 0.37, "grad_norm": 0.4453125, "learning_rate": 2.224554635646888e-05, "loss": 2.1375, "step": 17920 }, { "epoch": 0.37, "grad_norm": 0.4296875, "learning_rate": 2.2236514840626628e-05, "loss": 2.1375, "step": 17930 }, { "epoch": 0.37, "grad_norm": 0.4296875, "learning_rate": 2.2227479904266827e-05, "loss": 2.1237, "step": 17940 }, { "epoch": 0.37, "grad_norm": 0.46484375, "learning_rate": 2.2218441551660073e-05, "loss": 2.1077, "step": 17950 }, { "epoch": 0.37, "grad_norm": 0.439453125, "learning_rate": 2.2209399787078565e-05, "loss": 2.1325, "step": 17960 }, { "epoch": 0.37, "grad_norm": 0.4375, "learning_rate": 2.220035461479611e-05, "loss": 2.137, "step": 17970 }, { "epoch": 0.37, "grad_norm": 0.451171875, "learning_rate": 2.219130603908814e-05, "loss": 2.1194, "step": 17980 }, { "epoch": 0.37, "grad_norm": 0.423828125, "learning_rate": 2.2182254064231688e-05, "loss": 2.0936, "step": 17990 }, { "epoch": 0.37, "grad_norm": 0.423828125, "learning_rate": 2.2173198694505384e-05, "loss": 2.1012, "step": 18000 }, { "epoch": 0.37, "eval_accuracy": 0.5583774858917174, "eval_loss": 1.9962693452835083, "eval_runtime": 16.4418, "eval_samples_per_second": 36.188, "eval_steps_per_second": 1.156, "step": 18000 }, { "epoch": 0.37, "grad_norm": 0.419921875, "learning_rate": 2.2164139934189483e-05, "loss": 2.1205, "step": 18010 }, { "epoch": 0.37, "grad_norm": 0.427734375, "learning_rate": 2.2155077787565828e-05, "loss": 2.143, "step": 18020 }, { "epoch": 0.37, "grad_norm": 0.447265625, "learning_rate": 2.2146012258917867e-05, "loss": 2.1182, "step": 18030 }, { "epoch": 0.38, "grad_norm": 0.435546875, "learning_rate": 2.2136943352530643e-05, "loss": 2.1035, "step": 18040 }, { "epoch": 0.38, "grad_norm": 0.4296875, "learning_rate": 2.2127871072690798e-05, "loss": 2.1078, "step": 18050 }, { "epoch": 0.38, "grad_norm": 0.44140625, "learning_rate": 2.2118795423686577e-05, "loss": 2.1071, "step": 18060 }, { "epoch": 0.38, "grad_norm": 0.4296875, "learning_rate": 2.2109716409807804e-05, "loss": 2.1282, "step": 18070 }, { "epoch": 0.38, "grad_norm": 0.466796875, "learning_rate": 2.2100634035345898e-05, "loss": 2.077, "step": 18080 }, { "epoch": 0.38, "grad_norm": 0.4765625, "learning_rate": 2.2091548304593873e-05, "loss": 2.1142, "step": 18090 }, { "epoch": 0.38, "grad_norm": 0.45703125, "learning_rate": 2.208245922184632e-05, "loss": 2.0945, "step": 18100 }, { "epoch": 0.38, "grad_norm": 0.43359375, "learning_rate": 2.2073366791399415e-05, "loss": 2.1391, "step": 18110 }, { "epoch": 0.38, "grad_norm": 0.4296875, "learning_rate": 2.206427101755093e-05, "loss": 2.1022, "step": 18120 }, { "epoch": 0.38, "grad_norm": 0.47265625, "learning_rate": 2.2055171904600202e-05, "loss": 2.1278, "step": 18130 }, { "epoch": 0.38, "grad_norm": 0.498046875, "learning_rate": 2.2046069456848146e-05, "loss": 2.1298, "step": 18140 }, { "epoch": 0.38, "grad_norm": 0.4453125, "learning_rate": 2.2036963678597266e-05, "loss": 2.1101, "step": 18150 }, { "epoch": 0.38, "grad_norm": 0.44140625, "learning_rate": 2.2027854574151634e-05, "loss": 2.1756, "step": 18160 }, { "epoch": 0.38, "grad_norm": 0.4453125, "learning_rate": 2.201874214781689e-05, "loss": 2.1157, "step": 18170 }, { "epoch": 0.38, "grad_norm": 0.50390625, "learning_rate": 2.200962640390025e-05, "loss": 2.1264, "step": 18180 }, { "epoch": 0.38, "grad_norm": 0.43359375, "learning_rate": 2.2000507346710487e-05, "loss": 2.1134, "step": 18190 }, { "epoch": 0.38, "grad_norm": 0.5078125, "learning_rate": 2.1991384980557958e-05, "loss": 2.1098, "step": 18200 }, { "epoch": 0.38, "grad_norm": 0.4296875, "learning_rate": 2.1982259309754574e-05, "loss": 2.1148, "step": 18210 }, { "epoch": 0.38, "grad_norm": 0.431640625, "learning_rate": 2.1973130338613804e-05, "loss": 2.1257, "step": 18220 }, { "epoch": 0.38, "grad_norm": 0.443359375, "learning_rate": 2.1963998071450685e-05, "loss": 2.1218, "step": 18230 }, { "epoch": 0.38, "grad_norm": 0.421875, "learning_rate": 2.1954862512581807e-05, "loss": 2.1428, "step": 18240 }, { "epoch": 0.38, "grad_norm": 0.439453125, "learning_rate": 2.1945723666325324e-05, "loss": 2.1209, "step": 18250 }, { "epoch": 0.38, "grad_norm": 0.49609375, "learning_rate": 2.193658153700093e-05, "loss": 2.1002, "step": 18260 }, { "epoch": 0.38, "grad_norm": 0.443359375, "learning_rate": 2.1927436128929875e-05, "loss": 2.1369, "step": 18270 }, { "epoch": 0.38, "grad_norm": 0.4453125, "learning_rate": 2.1918287446434974e-05, "loss": 2.1262, "step": 18280 }, { "epoch": 0.38, "grad_norm": 0.423828125, "learning_rate": 2.1909135493840573e-05, "loss": 2.0885, "step": 18290 }, { "epoch": 0.38, "grad_norm": 0.4453125, "learning_rate": 2.1899980275472562e-05, "loss": 2.1326, "step": 18300 }, { "epoch": 0.38, "grad_norm": 0.46484375, "learning_rate": 2.189082179565839e-05, "loss": 2.1098, "step": 18310 }, { "epoch": 0.38, "grad_norm": 0.455078125, "learning_rate": 2.188166005872704e-05, "loss": 2.1133, "step": 18320 }, { "epoch": 0.38, "grad_norm": 0.4375, "learning_rate": 2.187249506900902e-05, "loss": 2.1311, "step": 18330 }, { "epoch": 0.38, "grad_norm": 0.470703125, "learning_rate": 2.1863326830836396e-05, "loss": 2.1083, "step": 18340 }, { "epoch": 0.38, "grad_norm": 0.431640625, "learning_rate": 2.1854155348542764e-05, "loss": 2.0964, "step": 18350 }, { "epoch": 0.38, "grad_norm": 0.451171875, "learning_rate": 2.1844980626463254e-05, "loss": 2.1133, "step": 18360 }, { "epoch": 0.38, "grad_norm": 0.48046875, "learning_rate": 2.1835802668934527e-05, "loss": 2.13, "step": 18370 }, { "epoch": 0.38, "grad_norm": 0.4296875, "learning_rate": 2.182662148029476e-05, "loss": 2.1313, "step": 18380 }, { "epoch": 0.38, "grad_norm": 0.44921875, "learning_rate": 2.1817437064883673e-05, "loss": 2.1062, "step": 18390 }, { "epoch": 0.38, "grad_norm": 0.427734375, "learning_rate": 2.1808249427042516e-05, "loss": 2.0795, "step": 18400 }, { "epoch": 0.38, "grad_norm": 0.4296875, "learning_rate": 2.1799058571114046e-05, "loss": 2.103, "step": 18410 }, { "epoch": 0.38, "grad_norm": 0.421875, "learning_rate": 2.1789864501442547e-05, "loss": 2.1372, "step": 18420 }, { "epoch": 0.38, "grad_norm": 0.435546875, "learning_rate": 2.1780667222373826e-05, "loss": 2.1031, "step": 18430 }, { "epoch": 0.38, "grad_norm": 0.453125, "learning_rate": 2.1771466738255198e-05, "loss": 2.1161, "step": 18440 }, { "epoch": 0.38, "grad_norm": 0.5, "learning_rate": 2.176226305343551e-05, "loss": 2.128, "step": 18450 }, { "epoch": 0.38, "grad_norm": 0.443359375, "learning_rate": 2.17530561722651e-05, "loss": 2.1048, "step": 18460 }, { "epoch": 0.38, "grad_norm": 0.435546875, "learning_rate": 2.1743846099095837e-05, "loss": 2.092, "step": 18470 }, { "epoch": 0.38, "grad_norm": 0.423828125, "learning_rate": 2.1734632838281082e-05, "loss": 2.1186, "step": 18480 }, { "epoch": 0.38, "grad_norm": 0.42578125, "learning_rate": 2.1725416394175714e-05, "loss": 2.1216, "step": 18490 }, { "epoch": 0.38, "grad_norm": 0.435546875, "learning_rate": 2.1716196771136115e-05, "loss": 2.1155, "step": 18500 }, { "epoch": 0.38, "eval_accuracy": 0.5584908450183592, "eval_loss": 1.9959198236465454, "eval_runtime": 16.4576, "eval_samples_per_second": 36.154, "eval_steps_per_second": 1.154, "step": 18500 }, { "epoch": 0.38, "grad_norm": 0.431640625, "learning_rate": 2.1706973973520167e-05, "loss": 2.1317, "step": 18510 }, { "epoch": 0.39, "grad_norm": 0.423828125, "learning_rate": 2.169774800568725e-05, "loss": 2.1393, "step": 18520 }, { "epoch": 0.39, "grad_norm": 0.46484375, "learning_rate": 2.1688518871998247e-05, "loss": 2.1273, "step": 18530 }, { "epoch": 0.39, "grad_norm": 0.435546875, "learning_rate": 2.1679286576815538e-05, "loss": 2.1473, "step": 18540 }, { "epoch": 0.39, "grad_norm": 0.431640625, "learning_rate": 2.167005112450299e-05, "loss": 2.1019, "step": 18550 }, { "epoch": 0.39, "grad_norm": 0.439453125, "learning_rate": 2.166081251942598e-05, "loss": 2.1197, "step": 18560 }, { "epoch": 0.39, "grad_norm": 0.44921875, "learning_rate": 2.1651570765951348e-05, "loss": 2.1222, "step": 18570 }, { "epoch": 0.39, "grad_norm": 0.42578125, "learning_rate": 2.1642325868447444e-05, "loss": 2.1083, "step": 18580 }, { "epoch": 0.39, "grad_norm": 0.447265625, "learning_rate": 2.16330778312841e-05, "loss": 2.1206, "step": 18590 }, { "epoch": 0.39, "grad_norm": 0.435546875, "learning_rate": 2.162382665883262e-05, "loss": 2.1095, "step": 18600 }, { "epoch": 0.39, "grad_norm": 0.42578125, "learning_rate": 2.1614572355465814e-05, "loss": 2.1141, "step": 18610 }, { "epoch": 0.39, "grad_norm": 0.43359375, "learning_rate": 2.160531492555794e-05, "loss": 2.1116, "step": 18620 }, { "epoch": 0.39, "grad_norm": 0.45703125, "learning_rate": 2.1596054373484764e-05, "loss": 2.0925, "step": 18630 }, { "epoch": 0.39, "grad_norm": 0.44140625, "learning_rate": 2.158679070362351e-05, "loss": 2.1143, "step": 18640 }, { "epoch": 0.39, "grad_norm": 0.58203125, "learning_rate": 2.1577523920352882e-05, "loss": 2.0887, "step": 18650 }, { "epoch": 0.39, "grad_norm": 0.4375, "learning_rate": 2.1568254028053053e-05, "loss": 2.1091, "step": 18660 }, { "epoch": 0.39, "grad_norm": 0.43359375, "learning_rate": 2.1558981031105665e-05, "loss": 2.1082, "step": 18670 }, { "epoch": 0.39, "grad_norm": 0.423828125, "learning_rate": 2.1549704933893834e-05, "loss": 2.1472, "step": 18680 }, { "epoch": 0.39, "grad_norm": 0.4453125, "learning_rate": 2.1540425740802134e-05, "loss": 2.1089, "step": 18690 }, { "epoch": 0.39, "grad_norm": 0.427734375, "learning_rate": 2.15311434562166e-05, "loss": 2.1108, "step": 18700 }, { "epoch": 0.39, "grad_norm": 0.42578125, "learning_rate": 2.1521858084524737e-05, "loss": 2.127, "step": 18710 }, { "epoch": 0.39, "grad_norm": 0.435546875, "learning_rate": 2.1512569630115507e-05, "loss": 2.1515, "step": 18720 }, { "epoch": 0.39, "grad_norm": 0.43359375, "learning_rate": 2.1503278097379323e-05, "loss": 2.1187, "step": 18730 }, { "epoch": 0.39, "grad_norm": 0.427734375, "learning_rate": 2.1493983490708065e-05, "loss": 2.1243, "step": 18740 }, { "epoch": 0.39, "grad_norm": 0.4375, "learning_rate": 2.1484685814495054e-05, "loss": 2.0826, "step": 18750 }, { "epoch": 0.39, "grad_norm": 0.431640625, "learning_rate": 2.147538507313506e-05, "loss": 2.1193, "step": 18760 }, { "epoch": 0.39, "grad_norm": 0.421875, "learning_rate": 2.146608127102432e-05, "loss": 2.1057, "step": 18770 }, { "epoch": 0.39, "grad_norm": 0.44921875, "learning_rate": 2.1456774412560502e-05, "loss": 2.1127, "step": 18780 }, { "epoch": 0.39, "grad_norm": 0.42578125, "learning_rate": 2.1447464502142723e-05, "loss": 2.0959, "step": 18790 }, { "epoch": 0.39, "grad_norm": 0.44140625, "learning_rate": 2.143815154417154e-05, "loss": 2.1005, "step": 18800 }, { "epoch": 0.39, "grad_norm": 0.44140625, "learning_rate": 2.142883554304895e-05, "loss": 2.125, "step": 18810 }, { "epoch": 0.39, "grad_norm": 0.419921875, "learning_rate": 2.1419516503178392e-05, "loss": 2.1308, "step": 18820 }, { "epoch": 0.39, "grad_norm": 0.443359375, "learning_rate": 2.1410194428964746e-05, "loss": 2.079, "step": 18830 }, { "epoch": 0.39, "grad_norm": 0.42578125, "learning_rate": 2.1400869324814315e-05, "loss": 2.1182, "step": 18840 }, { "epoch": 0.39, "grad_norm": 0.52734375, "learning_rate": 2.1391541195134845e-05, "loss": 2.1309, "step": 18850 }, { "epoch": 0.39, "grad_norm": 0.43359375, "learning_rate": 2.1382210044335504e-05, "loss": 2.1397, "step": 18860 }, { "epoch": 0.39, "grad_norm": 0.439453125, "learning_rate": 2.1372875876826892e-05, "loss": 2.1195, "step": 18870 }, { "epoch": 0.39, "grad_norm": 0.45703125, "learning_rate": 2.136353869702103e-05, "loss": 2.1318, "step": 18880 }, { "epoch": 0.39, "grad_norm": 0.44921875, "learning_rate": 2.1354198509331378e-05, "loss": 2.126, "step": 18890 }, { "epoch": 0.39, "grad_norm": 0.431640625, "learning_rate": 2.1344855318172788e-05, "loss": 2.1306, "step": 18900 }, { "epoch": 0.39, "grad_norm": 0.431640625, "learning_rate": 2.1335509127961566e-05, "loss": 2.1333, "step": 18910 }, { "epoch": 0.39, "grad_norm": 0.42578125, "learning_rate": 2.1326159943115415e-05, "loss": 2.1297, "step": 18920 }, { "epoch": 0.39, "grad_norm": 0.451171875, "learning_rate": 2.1316807768053455e-05, "loss": 2.1414, "step": 18930 }, { "epoch": 0.39, "grad_norm": 0.447265625, "learning_rate": 2.1307452607196226e-05, "loss": 2.1421, "step": 18940 }, { "epoch": 0.39, "grad_norm": 0.455078125, "learning_rate": 2.1298094464965677e-05, "loss": 2.1333, "step": 18950 }, { "epoch": 0.39, "grad_norm": 0.466796875, "learning_rate": 2.1288733345785163e-05, "loss": 2.1258, "step": 18960 }, { "epoch": 0.39, "grad_norm": 0.447265625, "learning_rate": 2.127936925407945e-05, "loss": 2.1289, "step": 18970 }, { "epoch": 0.39, "grad_norm": 0.41796875, "learning_rate": 2.1270002194274698e-05, "loss": 2.1416, "step": 18980 }, { "epoch": 0.39, "grad_norm": 0.423828125, "learning_rate": 2.1260632170798494e-05, "loss": 2.0975, "step": 18990 }, { "epoch": 0.4, "grad_norm": 0.421875, "learning_rate": 2.12512591880798e-05, "loss": 2.1493, "step": 19000 }, { "epoch": 0.4, "eval_accuracy": 0.558476059045319, "eval_loss": 1.9956163167953491, "eval_runtime": 16.4379, "eval_samples_per_second": 36.197, "eval_steps_per_second": 1.156, "step": 19000 }, { "epoch": 0.4, "grad_norm": 0.423828125, "learning_rate": 2.1241883250548996e-05, "loss": 2.0851, "step": 19010 }, { "epoch": 0.4, "grad_norm": 0.44140625, "learning_rate": 2.1232504362637845e-05, "loss": 2.1371, "step": 19020 }, { "epoch": 0.4, "grad_norm": 0.44140625, "learning_rate": 2.1223122528779515e-05, "loss": 2.1261, "step": 19030 }, { "epoch": 0.4, "grad_norm": 0.431640625, "learning_rate": 2.1213737753408554e-05, "loss": 2.1002, "step": 19040 }, { "epoch": 0.4, "grad_norm": 0.4453125, "learning_rate": 2.1204350040960914e-05, "loss": 2.1104, "step": 19050 }, { "epoch": 0.4, "grad_norm": 0.43359375, "learning_rate": 2.1194959395873927e-05, "loss": 2.0982, "step": 19060 }, { "epoch": 0.4, "grad_norm": 0.486328125, "learning_rate": 2.1185565822586313e-05, "loss": 2.1183, "step": 19070 }, { "epoch": 0.4, "grad_norm": 0.41796875, "learning_rate": 2.117616932553818e-05, "loss": 2.086, "step": 19080 }, { "epoch": 0.4, "grad_norm": 0.443359375, "learning_rate": 2.116676990917101e-05, "loss": 2.1199, "step": 19090 }, { "epoch": 0.4, "grad_norm": 0.443359375, "learning_rate": 2.115736757792767e-05, "loss": 2.1368, "step": 19100 }, { "epoch": 0.4, "grad_norm": 0.4453125, "learning_rate": 2.114796233625241e-05, "loss": 2.0892, "step": 19110 }, { "epoch": 0.4, "grad_norm": 0.466796875, "learning_rate": 2.113855418859084e-05, "loss": 2.1349, "step": 19120 }, { "epoch": 0.4, "grad_norm": 0.455078125, "learning_rate": 2.1129143139389966e-05, "loss": 2.1185, "step": 19130 }, { "epoch": 0.4, "grad_norm": 0.478515625, "learning_rate": 2.1119729193098135e-05, "loss": 2.1421, "step": 19140 }, { "epoch": 0.4, "grad_norm": 0.453125, "learning_rate": 2.1110312354165102e-05, "loss": 2.12, "step": 19150 }, { "epoch": 0.4, "grad_norm": 0.43359375, "learning_rate": 2.1100892627041958e-05, "loss": 2.1586, "step": 19160 }, { "epoch": 0.4, "grad_norm": 0.4296875, "learning_rate": 2.109147001618117e-05, "loss": 2.132, "step": 19170 }, { "epoch": 0.4, "grad_norm": 0.46875, "learning_rate": 2.1082044526036564e-05, "loss": 2.1453, "step": 19180 }, { "epoch": 0.4, "grad_norm": 0.43359375, "learning_rate": 2.1072616161063334e-05, "loss": 2.146, "step": 19190 }, { "epoch": 0.4, "grad_norm": 0.46875, "learning_rate": 2.1063184925718037e-05, "loss": 2.1015, "step": 19200 }, { "epoch": 0.4, "grad_norm": 0.421875, "learning_rate": 2.1053750824458572e-05, "loss": 2.1223, "step": 19210 }, { "epoch": 0.4, "grad_norm": 0.421875, "learning_rate": 2.10443138617442e-05, "loss": 2.1169, "step": 19220 }, { "epoch": 0.4, "grad_norm": 0.4375, "learning_rate": 2.1034874042035538e-05, "loss": 2.127, "step": 19230 }, { "epoch": 0.4, "grad_norm": 0.42578125, "learning_rate": 2.1025431369794546e-05, "loss": 2.104, "step": 19240 }, { "epoch": 0.4, "grad_norm": 0.462890625, "learning_rate": 2.1015985849484534e-05, "loss": 2.1077, "step": 19250 }, { "epoch": 0.4, "grad_norm": 0.42578125, "learning_rate": 2.100653748557017e-05, "loss": 2.1217, "step": 19260 }, { "epoch": 0.4, "grad_norm": 0.4296875, "learning_rate": 2.099708628251745e-05, "loss": 2.1567, "step": 19270 }, { "epoch": 0.4, "grad_norm": 0.44140625, "learning_rate": 2.098763224479372e-05, "loss": 2.1086, "step": 19280 }, { "epoch": 0.4, "grad_norm": 0.474609375, "learning_rate": 2.0978175376867662e-05, "loss": 2.0886, "step": 19290 }, { "epoch": 0.4, "grad_norm": 0.44921875, "learning_rate": 2.096871568320931e-05, "loss": 2.1171, "step": 19300 }, { "epoch": 0.4, "grad_norm": 0.43359375, "learning_rate": 2.0959253168290006e-05, "loss": 2.1295, "step": 19310 }, { "epoch": 0.4, "grad_norm": 0.4375, "learning_rate": 2.094978783658245e-05, "loss": 2.1168, "step": 19320 }, { "epoch": 0.4, "grad_norm": 0.45703125, "learning_rate": 2.094031969256066e-05, "loss": 2.1643, "step": 19330 }, { "epoch": 0.4, "grad_norm": 0.439453125, "learning_rate": 2.0930848740699996e-05, "loss": 2.123, "step": 19340 }, { "epoch": 0.4, "grad_norm": 0.435546875, "learning_rate": 2.092137498547713e-05, "loss": 2.1311, "step": 19350 }, { "epoch": 0.4, "grad_norm": 0.427734375, "learning_rate": 2.0911898431370068e-05, "loss": 2.1213, "step": 19360 }, { "epoch": 0.4, "grad_norm": 0.4609375, "learning_rate": 2.0902419082858142e-05, "loss": 2.137, "step": 19370 }, { "epoch": 0.4, "grad_norm": 0.44140625, "learning_rate": 2.0892936944421997e-05, "loss": 2.1128, "step": 19380 }, { "epoch": 0.4, "grad_norm": 0.439453125, "learning_rate": 2.0883452020543598e-05, "loss": 2.1695, "step": 19390 }, { "epoch": 0.4, "grad_norm": 0.439453125, "learning_rate": 2.0873964315706234e-05, "loss": 2.1137, "step": 19400 }, { "epoch": 0.4, "grad_norm": 0.4453125, "learning_rate": 2.08644738343945e-05, "loss": 2.1019, "step": 19410 }, { "epoch": 0.4, "grad_norm": 0.423828125, "learning_rate": 2.0854980581094304e-05, "loss": 2.1201, "step": 19420 }, { "epoch": 0.4, "grad_norm": 0.458984375, "learning_rate": 2.084548456029287e-05, "loss": 2.1292, "step": 19430 }, { "epoch": 0.4, "grad_norm": 0.435546875, "learning_rate": 2.0835985776478722e-05, "loss": 2.1157, "step": 19440 }, { "epoch": 0.4, "grad_norm": 0.44140625, "learning_rate": 2.0826484234141703e-05, "loss": 2.1258, "step": 19450 }, { "epoch": 0.4, "grad_norm": 0.55078125, "learning_rate": 2.081697993777295e-05, "loss": 2.1206, "step": 19460 }, { "epoch": 0.4, "grad_norm": 0.44921875, "learning_rate": 2.0807472891864894e-05, "loss": 2.0988, "step": 19470 }, { "epoch": 0.4, "grad_norm": 0.435546875, "learning_rate": 2.0797963100911286e-05, "loss": 2.1064, "step": 19480 }, { "epoch": 0.41, "grad_norm": 0.42578125, "learning_rate": 2.078845056940716e-05, "loss": 2.0966, "step": 19490 }, { "epoch": 0.41, "grad_norm": 0.462890625, "learning_rate": 2.0778935301848852e-05, "loss": 2.1219, "step": 19500 }, { "epoch": 0.41, "eval_accuracy": 0.558676491124309, "eval_loss": 1.9952665567398071, "eval_runtime": 16.4341, "eval_samples_per_second": 36.205, "eval_steps_per_second": 1.156, "step": 19500 }, { "epoch": 0.41, "grad_norm": 0.435546875, "learning_rate": 2.076941730273398e-05, "loss": 2.0871, "step": 19510 }, { "epoch": 0.41, "grad_norm": 0.453125, "learning_rate": 2.075989657656147e-05, "loss": 2.1252, "step": 19520 }, { "epoch": 0.41, "grad_norm": 0.47265625, "learning_rate": 2.0750373127831525e-05, "loss": 2.1165, "step": 19530 }, { "epoch": 0.41, "grad_norm": 0.423828125, "learning_rate": 2.0740846961045637e-05, "loss": 2.1515, "step": 19540 }, { "epoch": 0.41, "grad_norm": 0.482421875, "learning_rate": 2.073131808070659e-05, "loss": 2.1221, "step": 19550 }, { "epoch": 0.41, "grad_norm": 0.42578125, "learning_rate": 2.0721786491318436e-05, "loss": 2.0964, "step": 19560 }, { "epoch": 0.41, "grad_norm": 0.44140625, "learning_rate": 2.0712252197386517e-05, "loss": 2.1163, "step": 19570 }, { "epoch": 0.41, "grad_norm": 0.447265625, "learning_rate": 2.0702715203417465e-05, "loss": 2.1202, "step": 19580 }, { "epoch": 0.41, "grad_norm": 0.4609375, "learning_rate": 2.0693175513919164e-05, "loss": 2.1448, "step": 19590 }, { "epoch": 0.41, "grad_norm": 0.435546875, "learning_rate": 2.0683633133400784e-05, "loss": 2.147, "step": 19600 }, { "epoch": 0.41, "grad_norm": 0.43359375, "learning_rate": 2.0674088066372773e-05, "loss": 2.1263, "step": 19610 }, { "epoch": 0.41, "grad_norm": 0.431640625, "learning_rate": 2.066454031734684e-05, "loss": 2.115, "step": 19620 }, { "epoch": 0.41, "grad_norm": 0.462890625, "learning_rate": 2.065498989083596e-05, "loss": 2.1071, "step": 19630 }, { "epoch": 0.41, "grad_norm": 0.451171875, "learning_rate": 2.0645436791354387e-05, "loss": 2.1369, "step": 19640 }, { "epoch": 0.41, "grad_norm": 0.4375, "learning_rate": 2.0635881023417627e-05, "loss": 2.0942, "step": 19650 }, { "epoch": 0.41, "grad_norm": 0.47265625, "learning_rate": 2.062632259154244e-05, "loss": 2.1202, "step": 19660 }, { "epoch": 0.41, "grad_norm": 0.455078125, "learning_rate": 2.0616761500246866e-05, "loss": 2.1322, "step": 19670 }, { "epoch": 0.41, "grad_norm": 0.80859375, "learning_rate": 2.060719775405019e-05, "loss": 2.0766, "step": 19680 }, { "epoch": 0.41, "grad_norm": 0.43359375, "learning_rate": 2.059763135747295e-05, "loss": 2.1346, "step": 19690 }, { "epoch": 0.41, "grad_norm": 0.455078125, "learning_rate": 2.0588062315036943e-05, "loss": 2.1397, "step": 19700 }, { "epoch": 0.41, "grad_norm": 0.4609375, "learning_rate": 2.0578490631265204e-05, "loss": 2.1286, "step": 19710 }, { "epoch": 0.41, "grad_norm": 0.4375, "learning_rate": 2.056891631068204e-05, "loss": 2.1362, "step": 19720 }, { "epoch": 0.41, "grad_norm": 0.451171875, "learning_rate": 2.0559339357812976e-05, "loss": 2.1279, "step": 19730 }, { "epoch": 0.41, "grad_norm": 0.4296875, "learning_rate": 2.0549759777184803e-05, "loss": 2.0917, "step": 19740 }, { "epoch": 0.41, "grad_norm": 0.486328125, "learning_rate": 2.0540177573325547e-05, "loss": 2.1212, "step": 19750 }, { "epoch": 0.41, "grad_norm": 0.42578125, "learning_rate": 2.053059275076447e-05, "loss": 2.1251, "step": 19760 }, { "epoch": 0.41, "grad_norm": 0.427734375, "learning_rate": 2.052100531403208e-05, "loss": 2.1012, "step": 19770 }, { "epoch": 0.41, "grad_norm": 0.421875, "learning_rate": 2.0511415267660113e-05, "loss": 2.1164, "step": 19780 }, { "epoch": 0.41, "grad_norm": 0.431640625, "learning_rate": 2.0501822616181542e-05, "loss": 2.1144, "step": 19790 }, { "epoch": 0.41, "grad_norm": 0.439453125, "learning_rate": 2.0492227364130567e-05, "loss": 2.1057, "step": 19800 }, { "epoch": 0.41, "grad_norm": 0.42578125, "learning_rate": 2.048262951604263e-05, "loss": 2.1455, "step": 19810 }, { "epoch": 0.41, "grad_norm": 0.435546875, "learning_rate": 2.0473029076454378e-05, "loss": 2.1093, "step": 19820 }, { "epoch": 0.41, "grad_norm": 0.4375, "learning_rate": 2.0463426049903707e-05, "loss": 2.116, "step": 19830 }, { "epoch": 0.41, "grad_norm": 0.44140625, "learning_rate": 2.045382044092972e-05, "loss": 2.1448, "step": 19840 }, { "epoch": 0.41, "grad_norm": 0.453125, "learning_rate": 2.0444212254072746e-05, "loss": 2.1413, "step": 19850 }, { "epoch": 0.41, "grad_norm": 0.4375, "learning_rate": 2.0434601493874336e-05, "loss": 2.0833, "step": 19860 }, { "epoch": 0.41, "grad_norm": 0.435546875, "learning_rate": 2.0424988164877246e-05, "loss": 2.1135, "step": 19870 }, { "epoch": 0.41, "grad_norm": 0.4375, "learning_rate": 2.0415372271625456e-05, "loss": 2.11, "step": 19880 }, { "epoch": 0.41, "grad_norm": 0.4609375, "learning_rate": 2.0405753818664162e-05, "loss": 2.0997, "step": 19890 }, { "epoch": 0.41, "grad_norm": 0.458984375, "learning_rate": 2.0396132810539753e-05, "loss": 2.1178, "step": 19900 }, { "epoch": 0.41, "grad_norm": 0.42578125, "learning_rate": 2.038650925179984e-05, "loss": 2.109, "step": 19910 }, { "epoch": 0.41, "grad_norm": 0.439453125, "learning_rate": 2.0376883146993242e-05, "loss": 2.1309, "step": 19920 }, { "epoch": 0.41, "grad_norm": 0.72265625, "learning_rate": 2.036725450066997e-05, "loss": 2.1595, "step": 19930 }, { "epoch": 0.41, "grad_norm": 0.451171875, "learning_rate": 2.035762331738124e-05, "loss": 2.1005, "step": 19940 }, { "epoch": 0.41, "grad_norm": 0.44140625, "learning_rate": 2.0347989601679465e-05, "loss": 2.1317, "step": 19950 }, { "epoch": 0.41, "grad_norm": 0.4375, "learning_rate": 2.0338353358118276e-05, "loss": 2.1407, "step": 19960 }, { "epoch": 0.42, "grad_norm": 0.435546875, "learning_rate": 2.0328714591252466e-05, "loss": 2.1295, "step": 19970 }, { "epoch": 0.42, "grad_norm": 0.419921875, "learning_rate": 2.0319073305638035e-05, "loss": 2.1088, "step": 19980 }, { "epoch": 0.42, "grad_norm": 0.439453125, "learning_rate": 2.0309429505832184e-05, "loss": 2.1281, "step": 19990 }, { "epoch": 0.42, "grad_norm": 0.515625, "learning_rate": 2.0299783196393288e-05, "loss": 2.1584, "step": 20000 }, { "epoch": 0.42, "eval_accuracy": 0.558804636223991, "eval_loss": 1.9951595067977905, "eval_runtime": 16.4485, "eval_samples_per_second": 36.174, "eval_steps_per_second": 1.155, "step": 20000 }, { "epoch": 0.42, "grad_norm": 0.41796875, "learning_rate": 2.029013438188092e-05, "loss": 2.1485, "step": 20010 }, { "epoch": 0.42, "grad_norm": 0.427734375, "learning_rate": 2.0280483066855823e-05, "loss": 2.1418, "step": 20020 }, { "epoch": 0.42, "grad_norm": 0.470703125, "learning_rate": 2.027082925587993e-05, "loss": 2.1516, "step": 20030 }, { "epoch": 0.42, "grad_norm": 0.421875, "learning_rate": 2.0261172953516354e-05, "loss": 2.1501, "step": 20040 }, { "epoch": 0.42, "grad_norm": 0.4296875, "learning_rate": 2.0251514164329395e-05, "loss": 2.1348, "step": 20050 }, { "epoch": 0.42, "grad_norm": 0.427734375, "learning_rate": 2.02418528928845e-05, "loss": 2.1222, "step": 20060 }, { "epoch": 0.42, "grad_norm": 0.44140625, "learning_rate": 2.023218914374832e-05, "loss": 2.1327, "step": 20070 }, { "epoch": 0.42, "grad_norm": 0.46484375, "learning_rate": 2.022252292148867e-05, "loss": 2.1228, "step": 20080 }, { "epoch": 0.42, "grad_norm": 0.59765625, "learning_rate": 2.021285423067451e-05, "loss": 2.1693, "step": 20090 }, { "epoch": 0.42, "grad_norm": 0.421875, "learning_rate": 2.0203183075876e-05, "loss": 2.0866, "step": 20100 }, { "epoch": 0.42, "grad_norm": 0.4453125, "learning_rate": 2.019350946166445e-05, "loss": 2.1188, "step": 20110 }, { "epoch": 0.42, "grad_norm": 0.42578125, "learning_rate": 2.0183833392612322e-05, "loss": 2.149, "step": 20120 }, { "epoch": 0.42, "grad_norm": 0.451171875, "learning_rate": 2.0174154873293267e-05, "loss": 2.1203, "step": 20130 }, { "epoch": 0.42, "grad_norm": 0.4375, "learning_rate": 2.0164473908282058e-05, "loss": 2.1023, "step": 20140 }, { "epoch": 0.42, "grad_norm": 0.44140625, "learning_rate": 2.0154790502154655e-05, "loss": 2.115, "step": 20150 }, { "epoch": 0.42, "grad_norm": 0.427734375, "learning_rate": 2.0145104659488152e-05, "loss": 2.1455, "step": 20160 }, { "epoch": 0.42, "grad_norm": 0.435546875, "learning_rate": 2.0135416384860805e-05, "loss": 2.0928, "step": 20170 }, { "epoch": 0.42, "grad_norm": 0.416015625, "learning_rate": 2.0125725682852017e-05, "loss": 2.1212, "step": 20180 }, { "epoch": 0.42, "grad_norm": 0.458984375, "learning_rate": 2.0116032558042337e-05, "loss": 2.1673, "step": 20190 }, { "epoch": 0.42, "grad_norm": 0.462890625, "learning_rate": 2.0106337015013458e-05, "loss": 2.1458, "step": 20200 }, { "epoch": 0.42, "grad_norm": 0.46484375, "learning_rate": 2.0096639058348217e-05, "loss": 2.1394, "step": 20210 }, { "epoch": 0.42, "grad_norm": 0.466796875, "learning_rate": 2.0086938692630597e-05, "loss": 2.1167, "step": 20220 }, { "epoch": 0.42, "grad_norm": 0.443359375, "learning_rate": 2.0077235922445714e-05, "loss": 2.1376, "step": 20230 }, { "epoch": 0.42, "grad_norm": 0.455078125, "learning_rate": 2.0067530752379823e-05, "loss": 2.0924, "step": 20240 }, { "epoch": 0.42, "grad_norm": 0.423828125, "learning_rate": 2.005782318702031e-05, "loss": 2.1112, "step": 20250 }, { "epoch": 0.42, "grad_norm": 0.42578125, "learning_rate": 2.00481132309557e-05, "loss": 2.0978, "step": 20260 }, { "epoch": 0.42, "grad_norm": 0.439453125, "learning_rate": 2.0038400888775637e-05, "loss": 2.1434, "step": 20270 }, { "epoch": 0.42, "grad_norm": 0.453125, "learning_rate": 2.00286861650709e-05, "loss": 2.1279, "step": 20280 }, { "epoch": 0.42, "grad_norm": 0.423828125, "learning_rate": 2.00189690644334e-05, "loss": 2.1358, "step": 20290 }, { "epoch": 0.42, "grad_norm": 0.439453125, "learning_rate": 2.0009249591456163e-05, "loss": 2.1218, "step": 20300 }, { "epoch": 0.42, "grad_norm": 0.443359375, "learning_rate": 1.999952775073334e-05, "loss": 2.1323, "step": 20310 }, { "epoch": 0.42, "grad_norm": 0.439453125, "learning_rate": 1.998980354686019e-05, "loss": 2.1324, "step": 20320 }, { "epoch": 0.42, "grad_norm": 0.44140625, "learning_rate": 1.99800769844331e-05, "loss": 2.1094, "step": 20330 }, { "epoch": 0.42, "grad_norm": 0.4453125, "learning_rate": 1.9970348068049575e-05, "loss": 2.139, "step": 20340 }, { "epoch": 0.42, "grad_norm": 0.423828125, "learning_rate": 1.996061680230823e-05, "loss": 2.1181, "step": 20350 }, { "epoch": 0.42, "grad_norm": 0.44921875, "learning_rate": 1.995088319180878e-05, "loss": 2.101, "step": 20360 }, { "epoch": 0.42, "grad_norm": 0.439453125, "learning_rate": 1.9941147241152062e-05, "loss": 2.1098, "step": 20370 }, { "epoch": 0.42, "grad_norm": 0.455078125, "learning_rate": 1.9931408954940017e-05, "loss": 2.1317, "step": 20380 }, { "epoch": 0.42, "grad_norm": 0.45703125, "learning_rate": 1.9921668337775677e-05, "loss": 2.1192, "step": 20390 }, { "epoch": 0.42, "grad_norm": 0.48828125, "learning_rate": 1.9911925394263195e-05, "loss": 2.0815, "step": 20400 }, { "epoch": 0.42, "grad_norm": 0.5078125, "learning_rate": 1.9902180129007806e-05, "loss": 2.0946, "step": 20410 }, { "epoch": 0.42, "grad_norm": 0.421875, "learning_rate": 1.9892432546615857e-05, "loss": 2.1091, "step": 20420 }, { "epoch": 0.42, "grad_norm": 0.4296875, "learning_rate": 1.988268265169478e-05, "loss": 2.108, "step": 20430 }, { "epoch": 0.42, "grad_norm": 0.42578125, "learning_rate": 1.9872930448853102e-05, "loss": 2.123, "step": 20440 }, { "epoch": 0.43, "grad_norm": 0.455078125, "learning_rate": 1.9863175942700448e-05, "loss": 2.1033, "step": 20450 }, { "epoch": 0.43, "grad_norm": 0.44140625, "learning_rate": 1.985341913784752e-05, "loss": 2.1119, "step": 20460 }, { "epoch": 0.43, "grad_norm": 0.427734375, "learning_rate": 1.9843660038906116e-05, "loss": 2.1265, "step": 20470 }, { "epoch": 0.43, "grad_norm": 0.427734375, "learning_rate": 1.9833898650489117e-05, "loss": 2.1382, "step": 20480 }, { "epoch": 0.43, "grad_norm": 0.439453125, "learning_rate": 1.9824134977210485e-05, "loss": 2.1237, "step": 20490 }, { "epoch": 0.43, "grad_norm": 0.4609375, "learning_rate": 1.9814369023685257e-05, "loss": 2.1167, "step": 20500 }, { "epoch": 0.43, "eval_accuracy": 0.5586912770973492, "eval_loss": 1.9950366020202637, "eval_runtime": 16.449, "eval_samples_per_second": 36.172, "eval_steps_per_second": 1.155, "step": 20500 }, { "epoch": 0.43, "grad_norm": 0.44140625, "learning_rate": 1.9804600794529553e-05, "loss": 2.1172, "step": 20510 }, { "epoch": 0.43, "grad_norm": 0.443359375, "learning_rate": 1.9794830294360576e-05, "loss": 2.1295, "step": 20520 }, { "epoch": 0.43, "grad_norm": 0.423828125, "learning_rate": 1.978505752779658e-05, "loss": 2.1252, "step": 20530 }, { "epoch": 0.43, "grad_norm": 0.455078125, "learning_rate": 1.9775282499456915e-05, "loss": 2.117, "step": 20540 }, { "epoch": 0.43, "grad_norm": 0.5078125, "learning_rate": 1.976550521396199e-05, "loss": 2.128, "step": 20550 }, { "epoch": 0.43, "grad_norm": 0.44921875, "learning_rate": 1.9755725675933274e-05, "loss": 2.123, "step": 20560 }, { "epoch": 0.43, "grad_norm": 0.4296875, "learning_rate": 1.9745943889993314e-05, "loss": 2.1237, "step": 20570 }, { "epoch": 0.43, "grad_norm": 0.421875, "learning_rate": 1.9736159860765703e-05, "loss": 2.1207, "step": 20580 }, { "epoch": 0.43, "grad_norm": 0.419921875, "learning_rate": 1.972637359287512e-05, "loss": 2.1174, "step": 20590 }, { "epoch": 0.43, "grad_norm": 0.416015625, "learning_rate": 1.9716585090947272e-05, "loss": 2.1263, "step": 20600 }, { "epoch": 0.43, "grad_norm": 0.44921875, "learning_rate": 1.9706794359608938e-05, "loss": 2.1316, "step": 20610 }, { "epoch": 0.43, "grad_norm": 0.470703125, "learning_rate": 1.9697001403487964e-05, "loss": 2.1233, "step": 20620 }, { "epoch": 0.43, "grad_norm": 0.453125, "learning_rate": 1.9687206227213207e-05, "loss": 2.1083, "step": 20630 }, { "epoch": 0.43, "grad_norm": 0.443359375, "learning_rate": 1.967740883541462e-05, "loss": 2.0951, "step": 20640 }, { "epoch": 0.43, "grad_norm": 0.423828125, "learning_rate": 1.966760923272318e-05, "loss": 2.0794, "step": 20650 }, { "epoch": 0.43, "grad_norm": 0.44921875, "learning_rate": 1.96578074237709e-05, "loss": 2.1346, "step": 20660 }, { "epoch": 0.43, "grad_norm": 0.41796875, "learning_rate": 1.9648003413190858e-05, "loss": 2.1101, "step": 20670 }, { "epoch": 0.43, "grad_norm": 0.423828125, "learning_rate": 1.963819720561716e-05, "loss": 2.1105, "step": 20680 }, { "epoch": 0.43, "grad_norm": 0.4609375, "learning_rate": 1.9628388805684946e-05, "loss": 2.1145, "step": 20690 }, { "epoch": 0.43, "grad_norm": 0.435546875, "learning_rate": 1.9618578218030406e-05, "loss": 2.1478, "step": 20700 }, { "epoch": 0.43, "grad_norm": 0.447265625, "learning_rate": 1.960876544729075e-05, "loss": 2.1112, "step": 20710 }, { "epoch": 0.43, "grad_norm": 0.515625, "learning_rate": 1.959895049810423e-05, "loss": 2.107, "step": 20720 }, { "epoch": 0.43, "grad_norm": 0.44140625, "learning_rate": 1.9589133375110132e-05, "loss": 2.0901, "step": 20730 }, { "epoch": 0.43, "grad_norm": 0.4453125, "learning_rate": 1.9579314082948746e-05, "loss": 2.1256, "step": 20740 }, { "epoch": 0.43, "grad_norm": 0.416015625, "learning_rate": 1.9569492626261414e-05, "loss": 2.1244, "step": 20750 }, { "epoch": 0.43, "grad_norm": 0.45703125, "learning_rate": 1.9559669009690488e-05, "loss": 2.1172, "step": 20760 }, { "epoch": 0.43, "grad_norm": 0.43359375, "learning_rate": 1.954984323787934e-05, "loss": 2.1233, "step": 20770 }, { "epoch": 0.43, "grad_norm": 0.44140625, "learning_rate": 1.9540015315472363e-05, "loss": 2.0865, "step": 20780 }, { "epoch": 0.43, "grad_norm": 0.435546875, "learning_rate": 1.9530185247114978e-05, "loss": 2.1073, "step": 20790 }, { "epoch": 0.43, "grad_norm": 0.439453125, "learning_rate": 1.95203530374536e-05, "loss": 2.0946, "step": 20800 }, { "epoch": 0.43, "grad_norm": 0.421875, "learning_rate": 1.951051869113567e-05, "loss": 2.1076, "step": 20810 }, { "epoch": 0.43, "grad_norm": 0.4375, "learning_rate": 1.9500682212809627e-05, "loss": 2.1217, "step": 20820 }, { "epoch": 0.43, "grad_norm": 0.43359375, "learning_rate": 1.9490843607124937e-05, "loss": 2.1223, "step": 20830 }, { "epoch": 0.43, "grad_norm": 0.466796875, "learning_rate": 1.948100287873205e-05, "loss": 2.1265, "step": 20840 }, { "epoch": 0.43, "grad_norm": 0.453125, "learning_rate": 1.9471160032282436e-05, "loss": 2.1059, "step": 20850 }, { "epoch": 0.43, "grad_norm": 0.443359375, "learning_rate": 1.9461315072428558e-05, "loss": 2.1335, "step": 20860 }, { "epoch": 0.43, "grad_norm": 0.431640625, "learning_rate": 1.945146800382388e-05, "loss": 2.1306, "step": 20870 }, { "epoch": 0.43, "grad_norm": 0.44921875, "learning_rate": 1.944161883112286e-05, "loss": 2.0951, "step": 20880 }, { "epoch": 0.43, "grad_norm": 0.423828125, "learning_rate": 1.9431767558980954e-05, "loss": 2.1494, "step": 20890 }, { "epoch": 0.43, "grad_norm": 0.443359375, "learning_rate": 1.942191419205461e-05, "loss": 2.1085, "step": 20900 }, { "epoch": 0.43, "grad_norm": 0.416015625, "learning_rate": 1.9412058735001265e-05, "loss": 2.1365, "step": 20910 }, { "epoch": 0.43, "grad_norm": 0.51953125, "learning_rate": 1.940220119247934e-05, "loss": 2.1237, "step": 20920 }, { "epoch": 0.44, "grad_norm": 0.435546875, "learning_rate": 1.9392341569148254e-05, "loss": 2.1004, "step": 20930 }, { "epoch": 0.44, "grad_norm": 0.42578125, "learning_rate": 1.93824798696684e-05, "loss": 2.1279, "step": 20940 }, { "epoch": 0.44, "grad_norm": 0.4609375, "learning_rate": 1.937261609870115e-05, "loss": 2.1135, "step": 20950 }, { "epoch": 0.44, "grad_norm": 0.427734375, "learning_rate": 1.9362750260908855e-05, "loss": 2.1207, "step": 20960 }, { "epoch": 0.44, "grad_norm": 0.453125, "learning_rate": 1.935288236095486e-05, "loss": 2.112, "step": 20970 }, { "epoch": 0.44, "grad_norm": 0.435546875, "learning_rate": 1.934301240350346e-05, "loss": 2.1004, "step": 20980 }, { "epoch": 0.44, "grad_norm": 0.455078125, "learning_rate": 1.933314039321994e-05, "loss": 2.1236, "step": 20990 }, { "epoch": 0.44, "grad_norm": 0.4296875, "learning_rate": 1.9323266334770547e-05, "loss": 2.1507, "step": 21000 }, { "epoch": 0.44, "eval_accuracy": 0.5586485620641218, "eval_loss": 1.9948058128356934, "eval_runtime": 16.4321, "eval_samples_per_second": 36.21, "eval_steps_per_second": 1.156, "step": 21000 }, { "epoch": 0.44, "grad_norm": 0.4453125, "learning_rate": 1.93133902328225e-05, "loss": 2.1003, "step": 21010 }, { "epoch": 0.44, "grad_norm": 0.421875, "learning_rate": 1.9303512092043982e-05, "loss": 2.1321, "step": 21020 }, { "epoch": 0.44, "grad_norm": 0.458984375, "learning_rate": 1.9293631917104143e-05, "loss": 2.1093, "step": 21030 }, { "epoch": 0.44, "grad_norm": 0.44140625, "learning_rate": 1.928374971267309e-05, "loss": 2.1293, "step": 21040 }, { "epoch": 0.44, "grad_norm": 0.439453125, "learning_rate": 1.927386548342189e-05, "loss": 2.1194, "step": 21050 }, { "epoch": 0.44, "grad_norm": 0.58203125, "learning_rate": 1.9263979234022565e-05, "loss": 2.1374, "step": 21060 }, { "epoch": 0.44, "grad_norm": 0.44921875, "learning_rate": 1.9254090969148103e-05, "loss": 2.0812, "step": 21070 }, { "epoch": 0.44, "grad_norm": 0.435546875, "learning_rate": 1.924420069347243e-05, "loss": 2.0827, "step": 21080 }, { "epoch": 0.44, "grad_norm": 0.427734375, "learning_rate": 1.9234308411670435e-05, "loss": 2.1191, "step": 21090 }, { "epoch": 0.44, "grad_norm": 0.453125, "learning_rate": 1.9224414128417944e-05, "loss": 2.1265, "step": 21100 }, { "epoch": 0.44, "grad_norm": 0.4453125, "learning_rate": 1.9214517848391737e-05, "loss": 2.1404, "step": 21110 }, { "epoch": 0.44, "grad_norm": 0.43359375, "learning_rate": 1.9204619576269532e-05, "loss": 2.0673, "step": 21120 }, { "epoch": 0.44, "grad_norm": 0.45703125, "learning_rate": 1.919471931673e-05, "loss": 2.1161, "step": 21130 }, { "epoch": 0.44, "grad_norm": 0.43359375, "learning_rate": 1.9184817074452735e-05, "loss": 2.1555, "step": 21140 }, { "epoch": 0.44, "grad_norm": 0.42578125, "learning_rate": 1.917491285411828e-05, "loss": 2.0993, "step": 21150 }, { "epoch": 0.44, "grad_norm": 0.435546875, "learning_rate": 1.916500666040811e-05, "loss": 2.0877, "step": 21160 }, { "epoch": 0.44, "grad_norm": 0.43359375, "learning_rate": 1.9155098498004632e-05, "loss": 2.1046, "step": 21170 }, { "epoch": 0.44, "grad_norm": 0.416015625, "learning_rate": 1.9145188371591184e-05, "loss": 2.1036, "step": 21180 }, { "epoch": 0.44, "grad_norm": 0.455078125, "learning_rate": 1.9135276285852028e-05, "loss": 2.1242, "step": 21190 }, { "epoch": 0.44, "grad_norm": 0.451171875, "learning_rate": 1.9125362245472358e-05, "loss": 2.1173, "step": 21200 }, { "epoch": 0.44, "grad_norm": 0.4375, "learning_rate": 1.9115446255138296e-05, "loss": 2.1332, "step": 21210 }, { "epoch": 0.44, "grad_norm": 0.455078125, "learning_rate": 1.9105528319536873e-05, "loss": 2.1088, "step": 21220 }, { "epoch": 0.44, "grad_norm": 0.43359375, "learning_rate": 1.9095608443356048e-05, "loss": 2.1155, "step": 21230 }, { "epoch": 0.44, "grad_norm": 0.45703125, "learning_rate": 1.9085686631284702e-05, "loss": 2.1532, "step": 21240 }, { "epoch": 0.44, "grad_norm": 0.427734375, "learning_rate": 1.9075762888012612e-05, "loss": 2.1166, "step": 21250 }, { "epoch": 0.44, "grad_norm": 0.431640625, "learning_rate": 1.9065837218230488e-05, "loss": 2.1597, "step": 21260 }, { "epoch": 0.44, "grad_norm": 0.439453125, "learning_rate": 1.9055909626629946e-05, "loss": 2.1097, "step": 21270 }, { "epoch": 0.44, "grad_norm": 0.439453125, "learning_rate": 1.9045980117903502e-05, "loss": 2.1096, "step": 21280 }, { "epoch": 0.44, "grad_norm": 0.453125, "learning_rate": 1.9036048696744583e-05, "loss": 2.1596, "step": 21290 }, { "epoch": 0.44, "grad_norm": 0.431640625, "learning_rate": 1.9026115367847525e-05, "loss": 2.1004, "step": 21300 }, { "epoch": 0.44, "grad_norm": 0.4375, "learning_rate": 1.9016180135907563e-05, "loss": 2.1107, "step": 21310 }, { "epoch": 0.44, "grad_norm": 0.44921875, "learning_rate": 1.9006243005620825e-05, "loss": 2.125, "step": 21320 }, { "epoch": 0.44, "grad_norm": 0.4609375, "learning_rate": 1.899630398168434e-05, "loss": 2.1179, "step": 21330 }, { "epoch": 0.44, "grad_norm": 0.447265625, "learning_rate": 1.8986363068796037e-05, "loss": 2.0901, "step": 21340 }, { "epoch": 0.44, "grad_norm": 0.416015625, "learning_rate": 1.8976420271654733e-05, "loss": 2.0892, "step": 21350 }, { "epoch": 0.44, "grad_norm": 0.474609375, "learning_rate": 1.8966475594960137e-05, "loss": 2.1162, "step": 21360 }, { "epoch": 0.44, "grad_norm": 0.453125, "learning_rate": 1.8956529043412844e-05, "loss": 2.1425, "step": 21370 }, { "epoch": 0.44, "grad_norm": 0.455078125, "learning_rate": 1.894658062171434e-05, "loss": 2.1121, "step": 21380 }, { "epoch": 0.44, "grad_norm": 0.46484375, "learning_rate": 1.893663033456699e-05, "loss": 2.1301, "step": 21390 }, { "epoch": 0.44, "grad_norm": 0.423828125, "learning_rate": 1.8926678186674047e-05, "loss": 2.1007, "step": 21400 }, { "epoch": 0.45, "grad_norm": 0.435546875, "learning_rate": 1.8916724182739637e-05, "loss": 2.1118, "step": 21410 }, { "epoch": 0.45, "grad_norm": 0.42578125, "learning_rate": 1.8906768327468767e-05, "loss": 2.1399, "step": 21420 }, { "epoch": 0.45, "grad_norm": 0.431640625, "learning_rate": 1.8896810625567316e-05, "loss": 2.1572, "step": 21430 }, { "epoch": 0.45, "grad_norm": 0.4453125, "learning_rate": 1.888685108174204e-05, "loss": 2.1083, "step": 21440 }, { "epoch": 0.45, "grad_norm": 0.4453125, "learning_rate": 1.887688970070056e-05, "loss": 2.1104, "step": 21450 }, { "epoch": 0.45, "grad_norm": 0.4296875, "learning_rate": 1.8866926487151374e-05, "loss": 2.131, "step": 21460 }, { "epoch": 0.45, "grad_norm": 0.455078125, "learning_rate": 1.8856961445803834e-05, "loss": 2.1342, "step": 21470 }, { "epoch": 0.45, "grad_norm": 0.470703125, "learning_rate": 1.8846994581368166e-05, "loss": 2.118, "step": 21480 }, { "epoch": 0.45, "grad_norm": 0.43359375, "learning_rate": 1.8837025898555462e-05, "loss": 2.1387, "step": 21490 }, { "epoch": 0.45, "grad_norm": 0.458984375, "learning_rate": 1.8827055402077655e-05, "loss": 2.1043, "step": 21500 }, { "epoch": 0.45, "eval_accuracy": 0.5587159203857496, "eval_loss": 1.994558572769165, "eval_runtime": 16.4208, "eval_samples_per_second": 36.234, "eval_steps_per_second": 1.157, "step": 21500 }, { "epoch": 0.45, "grad_norm": 0.44921875, "learning_rate": 1.8817083096647557e-05, "loss": 2.1209, "step": 21510 }, { "epoch": 0.45, "grad_norm": 0.43359375, "learning_rate": 1.8807108986978816e-05, "loss": 2.1143, "step": 21520 }, { "epoch": 0.45, "grad_norm": 0.44140625, "learning_rate": 1.8797133077785943e-05, "loss": 2.1108, "step": 21530 }, { "epoch": 0.45, "grad_norm": 0.466796875, "learning_rate": 1.8787155373784302e-05, "loss": 2.1291, "step": 21540 }, { "epoch": 0.45, "grad_norm": 0.462890625, "learning_rate": 1.8777175879690098e-05, "loss": 2.1061, "step": 21550 }, { "epoch": 0.45, "grad_norm": 0.4375, "learning_rate": 1.876719460022039e-05, "loss": 2.0808, "step": 21560 }, { "epoch": 0.45, "grad_norm": 0.427734375, "learning_rate": 1.8757211540093073e-05, "loss": 2.122, "step": 21570 }, { "epoch": 0.45, "grad_norm": 0.435546875, "learning_rate": 1.874722670402688e-05, "loss": 2.1077, "step": 21580 }, { "epoch": 0.45, "grad_norm": 0.421875, "learning_rate": 1.8737240096741405e-05, "loss": 2.144, "step": 21590 }, { "epoch": 0.45, "grad_norm": 0.42578125, "learning_rate": 1.8727251722957053e-05, "loss": 2.1605, "step": 21600 }, { "epoch": 0.45, "grad_norm": 0.4375, "learning_rate": 1.8717261587395077e-05, "loss": 2.1178, "step": 21610 }, { "epoch": 0.45, "grad_norm": 0.4375, "learning_rate": 1.870726969477757e-05, "loss": 2.1073, "step": 21620 }, { "epoch": 0.45, "grad_norm": 0.421875, "learning_rate": 1.869727604982744e-05, "loss": 2.1159, "step": 21630 }, { "epoch": 0.45, "grad_norm": 0.431640625, "learning_rate": 1.8687280657268428e-05, "loss": 2.1052, "step": 21640 }, { "epoch": 0.45, "grad_norm": 0.451171875, "learning_rate": 1.8677283521825105e-05, "loss": 2.1352, "step": 21650 }, { "epoch": 0.45, "grad_norm": 0.42578125, "learning_rate": 1.8667284648222872e-05, "loss": 2.1103, "step": 21660 }, { "epoch": 0.45, "grad_norm": 0.4453125, "learning_rate": 1.865728404118793e-05, "loss": 2.092, "step": 21670 }, { "epoch": 0.45, "grad_norm": 0.451171875, "learning_rate": 1.864728170544733e-05, "loss": 2.1351, "step": 21680 }, { "epoch": 0.45, "grad_norm": 0.470703125, "learning_rate": 1.863727764572891e-05, "loss": 2.1087, "step": 21690 }, { "epoch": 0.45, "grad_norm": 0.4453125, "learning_rate": 1.8627271866761335e-05, "loss": 2.1062, "step": 21700 }, { "epoch": 0.45, "grad_norm": 0.431640625, "learning_rate": 1.861726437327409e-05, "loss": 2.114, "step": 21710 }, { "epoch": 0.45, "grad_norm": 0.42578125, "learning_rate": 1.860725516999746e-05, "loss": 2.1113, "step": 21720 }, { "epoch": 0.45, "grad_norm": 0.486328125, "learning_rate": 1.8597244261662547e-05, "loss": 2.1008, "step": 21730 }, { "epoch": 0.45, "grad_norm": 0.76171875, "learning_rate": 1.8587231653001252e-05, "loss": 2.1187, "step": 21740 }, { "epoch": 0.45, "grad_norm": 0.4375, "learning_rate": 1.857721734874628e-05, "loss": 2.1457, "step": 21750 }, { "epoch": 0.45, "grad_norm": 0.439453125, "learning_rate": 1.8567201353631143e-05, "loss": 2.0929, "step": 21760 }, { "epoch": 0.45, "grad_norm": 0.431640625, "learning_rate": 1.8557183672390142e-05, "loss": 2.1049, "step": 21770 }, { "epoch": 0.45, "grad_norm": 0.443359375, "learning_rate": 1.854716430975839e-05, "loss": 2.1545, "step": 21780 }, { "epoch": 0.45, "grad_norm": 0.443359375, "learning_rate": 1.853714327047178e-05, "loss": 2.1205, "step": 21790 }, { "epoch": 0.45, "grad_norm": 0.46484375, "learning_rate": 1.8527120559267007e-05, "loss": 2.1452, "step": 21800 }, { "epoch": 0.45, "grad_norm": 0.43359375, "learning_rate": 1.8517096180881557e-05, "loss": 2.1181, "step": 21810 }, { "epoch": 0.45, "grad_norm": 0.458984375, "learning_rate": 1.8507070140053688e-05, "loss": 2.1106, "step": 21820 }, { "epoch": 0.45, "grad_norm": 0.447265625, "learning_rate": 1.8497042441522464e-05, "loss": 2.109, "step": 21830 }, { "epoch": 0.45, "grad_norm": 0.453125, "learning_rate": 1.8487013090027727e-05, "loss": 2.144, "step": 21840 }, { "epoch": 0.45, "grad_norm": 0.427734375, "learning_rate": 1.8476982090310094e-05, "loss": 2.1149, "step": 21850 }, { "epoch": 0.45, "grad_norm": 0.423828125, "learning_rate": 1.8466949447110964e-05, "loss": 2.1142, "step": 21860 }, { "epoch": 0.45, "grad_norm": 0.4296875, "learning_rate": 1.8456915165172513e-05, "loss": 2.1116, "step": 21870 }, { "epoch": 0.45, "grad_norm": 0.4296875, "learning_rate": 1.8446879249237693e-05, "loss": 2.1393, "step": 21880 }, { "epoch": 0.46, "grad_norm": 0.4375, "learning_rate": 1.8436841704050237e-05, "loss": 2.1317, "step": 21890 }, { "epoch": 0.46, "grad_norm": 0.439453125, "learning_rate": 1.8426802534354624e-05, "loss": 2.0958, "step": 21900 }, { "epoch": 0.46, "grad_norm": 0.44140625, "learning_rate": 1.8416761744896125e-05, "loss": 2.1215, "step": 21910 }, { "epoch": 0.46, "grad_norm": 0.455078125, "learning_rate": 1.8406719340420766e-05, "loss": 2.1058, "step": 21920 }, { "epoch": 0.46, "grad_norm": 0.4453125, "learning_rate": 1.8396675325675333e-05, "loss": 2.0909, "step": 21930 }, { "epoch": 0.46, "grad_norm": 0.478515625, "learning_rate": 1.8386629705407382e-05, "loss": 2.0984, "step": 21940 }, { "epoch": 0.46, "grad_norm": 0.44140625, "learning_rate": 1.8376582484365224e-05, "loss": 2.1219, "step": 21950 }, { "epoch": 0.46, "grad_norm": 0.443359375, "learning_rate": 1.836653366729792e-05, "loss": 2.102, "step": 21960 }, { "epoch": 0.46, "grad_norm": 0.427734375, "learning_rate": 1.8356483258955302e-05, "loss": 2.127, "step": 21970 }, { "epoch": 0.46, "grad_norm": 0.41796875, "learning_rate": 1.8346431264087935e-05, "loss": 2.1234, "step": 21980 }, { "epoch": 0.46, "grad_norm": 0.419921875, "learning_rate": 1.8336377687447146e-05, "loss": 2.1243, "step": 21990 }, { "epoch": 0.46, "grad_norm": 0.4296875, "learning_rate": 1.832632253378501e-05, "loss": 2.0864, "step": 22000 }, { "epoch": 0.46, "eval_accuracy": 0.5587339921305766, "eval_loss": 1.994472622871399, "eval_runtime": 16.4414, "eval_samples_per_second": 36.189, "eval_steps_per_second": 1.156, "step": 22000 }, { "epoch": 0.46, "grad_norm": 0.4296875, "learning_rate": 1.831626580785434e-05, "loss": 2.1219, "step": 22010 }, { "epoch": 0.46, "grad_norm": 0.458984375, "learning_rate": 1.830620751440869e-05, "loss": 2.0869, "step": 22020 }, { "epoch": 0.46, "grad_norm": 0.423828125, "learning_rate": 1.8296147658202372e-05, "loss": 2.1194, "step": 22030 }, { "epoch": 0.46, "grad_norm": 0.416015625, "learning_rate": 1.828608624399042e-05, "loss": 2.1369, "step": 22040 }, { "epoch": 0.46, "grad_norm": 0.443359375, "learning_rate": 1.8276023276528612e-05, "loss": 2.1218, "step": 22050 }, { "epoch": 0.46, "grad_norm": 0.41796875, "learning_rate": 1.8265958760573458e-05, "loss": 2.1488, "step": 22060 }, { "epoch": 0.46, "grad_norm": 0.4609375, "learning_rate": 1.8255892700882193e-05, "loss": 2.1405, "step": 22070 }, { "epoch": 0.46, "grad_norm": 0.451171875, "learning_rate": 1.8245825102212803e-05, "loss": 2.141, "step": 22080 }, { "epoch": 0.46, "grad_norm": 0.431640625, "learning_rate": 1.823575596932398e-05, "loss": 2.1167, "step": 22090 }, { "epoch": 0.46, "grad_norm": 0.4453125, "learning_rate": 1.8225685306975135e-05, "loss": 2.1252, "step": 22100 }, { "epoch": 0.46, "grad_norm": 0.44140625, "learning_rate": 1.821561311992644e-05, "loss": 2.1109, "step": 22110 }, { "epoch": 0.46, "grad_norm": 0.419921875, "learning_rate": 1.8205539412938747e-05, "loss": 2.1224, "step": 22120 }, { "epoch": 0.46, "grad_norm": 0.4453125, "learning_rate": 1.8195464190773643e-05, "loss": 2.1245, "step": 22130 }, { "epoch": 0.46, "grad_norm": 0.462890625, "learning_rate": 1.818538745819344e-05, "loss": 2.1176, "step": 22140 }, { "epoch": 0.46, "grad_norm": 0.46875, "learning_rate": 1.8175309219961148e-05, "loss": 2.1291, "step": 22150 }, { "epoch": 0.46, "grad_norm": 0.44140625, "learning_rate": 1.8165229480840495e-05, "loss": 2.1285, "step": 22160 }, { "epoch": 0.46, "grad_norm": 0.46875, "learning_rate": 1.8155148245595925e-05, "loss": 2.1231, "step": 22170 }, { "epoch": 0.46, "grad_norm": 0.423828125, "learning_rate": 1.814506551899257e-05, "loss": 2.1233, "step": 22180 }, { "epoch": 0.46, "grad_norm": 0.47265625, "learning_rate": 1.8134981305796295e-05, "loss": 2.1611, "step": 22190 }, { "epoch": 0.46, "grad_norm": 0.42578125, "learning_rate": 1.8124895610773645e-05, "loss": 2.127, "step": 22200 }, { "epoch": 0.46, "grad_norm": 0.439453125, "learning_rate": 1.811480843869187e-05, "loss": 2.1425, "step": 22210 }, { "epoch": 0.46, "grad_norm": 0.44921875, "learning_rate": 1.8104719794318924e-05, "loss": 2.1499, "step": 22220 }, { "epoch": 0.46, "grad_norm": 0.470703125, "learning_rate": 1.8094629682423455e-05, "loss": 2.1263, "step": 22230 }, { "epoch": 0.46, "grad_norm": 0.435546875, "learning_rate": 1.8084538107774805e-05, "loss": 2.1535, "step": 22240 }, { "epoch": 0.46, "grad_norm": 0.447265625, "learning_rate": 1.8074445075143004e-05, "loss": 2.0977, "step": 22250 }, { "epoch": 0.46, "grad_norm": 0.419921875, "learning_rate": 1.806435058929877e-05, "loss": 2.0719, "step": 22260 }, { "epoch": 0.46, "grad_norm": 0.43359375, "learning_rate": 1.805425465501352e-05, "loss": 2.1091, "step": 22270 }, { "epoch": 0.46, "grad_norm": 0.421875, "learning_rate": 1.8044157277059342e-05, "loss": 2.099, "step": 22280 }, { "epoch": 0.46, "grad_norm": 0.466796875, "learning_rate": 1.8034058460209e-05, "loss": 2.081, "step": 22290 }, { "epoch": 0.46, "grad_norm": 0.451171875, "learning_rate": 1.802395820923597e-05, "loss": 2.1037, "step": 22300 }, { "epoch": 0.46, "grad_norm": 0.431640625, "learning_rate": 1.8013856528914376e-05, "loss": 2.1107, "step": 22310 }, { "epoch": 0.46, "grad_norm": 0.443359375, "learning_rate": 1.8003753424019025e-05, "loss": 2.0914, "step": 22320 }, { "epoch": 0.46, "grad_norm": 0.435546875, "learning_rate": 1.79936488993254e-05, "loss": 2.104, "step": 22330 }, { "epoch": 0.46, "grad_norm": 0.5234375, "learning_rate": 1.798354295960965e-05, "loss": 2.1512, "step": 22340 }, { "epoch": 0.46, "grad_norm": 0.4375, "learning_rate": 1.7973435609648605e-05, "loss": 2.1104, "step": 22350 }, { "epoch": 0.46, "grad_norm": 0.4375, "learning_rate": 1.796332685421975e-05, "loss": 2.1172, "step": 22360 }, { "epoch": 0.47, "grad_norm": 0.447265625, "learning_rate": 1.7953216698101238e-05, "loss": 2.0917, "step": 22370 }, { "epoch": 0.47, "grad_norm": 0.44140625, "learning_rate": 1.7943105146071887e-05, "loss": 2.0962, "step": 22380 }, { "epoch": 0.47, "grad_norm": 0.4609375, "learning_rate": 1.7932992202911165e-05, "loss": 2.1425, "step": 22390 }, { "epoch": 0.47, "grad_norm": 0.42578125, "learning_rate": 1.792287787339921e-05, "loss": 2.107, "step": 22400 }, { "epoch": 0.47, "grad_norm": 0.43359375, "learning_rate": 1.7912762162316807e-05, "loss": 2.0787, "step": 22410 }, { "epoch": 0.47, "grad_norm": 0.435546875, "learning_rate": 1.7902645074445404e-05, "loss": 2.1219, "step": 22420 }, { "epoch": 0.47, "grad_norm": 0.4296875, "learning_rate": 1.7892526614567082e-05, "loss": 2.1216, "step": 22430 }, { "epoch": 0.47, "grad_norm": 0.419921875, "learning_rate": 1.788240678746459e-05, "loss": 2.1209, "step": 22440 }, { "epoch": 0.47, "grad_norm": 0.478515625, "learning_rate": 1.7872285597921303e-05, "loss": 2.1028, "step": 22450 }, { "epoch": 0.47, "grad_norm": 0.4609375, "learning_rate": 1.7862163050721263e-05, "loss": 2.1223, "step": 22460 }, { "epoch": 0.47, "grad_norm": 0.423828125, "learning_rate": 1.7852039150649138e-05, "loss": 2.1065, "step": 22470 }, { "epoch": 0.47, "grad_norm": 0.451171875, "learning_rate": 1.784191390249024e-05, "loss": 2.1044, "step": 22480 }, { "epoch": 0.47, "grad_norm": 0.5, "learning_rate": 1.7831787311030514e-05, "loss": 2.1274, "step": 22490 }, { "epoch": 0.47, "grad_norm": 0.431640625, "learning_rate": 1.782165938105655e-05, "loss": 2.1074, "step": 22500 }, { "epoch": 0.47, "eval_accuracy": 0.5587454923318301, "eval_loss": 1.9943138360977173, "eval_runtime": 16.4368, "eval_samples_per_second": 36.199, "eval_steps_per_second": 1.156, "step": 22500 }, { "epoch": 0.47, "grad_norm": 0.447265625, "learning_rate": 1.7811530117355557e-05, "loss": 2.1378, "step": 22510 }, { "epoch": 0.47, "grad_norm": 0.435546875, "learning_rate": 1.7801399524715385e-05, "loss": 2.1277, "step": 22520 }, { "epoch": 0.47, "grad_norm": 0.4765625, "learning_rate": 1.779126760792451e-05, "loss": 2.1212, "step": 22530 }, { "epoch": 0.47, "grad_norm": 0.439453125, "learning_rate": 1.7781134371772035e-05, "loss": 2.1201, "step": 22540 }, { "epoch": 0.47, "grad_norm": 0.421875, "learning_rate": 1.777099982104768e-05, "loss": 2.1087, "step": 22550 }, { "epoch": 0.47, "grad_norm": 0.439453125, "learning_rate": 1.7760863960541796e-05, "loss": 2.1147, "step": 22560 }, { "epoch": 0.47, "grad_norm": 0.44140625, "learning_rate": 1.7750726795045345e-05, "loss": 2.1307, "step": 22570 }, { "epoch": 0.47, "grad_norm": 0.4375, "learning_rate": 1.7740588329349905e-05, "loss": 2.1069, "step": 22580 }, { "epoch": 0.47, "grad_norm": 0.4296875, "learning_rate": 1.7730448568247672e-05, "loss": 2.1233, "step": 22590 }, { "epoch": 0.47, "grad_norm": 0.427734375, "learning_rate": 1.772030751653146e-05, "loss": 2.1263, "step": 22600 }, { "epoch": 0.47, "grad_norm": 0.4375, "learning_rate": 1.7710165178994694e-05, "loss": 2.1318, "step": 22610 }, { "epoch": 0.47, "grad_norm": 0.4453125, "learning_rate": 1.770002156043138e-05, "loss": 2.0761, "step": 22620 }, { "epoch": 0.47, "grad_norm": 0.44140625, "learning_rate": 1.7689876665636172e-05, "loss": 2.0984, "step": 22630 }, { "epoch": 0.47, "grad_norm": 0.4453125, "learning_rate": 1.7679730499404288e-05, "loss": 2.1114, "step": 22640 }, { "epoch": 0.47, "grad_norm": 0.443359375, "learning_rate": 1.7669583066531576e-05, "loss": 2.1403, "step": 22650 }, { "epoch": 0.47, "grad_norm": 0.42578125, "learning_rate": 1.765943437181447e-05, "loss": 2.1127, "step": 22660 }, { "epoch": 0.47, "grad_norm": 0.435546875, "learning_rate": 1.7649284420049995e-05, "loss": 2.1271, "step": 22670 }, { "epoch": 0.47, "grad_norm": 0.455078125, "learning_rate": 1.7639133216035776e-05, "loss": 2.1399, "step": 22680 }, { "epoch": 0.47, "grad_norm": 0.439453125, "learning_rate": 1.762898076457004e-05, "loss": 2.0878, "step": 22690 }, { "epoch": 0.47, "grad_norm": 0.41796875, "learning_rate": 1.7618827070451578e-05, "loss": 2.1376, "step": 22700 }, { "epoch": 0.47, "grad_norm": 0.44140625, "learning_rate": 1.76086721384798e-05, "loss": 2.127, "step": 22710 }, { "epoch": 0.47, "grad_norm": 0.416015625, "learning_rate": 1.7598515973454677e-05, "loss": 2.1008, "step": 22720 }, { "epoch": 0.47, "grad_norm": 0.423828125, "learning_rate": 1.7588358580176777e-05, "loss": 2.1554, "step": 22730 }, { "epoch": 0.47, "grad_norm": 0.431640625, "learning_rate": 1.757819996344724e-05, "loss": 2.1145, "step": 22740 }, { "epoch": 0.47, "grad_norm": 0.44140625, "learning_rate": 1.756804012806778e-05, "loss": 2.1001, "step": 22750 }, { "epoch": 0.47, "grad_norm": 0.42578125, "learning_rate": 1.755787907884071e-05, "loss": 2.115, "step": 22760 }, { "epoch": 0.47, "grad_norm": 0.44921875, "learning_rate": 1.7547716820568892e-05, "loss": 2.1515, "step": 22770 }, { "epoch": 0.47, "grad_norm": 0.443359375, "learning_rate": 1.7537553358055766e-05, "loss": 2.0965, "step": 22780 }, { "epoch": 0.47, "grad_norm": 0.421875, "learning_rate": 1.7527388696105345e-05, "loss": 2.0928, "step": 22790 }, { "epoch": 0.47, "grad_norm": 0.453125, "learning_rate": 1.751722283952221e-05, "loss": 2.1073, "step": 22800 }, { "epoch": 0.47, "grad_norm": 0.45703125, "learning_rate": 1.7507055793111505e-05, "loss": 2.1112, "step": 22810 }, { "epoch": 0.47, "grad_norm": 0.421875, "learning_rate": 1.7496887561678934e-05, "loss": 2.1483, "step": 22820 }, { "epoch": 0.47, "grad_norm": 0.4296875, "learning_rate": 1.748671815003076e-05, "loss": 2.1108, "step": 22830 }, { "epoch": 0.47, "grad_norm": 0.443359375, "learning_rate": 1.7476547562973814e-05, "loss": 2.1068, "step": 22840 }, { "epoch": 0.48, "grad_norm": 0.59765625, "learning_rate": 1.7466375805315468e-05, "loss": 2.1319, "step": 22850 }, { "epoch": 0.48, "grad_norm": 0.423828125, "learning_rate": 1.7456202881863656e-05, "loss": 2.1146, "step": 22860 }, { "epoch": 0.48, "grad_norm": 0.42578125, "learning_rate": 1.7446028797426864e-05, "loss": 2.114, "step": 22870 }, { "epoch": 0.48, "grad_norm": 0.455078125, "learning_rate": 1.7435853556814128e-05, "loss": 2.1013, "step": 22880 }, { "epoch": 0.48, "grad_norm": 0.45703125, "learning_rate": 1.7425677164835012e-05, "loss": 2.0942, "step": 22890 }, { "epoch": 0.48, "grad_norm": 0.419921875, "learning_rate": 1.741549962629966e-05, "loss": 2.1451, "step": 22900 }, { "epoch": 0.48, "grad_norm": 0.42578125, "learning_rate": 1.7405320946018724e-05, "loss": 2.1002, "step": 22910 }, { "epoch": 0.48, "grad_norm": 0.43359375, "learning_rate": 1.7395141128803405e-05, "loss": 2.104, "step": 22920 }, { "epoch": 0.48, "grad_norm": 0.4296875, "learning_rate": 1.738496017946546e-05, "loss": 2.092, "step": 22930 }, { "epoch": 0.48, "grad_norm": 0.42578125, "learning_rate": 1.737477810281715e-05, "loss": 2.1153, "step": 22940 }, { "epoch": 0.48, "grad_norm": 0.443359375, "learning_rate": 1.73645949036713e-05, "loss": 2.1225, "step": 22950 }, { "epoch": 0.48, "grad_norm": 0.427734375, "learning_rate": 1.735441058684124e-05, "loss": 2.0868, "step": 22960 }, { "epoch": 0.48, "grad_norm": 0.8046875, "learning_rate": 1.7344225157140838e-05, "loss": 2.1242, "step": 22970 }, { "epoch": 0.48, "grad_norm": 0.45703125, "learning_rate": 1.73340386193845e-05, "loss": 2.1192, "step": 22980 }, { "epoch": 0.48, "grad_norm": 0.427734375, "learning_rate": 1.7323850978387136e-05, "loss": 2.1318, "step": 22990 }, { "epoch": 0.48, "grad_norm": 0.435546875, "learning_rate": 1.7313662238964186e-05, "loss": 2.0858, "step": 23000 }, { "epoch": 0.48, "eval_accuracy": 0.5589574246120735, "eval_loss": 1.9941881895065308, "eval_runtime": 16.438, "eval_samples_per_second": 36.197, "eval_steps_per_second": 1.156, "step": 23000 }, { "epoch": 0.48, "grad_norm": 0.419921875, "learning_rate": 1.7303472405931614e-05, "loss": 2.1364, "step": 23010 }, { "epoch": 0.48, "grad_norm": 0.419921875, "learning_rate": 1.7293281484105893e-05, "loss": 2.1271, "step": 23020 }, { "epoch": 0.48, "grad_norm": 0.43359375, "learning_rate": 1.7283089478304015e-05, "loss": 2.108, "step": 23030 }, { "epoch": 0.48, "grad_norm": 0.45703125, "learning_rate": 1.7272896393343487e-05, "loss": 2.1292, "step": 23040 }, { "epoch": 0.48, "grad_norm": 0.4609375, "learning_rate": 1.7262702234042317e-05, "loss": 2.0969, "step": 23050 }, { "epoch": 0.48, "grad_norm": 0.44140625, "learning_rate": 1.725250700521903e-05, "loss": 2.1248, "step": 23060 }, { "epoch": 0.48, "grad_norm": 0.443359375, "learning_rate": 1.7242310711692647e-05, "loss": 2.0792, "step": 23070 }, { "epoch": 0.48, "grad_norm": 0.4453125, "learning_rate": 1.7232113358282705e-05, "loss": 2.1065, "step": 23080 }, { "epoch": 0.48, "grad_norm": 0.43359375, "learning_rate": 1.7221914949809232e-05, "loss": 2.1182, "step": 23090 }, { "epoch": 0.48, "grad_norm": 0.427734375, "learning_rate": 1.721171549109276e-05, "loss": 2.1064, "step": 23100 }, { "epoch": 0.48, "grad_norm": 0.453125, "learning_rate": 1.720151498695431e-05, "loss": 2.0981, "step": 23110 }, { "epoch": 0.48, "grad_norm": 0.427734375, "learning_rate": 1.7191313442215405e-05, "loss": 2.1122, "step": 23120 }, { "epoch": 0.48, "grad_norm": 0.431640625, "learning_rate": 1.7181110861698057e-05, "loss": 2.1262, "step": 23130 }, { "epoch": 0.48, "grad_norm": 0.423828125, "learning_rate": 1.7170907250224772e-05, "loss": 2.1333, "step": 23140 }, { "epoch": 0.48, "grad_norm": 0.421875, "learning_rate": 1.7160702612618536e-05, "loss": 2.1046, "step": 23150 }, { "epoch": 0.48, "grad_norm": 0.421875, "learning_rate": 1.715049695370282e-05, "loss": 2.1154, "step": 23160 }, { "epoch": 0.48, "grad_norm": 0.42578125, "learning_rate": 1.7140290278301584e-05, "loss": 2.1566, "step": 23170 }, { "epoch": 0.48, "grad_norm": 0.431640625, "learning_rate": 1.7130082591239265e-05, "loss": 2.1008, "step": 23180 }, { "epoch": 0.48, "grad_norm": 0.4453125, "learning_rate": 1.711987389734078e-05, "loss": 2.1029, "step": 23190 }, { "epoch": 0.48, "grad_norm": 0.431640625, "learning_rate": 1.7109664201431517e-05, "loss": 2.1031, "step": 23200 }, { "epoch": 0.48, "grad_norm": 0.431640625, "learning_rate": 1.709945350833734e-05, "loss": 2.1214, "step": 23210 }, { "epoch": 0.48, "grad_norm": 0.443359375, "learning_rate": 1.708924182288459e-05, "loss": 2.1069, "step": 23220 }, { "epoch": 0.48, "grad_norm": 0.455078125, "learning_rate": 1.7079029149900074e-05, "loss": 2.0887, "step": 23230 }, { "epoch": 0.48, "grad_norm": 0.4375, "learning_rate": 1.7068815494211056e-05, "loss": 2.1217, "step": 23240 }, { "epoch": 0.48, "grad_norm": 0.4375, "learning_rate": 1.7058600860645278e-05, "loss": 2.111, "step": 23250 }, { "epoch": 0.48, "grad_norm": 0.42578125, "learning_rate": 1.7048385254030944e-05, "loss": 2.1083, "step": 23260 }, { "epoch": 0.48, "grad_norm": 0.51171875, "learning_rate": 1.70381686791967e-05, "loss": 2.1459, "step": 23270 }, { "epoch": 0.48, "grad_norm": 0.4921875, "learning_rate": 1.7027951140971676e-05, "loss": 2.1245, "step": 23280 }, { "epoch": 0.48, "grad_norm": 0.431640625, "learning_rate": 1.701773264418544e-05, "loss": 2.0804, "step": 23290 }, { "epoch": 0.48, "grad_norm": 0.45703125, "learning_rate": 1.7007513193668003e-05, "loss": 2.1357, "step": 23300 }, { "epoch": 0.48, "grad_norm": 0.435546875, "learning_rate": 1.699729279424986e-05, "loss": 2.1199, "step": 23310 }, { "epoch": 0.48, "grad_norm": 0.43359375, "learning_rate": 1.6987071450761928e-05, "loss": 2.1095, "step": 23320 }, { "epoch": 0.49, "grad_norm": 0.466796875, "learning_rate": 1.6976849168035582e-05, "loss": 2.1155, "step": 23330 }, { "epoch": 0.49, "grad_norm": 0.435546875, "learning_rate": 1.6966625950902625e-05, "loss": 2.1025, "step": 23340 }, { "epoch": 0.49, "grad_norm": 0.4453125, "learning_rate": 1.6956401804195322e-05, "loss": 2.105, "step": 23350 }, { "epoch": 0.49, "grad_norm": 0.47265625, "learning_rate": 1.6946176732746368e-05, "loss": 2.1119, "step": 23360 }, { "epoch": 0.49, "grad_norm": 0.435546875, "learning_rate": 1.693595074138889e-05, "loss": 2.1131, "step": 23370 }, { "epoch": 0.49, "grad_norm": 0.421875, "learning_rate": 1.6925723834956462e-05, "loss": 2.1305, "step": 23380 }, { "epoch": 0.49, "grad_norm": 0.453125, "learning_rate": 1.6915496018283084e-05, "loss": 2.1165, "step": 23390 }, { "epoch": 0.49, "grad_norm": 0.443359375, "learning_rate": 1.6905267296203182e-05, "loss": 2.1396, "step": 23400 }, { "epoch": 0.49, "grad_norm": 0.470703125, "learning_rate": 1.6895037673551616e-05, "loss": 2.1197, "step": 23410 }, { "epoch": 0.49, "grad_norm": 0.5703125, "learning_rate": 1.6884807155163673e-05, "loss": 2.1365, "step": 23420 }, { "epoch": 0.49, "grad_norm": 0.4296875, "learning_rate": 1.6874575745875056e-05, "loss": 2.1316, "step": 23430 }, { "epoch": 0.49, "grad_norm": 0.421875, "learning_rate": 1.6864343450521898e-05, "loss": 2.1134, "step": 23440 }, { "epoch": 0.49, "grad_norm": 0.431640625, "learning_rate": 1.6854110273940737e-05, "loss": 2.0898, "step": 23450 }, { "epoch": 0.49, "grad_norm": 0.435546875, "learning_rate": 1.6843876220968545e-05, "loss": 2.1484, "step": 23460 }, { "epoch": 0.49, "grad_norm": 0.443359375, "learning_rate": 1.68336412964427e-05, "loss": 2.0989, "step": 23470 }, { "epoch": 0.49, "grad_norm": 0.443359375, "learning_rate": 1.682340550520099e-05, "loss": 2.1229, "step": 23480 }, { "epoch": 0.49, "grad_norm": 0.4609375, "learning_rate": 1.6813168852081612e-05, "loss": 2.1308, "step": 23490 }, { "epoch": 0.49, "grad_norm": 0.423828125, "learning_rate": 1.680293134192318e-05, "loss": 2.1178, "step": 23500 }, { "epoch": 0.49, "eval_accuracy": 0.5588112077675645, "eval_loss": 1.9940760135650635, "eval_runtime": 16.4168, "eval_samples_per_second": 36.243, "eval_steps_per_second": 1.157, "step": 23500 }, { "epoch": 0.49, "grad_norm": 0.41796875, "learning_rate": 1.679269297956469e-05, "loss": 2.0734, "step": 23510 }, { "epoch": 0.49, "grad_norm": 0.44921875, "learning_rate": 1.6782453769845578e-05, "loss": 2.1513, "step": 23520 }, { "epoch": 0.49, "grad_norm": 0.42578125, "learning_rate": 1.677221371760565e-05, "loss": 2.1038, "step": 23530 }, { "epoch": 0.49, "grad_norm": 0.43359375, "learning_rate": 1.676197282768512e-05, "loss": 2.101, "step": 23540 }, { "epoch": 0.49, "grad_norm": 0.416015625, "learning_rate": 1.67517311049246e-05, "loss": 2.0985, "step": 23550 }, { "epoch": 0.49, "grad_norm": 0.435546875, "learning_rate": 1.6741488554165085e-05, "loss": 2.1438, "step": 23560 }, { "epoch": 0.49, "grad_norm": 0.4453125, "learning_rate": 1.6731245180247977e-05, "loss": 2.133, "step": 23570 }, { "epoch": 0.49, "grad_norm": 0.443359375, "learning_rate": 1.672100098801506e-05, "loss": 2.1621, "step": 23580 }, { "epoch": 0.49, "grad_norm": 0.447265625, "learning_rate": 1.6710755982308503e-05, "loss": 2.1258, "step": 23590 }, { "epoch": 0.49, "grad_norm": 0.4375, "learning_rate": 1.670051016797086e-05, "loss": 2.1172, "step": 23600 }, { "epoch": 0.49, "grad_norm": 0.478515625, "learning_rate": 1.669026354984507e-05, "loss": 2.1488, "step": 23610 }, { "epoch": 0.49, "grad_norm": 0.51171875, "learning_rate": 1.6680016132774444e-05, "loss": 2.1051, "step": 23620 }, { "epoch": 0.49, "grad_norm": 0.43359375, "learning_rate": 1.6669767921602686e-05, "loss": 2.1017, "step": 23630 }, { "epoch": 0.49, "grad_norm": 0.427734375, "learning_rate": 1.6659518921173864e-05, "loss": 2.114, "step": 23640 }, { "epoch": 0.49, "grad_norm": 0.447265625, "learning_rate": 1.6649269136332418e-05, "loss": 2.1235, "step": 23650 }, { "epoch": 0.49, "grad_norm": 0.44921875, "learning_rate": 1.6639018571923165e-05, "loss": 2.1254, "step": 23660 }, { "epoch": 0.49, "grad_norm": 0.439453125, "learning_rate": 1.662876723279129e-05, "loss": 2.1171, "step": 23670 }, { "epoch": 0.49, "grad_norm": 0.421875, "learning_rate": 1.6618515123782337e-05, "loss": 2.1103, "step": 23680 }, { "epoch": 0.49, "grad_norm": 0.46484375, "learning_rate": 1.6608262249742214e-05, "loss": 2.1078, "step": 23690 }, { "epoch": 0.49, "grad_norm": 0.51953125, "learning_rate": 1.6598008615517204e-05, "loss": 2.1093, "step": 23700 }, { "epoch": 0.49, "grad_norm": 0.43359375, "learning_rate": 1.6587754225953938e-05, "loss": 2.1152, "step": 23710 }, { "epoch": 0.49, "grad_norm": 0.4375, "learning_rate": 1.657749908589941e-05, "loss": 2.1565, "step": 23720 }, { "epoch": 0.49, "grad_norm": 0.4375, "learning_rate": 1.656724320020095e-05, "loss": 2.1195, "step": 23730 }, { "epoch": 0.49, "grad_norm": 0.439453125, "learning_rate": 1.655698657370628e-05, "loss": 2.1436, "step": 23740 }, { "epoch": 0.49, "grad_norm": 0.451171875, "learning_rate": 1.654672921126343e-05, "loss": 2.0986, "step": 23750 }, { "epoch": 0.49, "grad_norm": 0.455078125, "learning_rate": 1.65364711177208e-05, "loss": 2.1014, "step": 23760 }, { "epoch": 0.49, "grad_norm": 0.482421875, "learning_rate": 1.6526212297927133e-05, "loss": 2.1475, "step": 23770 }, { "epoch": 0.49, "grad_norm": 0.4140625, "learning_rate": 1.651595275673151e-05, "loss": 2.1326, "step": 23780 }, { "epoch": 0.49, "grad_norm": 0.42578125, "learning_rate": 1.6505692498983367e-05, "loss": 2.1128, "step": 23790 }, { "epoch": 0.49, "grad_norm": 0.453125, "learning_rate": 1.649543152953246e-05, "loss": 2.1239, "step": 23800 }, { "epoch": 0.5, "grad_norm": 0.419921875, "learning_rate": 1.6485169853228893e-05, "loss": 2.1151, "step": 23810 }, { "epoch": 0.5, "grad_norm": 0.421875, "learning_rate": 1.6474907474923093e-05, "loss": 2.0972, "step": 23820 }, { "epoch": 0.5, "grad_norm": 0.43359375, "learning_rate": 1.6464644399465832e-05, "loss": 2.1181, "step": 23830 }, { "epoch": 0.5, "grad_norm": 0.45703125, "learning_rate": 1.645438063170821e-05, "loss": 2.1397, "step": 23840 }, { "epoch": 0.5, "grad_norm": 0.43359375, "learning_rate": 1.6444116176501643e-05, "loss": 2.0981, "step": 23850 }, { "epoch": 0.5, "grad_norm": 0.427734375, "learning_rate": 1.643385103869789e-05, "loss": 2.1302, "step": 23860 }, { "epoch": 0.5, "grad_norm": 0.427734375, "learning_rate": 1.642358522314901e-05, "loss": 2.1315, "step": 23870 }, { "epoch": 0.5, "grad_norm": 0.431640625, "learning_rate": 1.64133187347074e-05, "loss": 2.1385, "step": 23880 }, { "epoch": 0.5, "grad_norm": 0.44921875, "learning_rate": 1.6403051578225766e-05, "loss": 2.0976, "step": 23890 }, { "epoch": 0.5, "grad_norm": 0.4375, "learning_rate": 1.639278375855714e-05, "loss": 2.1141, "step": 23900 }, { "epoch": 0.5, "grad_norm": 0.439453125, "learning_rate": 1.6382515280554858e-05, "loss": 2.1236, "step": 23910 }, { "epoch": 0.5, "grad_norm": 0.4296875, "learning_rate": 1.6372246149072572e-05, "loss": 2.086, "step": 23920 }, { "epoch": 0.5, "grad_norm": 0.427734375, "learning_rate": 1.6361976368964233e-05, "loss": 2.1219, "step": 23930 }, { "epoch": 0.5, "grad_norm": 0.451171875, "learning_rate": 1.635170594508411e-05, "loss": 2.1005, "step": 23940 }, { "epoch": 0.5, "grad_norm": 0.43359375, "learning_rate": 1.634143488228678e-05, "loss": 2.1232, "step": 23950 }, { "epoch": 0.5, "grad_norm": 0.419921875, "learning_rate": 1.6331163185427106e-05, "loss": 2.1218, "step": 23960 }, { "epoch": 0.5, "grad_norm": 0.4453125, "learning_rate": 1.6320890859360256e-05, "loss": 2.1005, "step": 23970 }, { "epoch": 0.5, "grad_norm": 0.42578125, "learning_rate": 1.6310617908941707e-05, "loss": 2.1096, "step": 23980 }, { "epoch": 0.5, "grad_norm": 0.455078125, "learning_rate": 1.630034433902722e-05, "loss": 2.1064, "step": 23990 }, { "epoch": 0.5, "grad_norm": 0.46484375, "learning_rate": 1.6290070154472853e-05, "loss": 2.1148, "step": 24000 }, { "epoch": 0.5, "eval_accuracy": 0.558804636223991, "eval_loss": 1.993963360786438, "eval_runtime": 16.45, "eval_samples_per_second": 36.17, "eval_steps_per_second": 1.155, "step": 24000 }, { "epoch": 0.5, "grad_norm": 0.4453125, "learning_rate": 1.627979536013495e-05, "loss": 2.0879, "step": 24010 }, { "epoch": 0.5, "grad_norm": 0.4453125, "learning_rate": 1.626951996087015e-05, "loss": 2.1112, "step": 24020 }, { "epoch": 0.5, "grad_norm": 0.423828125, "learning_rate": 1.6259243961535372e-05, "loss": 2.1167, "step": 24030 }, { "epoch": 0.5, "grad_norm": 0.5234375, "learning_rate": 1.6248967366987822e-05, "loss": 2.1219, "step": 24040 }, { "epoch": 0.5, "grad_norm": 0.4140625, "learning_rate": 1.623869018208499e-05, "loss": 2.0934, "step": 24050 }, { "epoch": 0.5, "grad_norm": 0.447265625, "learning_rate": 1.622841241168463e-05, "loss": 2.096, "step": 24060 }, { "epoch": 0.5, "grad_norm": 0.43359375, "learning_rate": 1.6218134060644794e-05, "loss": 2.1385, "step": 24070 }, { "epoch": 0.5, "grad_norm": 0.447265625, "learning_rate": 1.6207855133823797e-05, "loss": 2.0811, "step": 24080 }, { "epoch": 0.5, "grad_norm": 0.431640625, "learning_rate": 1.6197575636080226e-05, "loss": 2.1029, "step": 24090 }, { "epoch": 0.5, "grad_norm": 0.419921875, "learning_rate": 1.618729557227294e-05, "loss": 2.1218, "step": 24100 }, { "epoch": 0.5, "grad_norm": 0.439453125, "learning_rate": 1.6177014947261068e-05, "loss": 2.0949, "step": 24110 }, { "epoch": 0.5, "grad_norm": 0.431640625, "learning_rate": 1.6166733765904e-05, "loss": 2.1215, "step": 24120 }, { "epoch": 0.5, "grad_norm": 0.421875, "learning_rate": 1.6156452033061386e-05, "loss": 2.1416, "step": 24130 }, { "epoch": 0.5, "grad_norm": 0.462890625, "learning_rate": 1.6146169753593145e-05, "loss": 2.1216, "step": 24140 }, { "epoch": 0.5, "grad_norm": 0.44140625, "learning_rate": 1.6135886932359452e-05, "loss": 2.1438, "step": 24150 }, { "epoch": 0.5, "grad_norm": 0.4375, "learning_rate": 1.612560357422073e-05, "loss": 2.1182, "step": 24160 }, { "epoch": 0.5, "grad_norm": 0.42578125, "learning_rate": 1.611531968403767e-05, "loss": 2.1294, "step": 24170 }, { "epoch": 0.5, "grad_norm": 0.427734375, "learning_rate": 1.61050352666712e-05, "loss": 2.1341, "step": 24180 }, { "epoch": 0.5, "grad_norm": 0.474609375, "learning_rate": 1.6094750326982514e-05, "loss": 2.0953, "step": 24190 }, { "epoch": 0.5, "grad_norm": 0.4609375, "learning_rate": 1.6084464869833034e-05, "loss": 2.1101, "step": 24200 }, { "epoch": 0.5, "grad_norm": 0.41796875, "learning_rate": 1.607417890008444e-05, "loss": 2.1149, "step": 24210 }, { "epoch": 0.5, "grad_norm": 0.427734375, "learning_rate": 1.6063892422598644e-05, "loss": 2.1112, "step": 24220 }, { "epoch": 0.5, "grad_norm": 0.4296875, "learning_rate": 1.6053605442237808e-05, "loss": 2.1459, "step": 24230 }, { "epoch": 0.5, "grad_norm": 0.60546875, "learning_rate": 1.6043317963864326e-05, "loss": 2.1287, "step": 24240 }, { "epoch": 0.5, "grad_norm": 0.447265625, "learning_rate": 1.603302999234083e-05, "loss": 2.1058, "step": 24250 }, { "epoch": 0.5, "grad_norm": 0.443359375, "learning_rate": 1.6022741532530184e-05, "loss": 2.1185, "step": 24260 }, { "epoch": 0.5, "grad_norm": 0.451171875, "learning_rate": 1.601245258929548e-05, "loss": 2.041, "step": 24270 }, { "epoch": 0.5, "grad_norm": 0.4375, "learning_rate": 1.6002163167500047e-05, "loss": 2.1076, "step": 24280 }, { "epoch": 0.5, "grad_norm": 0.47265625, "learning_rate": 1.599187327200743e-05, "loss": 2.119, "step": 24290 }, { "epoch": 0.51, "grad_norm": 0.466796875, "learning_rate": 1.59815829076814e-05, "loss": 2.1115, "step": 24300 }, { "epoch": 0.51, "grad_norm": 0.4375, "learning_rate": 1.5971292079385963e-05, "loss": 2.1172, "step": 24310 }, { "epoch": 0.51, "grad_norm": 0.427734375, "learning_rate": 1.596100079198532e-05, "loss": 2.0953, "step": 24320 }, { "epoch": 0.51, "grad_norm": 0.470703125, "learning_rate": 1.5950709050343912e-05, "loss": 2.1304, "step": 24330 }, { "epoch": 0.51, "grad_norm": 0.4453125, "learning_rate": 1.594041685932638e-05, "loss": 2.1117, "step": 24340 }, { "epoch": 0.51, "grad_norm": 0.427734375, "learning_rate": 1.593012422379758e-05, "loss": 2.1197, "step": 24350 }, { "epoch": 0.51, "grad_norm": 0.435546875, "learning_rate": 1.5919831148622584e-05, "loss": 2.1115, "step": 24360 }, { "epoch": 0.51, "grad_norm": 0.4453125, "learning_rate": 1.590953763866667e-05, "loss": 2.1122, "step": 24370 }, { "epoch": 0.51, "grad_norm": 0.4375, "learning_rate": 1.5899243698795317e-05, "loss": 2.1032, "step": 24380 }, { "epoch": 0.51, "grad_norm": 0.462890625, "learning_rate": 1.588894933387421e-05, "loss": 2.1344, "step": 24390 }, { "epoch": 0.51, "grad_norm": 0.447265625, "learning_rate": 1.587865454876924e-05, "loss": 2.1323, "step": 24400 }, { "epoch": 0.51, "grad_norm": 0.45703125, "learning_rate": 1.586835934834648e-05, "loss": 2.1538, "step": 24410 }, { "epoch": 0.51, "grad_norm": 0.453125, "learning_rate": 1.5858063737472222e-05, "loss": 2.1175, "step": 24420 }, { "epoch": 0.51, "grad_norm": 0.44921875, "learning_rate": 1.5847767721012938e-05, "loss": 2.1313, "step": 24430 }, { "epoch": 0.51, "grad_norm": 0.466796875, "learning_rate": 1.58374713038353e-05, "loss": 2.0902, "step": 24440 }, { "epoch": 0.51, "grad_norm": 0.43359375, "learning_rate": 1.5827174490806145e-05, "loss": 2.1322, "step": 24450 }, { "epoch": 0.51, "grad_norm": 0.443359375, "learning_rate": 1.5816877286792533e-05, "loss": 2.1229, "step": 24460 }, { "epoch": 0.51, "grad_norm": 0.427734375, "learning_rate": 1.580657969666169e-05, "loss": 2.0863, "step": 24470 }, { "epoch": 0.51, "grad_norm": 0.447265625, "learning_rate": 1.5796281725281026e-05, "loss": 2.1434, "step": 24480 }, { "epoch": 0.51, "grad_norm": 0.443359375, "learning_rate": 1.5785983377518127e-05, "loss": 2.1387, "step": 24490 }, { "epoch": 0.51, "grad_norm": 0.431640625, "learning_rate": 1.5775684658240764e-05, "loss": 2.1165, "step": 24500 }, { "epoch": 0.51, "eval_accuracy": 0.558840779713645, "eval_loss": 1.9938558340072632, "eval_runtime": 16.4192, "eval_samples_per_second": 36.238, "eval_steps_per_second": 1.157, "step": 24500 }, { "epoch": 0.51, "grad_norm": 0.462890625, "learning_rate": 1.576538557231688e-05, "loss": 2.1023, "step": 24510 }, { "epoch": 0.51, "grad_norm": 0.453125, "learning_rate": 1.5755086124614595e-05, "loss": 2.1266, "step": 24520 }, { "epoch": 0.51, "grad_norm": 0.419921875, "learning_rate": 1.5744786320002197e-05, "loss": 2.1351, "step": 24530 }, { "epoch": 0.51, "grad_norm": 0.419921875, "learning_rate": 1.5734486163348137e-05, "loss": 2.1247, "step": 24540 }, { "epoch": 0.51, "grad_norm": 0.443359375, "learning_rate": 1.5724185659521056e-05, "loss": 2.1155, "step": 24550 }, { "epoch": 0.51, "grad_norm": 0.4609375, "learning_rate": 1.5713884813389722e-05, "loss": 2.1138, "step": 24560 }, { "epoch": 0.51, "grad_norm": 0.421875, "learning_rate": 1.570358362982309e-05, "loss": 2.1336, "step": 24570 }, { "epoch": 0.51, "grad_norm": 0.44140625, "learning_rate": 1.5693282113690275e-05, "loss": 2.1368, "step": 24580 }, { "epoch": 0.51, "grad_norm": 0.439453125, "learning_rate": 1.568298026986054e-05, "loss": 2.1212, "step": 24590 }, { "epoch": 0.51, "grad_norm": 0.451171875, "learning_rate": 1.5672678103203297e-05, "loss": 2.1171, "step": 24600 }, { "epoch": 0.51, "grad_norm": 0.46875, "learning_rate": 1.5662375618588137e-05, "loss": 2.121, "step": 24610 }, { "epoch": 0.51, "grad_norm": 0.482421875, "learning_rate": 1.565207282088477e-05, "loss": 2.1147, "step": 24620 }, { "epoch": 0.51, "grad_norm": 0.435546875, "learning_rate": 1.5641769714963074e-05, "loss": 2.1134, "step": 24630 }, { "epoch": 0.51, "grad_norm": 0.4296875, "learning_rate": 1.5631466305693064e-05, "loss": 2.1189, "step": 24640 }, { "epoch": 0.51, "grad_norm": 0.458984375, "learning_rate": 1.56211625979449e-05, "loss": 2.1255, "step": 24650 }, { "epoch": 0.51, "grad_norm": 0.443359375, "learning_rate": 1.5610858596588888e-05, "loss": 2.0737, "step": 24660 }, { "epoch": 0.51, "grad_norm": 0.439453125, "learning_rate": 1.5600554306495472e-05, "loss": 2.0988, "step": 24670 }, { "epoch": 0.51, "grad_norm": 0.423828125, "learning_rate": 1.559024973253522e-05, "loss": 2.1238, "step": 24680 }, { "epoch": 0.51, "grad_norm": 0.451171875, "learning_rate": 1.557994487957885e-05, "loss": 2.1258, "step": 24690 }, { "epoch": 0.51, "grad_norm": 0.439453125, "learning_rate": 1.55696397524972e-05, "loss": 2.1146, "step": 24700 }, { "epoch": 0.51, "grad_norm": 0.431640625, "learning_rate": 1.5559334356161242e-05, "loss": 2.1377, "step": 24710 }, { "epoch": 0.51, "grad_norm": 0.431640625, "learning_rate": 1.554902869544209e-05, "loss": 2.1513, "step": 24720 }, { "epoch": 0.51, "grad_norm": 0.455078125, "learning_rate": 1.5538722775210955e-05, "loss": 2.0984, "step": 24730 }, { "epoch": 0.51, "grad_norm": 0.45703125, "learning_rate": 1.5528416600339188e-05, "loss": 2.1274, "step": 24740 }, { "epoch": 0.51, "grad_norm": 0.439453125, "learning_rate": 1.551811017569826e-05, "loss": 2.1001, "step": 24750 }, { "epoch": 0.51, "grad_norm": 0.443359375, "learning_rate": 1.5507803506159753e-05, "loss": 2.1188, "step": 24760 }, { "epoch": 0.51, "grad_norm": 0.431640625, "learning_rate": 1.5497496596595375e-05, "loss": 2.1419, "step": 24770 }, { "epoch": 0.52, "grad_norm": 0.69921875, "learning_rate": 1.548718945187694e-05, "loss": 2.0979, "step": 24780 }, { "epoch": 0.52, "grad_norm": 0.443359375, "learning_rate": 1.5476882076876366e-05, "loss": 2.1066, "step": 24790 }, { "epoch": 0.52, "grad_norm": 0.447265625, "learning_rate": 1.54665744764657e-05, "loss": 2.1154, "step": 24800 }, { "epoch": 0.52, "grad_norm": 0.431640625, "learning_rate": 1.545626665551708e-05, "loss": 2.1404, "step": 24810 }, { "epoch": 0.52, "grad_norm": 0.431640625, "learning_rate": 1.5445958618902752e-05, "loss": 2.1155, "step": 24820 }, { "epoch": 0.52, "grad_norm": 0.44921875, "learning_rate": 1.5435650371495066e-05, "loss": 2.1252, "step": 24830 }, { "epoch": 0.52, "grad_norm": 0.439453125, "learning_rate": 1.542534191816646e-05, "loss": 2.1651, "step": 24840 }, { "epoch": 0.52, "grad_norm": 0.4375, "learning_rate": 1.5415033263789493e-05, "loss": 2.1006, "step": 24850 }, { "epoch": 0.52, "grad_norm": 0.462890625, "learning_rate": 1.5404724413236798e-05, "loss": 2.1414, "step": 24860 }, { "epoch": 0.52, "grad_norm": 0.44140625, "learning_rate": 1.5394415371381105e-05, "loss": 2.1228, "step": 24870 }, { "epoch": 0.52, "grad_norm": 0.4296875, "learning_rate": 1.538410614309525e-05, "loss": 2.1602, "step": 24880 }, { "epoch": 0.52, "grad_norm": 0.45703125, "learning_rate": 1.537379673325213e-05, "loss": 2.1302, "step": 24890 }, { "epoch": 0.52, "grad_norm": 0.44140625, "learning_rate": 1.5363487146724742e-05, "loss": 2.1002, "step": 24900 }, { "epoch": 0.52, "grad_norm": 0.44140625, "learning_rate": 1.5353177388386183e-05, "loss": 2.116, "step": 24910 }, { "epoch": 0.52, "grad_norm": 0.43359375, "learning_rate": 1.5342867463109597e-05, "loss": 2.0986, "step": 24920 }, { "epoch": 0.52, "grad_norm": 0.41796875, "learning_rate": 1.533255737576824e-05, "loss": 2.1267, "step": 24930 }, { "epoch": 0.52, "grad_norm": 0.4375, "learning_rate": 1.532224713123542e-05, "loss": 2.0892, "step": 24940 }, { "epoch": 0.52, "grad_norm": 0.423828125, "learning_rate": 1.531193673438453e-05, "loss": 2.1235, "step": 24950 }, { "epoch": 0.52, "grad_norm": 0.439453125, "learning_rate": 1.5301626190089037e-05, "loss": 2.0825, "step": 24960 }, { "epoch": 0.52, "grad_norm": 0.4296875, "learning_rate": 1.5291315503222472e-05, "loss": 2.1363, "step": 24970 }, { "epoch": 0.52, "grad_norm": 0.439453125, "learning_rate": 1.5281004678658436e-05, "loss": 2.1062, "step": 24980 }, { "epoch": 0.52, "grad_norm": 0.44921875, "learning_rate": 1.5270693721270595e-05, "loss": 2.1315, "step": 24990 }, { "epoch": 0.52, "grad_norm": 0.439453125, "learning_rate": 1.526038263593268e-05, "loss": 2.1012, "step": 25000 }, { "epoch": 0.52, "eval_accuracy": 0.5589722105851138, "eval_loss": 1.9938210248947144, "eval_runtime": 16.4402, "eval_samples_per_second": 36.192, "eval_steps_per_second": 1.156, "step": 25000 }, { "epoch": 0.52, "grad_norm": 0.55078125, "learning_rate": 1.5250071427518476e-05, "loss": 2.1071, "step": 25010 }, { "epoch": 0.52, "grad_norm": 0.43359375, "learning_rate": 1.5239760100901834e-05, "loss": 2.1583, "step": 25020 }, { "epoch": 0.52, "grad_norm": 0.447265625, "learning_rate": 1.5229448660956653e-05, "loss": 2.1198, "step": 25030 }, { "epoch": 0.52, "grad_norm": 0.451171875, "learning_rate": 1.5219137112556898e-05, "loss": 2.1065, "step": 25040 }, { "epoch": 0.52, "grad_norm": 0.451171875, "learning_rate": 1.520882546057657e-05, "loss": 2.1428, "step": 25050 }, { "epoch": 0.52, "grad_norm": 0.451171875, "learning_rate": 1.5198513709889732e-05, "loss": 2.138, "step": 25060 }, { "epoch": 0.52, "grad_norm": 0.44921875, "learning_rate": 1.518820186537048e-05, "loss": 2.1452, "step": 25070 }, { "epoch": 0.52, "grad_norm": 0.44140625, "learning_rate": 1.517788993189297e-05, "loss": 2.1486, "step": 25080 }, { "epoch": 0.52, "grad_norm": 0.43359375, "learning_rate": 1.5167577914331383e-05, "loss": 2.1261, "step": 25090 }, { "epoch": 0.52, "grad_norm": 0.421875, "learning_rate": 1.5157265817559957e-05, "loss": 2.0831, "step": 25100 }, { "epoch": 0.52, "grad_norm": 0.4375, "learning_rate": 1.5146953646452954e-05, "loss": 2.1612, "step": 25110 }, { "epoch": 0.52, "grad_norm": 0.439453125, "learning_rate": 1.513664140588468e-05, "loss": 2.0915, "step": 25120 }, { "epoch": 0.52, "grad_norm": 0.453125, "learning_rate": 1.5126329100729472e-05, "loss": 2.0916, "step": 25130 }, { "epoch": 0.52, "grad_norm": 0.47265625, "learning_rate": 1.511601673586169e-05, "loss": 2.1231, "step": 25140 }, { "epoch": 0.52, "grad_norm": 0.431640625, "learning_rate": 1.5105704316155733e-05, "loss": 2.1131, "step": 25150 }, { "epoch": 0.52, "grad_norm": 0.453125, "learning_rate": 1.5095391846486018e-05, "loss": 2.1316, "step": 25160 }, { "epoch": 0.52, "grad_norm": 0.419921875, "learning_rate": 1.5085079331726993e-05, "loss": 2.1119, "step": 25170 }, { "epoch": 0.52, "grad_norm": 0.453125, "learning_rate": 1.5074766776753118e-05, "loss": 2.1289, "step": 25180 }, { "epoch": 0.52, "grad_norm": 0.42578125, "learning_rate": 1.5064454186438876e-05, "loss": 2.105, "step": 25190 }, { "epoch": 0.52, "grad_norm": 0.4375, "learning_rate": 1.5054141565658773e-05, "loss": 2.1189, "step": 25200 }, { "epoch": 0.52, "grad_norm": 0.44921875, "learning_rate": 1.5043828919287322e-05, "loss": 2.1055, "step": 25210 }, { "epoch": 0.52, "grad_norm": 0.453125, "learning_rate": 1.5033516252199047e-05, "loss": 2.1345, "step": 25220 }, { "epoch": 0.52, "grad_norm": 0.48828125, "learning_rate": 1.5023203569268486e-05, "loss": 2.1026, "step": 25230 }, { "epoch": 0.52, "grad_norm": 0.41015625, "learning_rate": 1.501289087537019e-05, "loss": 2.1427, "step": 25240 }, { "epoch": 0.52, "grad_norm": 0.462890625, "learning_rate": 1.5002578175378698e-05, "loss": 2.149, "step": 25250 }, { "epoch": 0.53, "grad_norm": 0.44921875, "learning_rate": 1.4992265474168566e-05, "loss": 2.1098, "step": 25260 }, { "epoch": 0.53, "grad_norm": 0.4296875, "learning_rate": 1.4981952776614357e-05, "loss": 2.0874, "step": 25270 }, { "epoch": 0.53, "grad_norm": 0.423828125, "learning_rate": 1.4971640087590608e-05, "loss": 2.1357, "step": 25280 }, { "epoch": 0.53, "grad_norm": 0.44921875, "learning_rate": 1.4961327411971871e-05, "loss": 2.1393, "step": 25290 }, { "epoch": 0.53, "grad_norm": 0.43359375, "learning_rate": 1.4951014754632694e-05, "loss": 2.0795, "step": 25300 }, { "epoch": 0.53, "grad_norm": 0.44921875, "learning_rate": 1.4940702120447598e-05, "loss": 2.1107, "step": 25310 }, { "epoch": 0.53, "grad_norm": 0.43359375, "learning_rate": 1.4930389514291114e-05, "loss": 2.1119, "step": 25320 }, { "epoch": 0.53, "grad_norm": 0.43359375, "learning_rate": 1.4920076941037747e-05, "loss": 2.1201, "step": 25330 }, { "epoch": 0.53, "grad_norm": 0.447265625, "learning_rate": 1.4909764405561986e-05, "loss": 2.1366, "step": 25340 }, { "epoch": 0.53, "grad_norm": 0.431640625, "learning_rate": 1.4899451912738307e-05, "loss": 2.0952, "step": 25350 }, { "epoch": 0.53, "grad_norm": 0.453125, "learning_rate": 1.4889139467441172e-05, "loss": 2.1262, "step": 25360 }, { "epoch": 0.53, "grad_norm": 0.435546875, "learning_rate": 1.4878827074545e-05, "loss": 2.1358, "step": 25370 }, { "epoch": 0.53, "grad_norm": 0.435546875, "learning_rate": 1.4868514738924204e-05, "loss": 2.1332, "step": 25380 }, { "epoch": 0.53, "grad_norm": 0.4296875, "learning_rate": 1.4858202465453175e-05, "loss": 2.1228, "step": 25390 }, { "epoch": 0.53, "grad_norm": 0.447265625, "learning_rate": 1.4847890259006244e-05, "loss": 2.1142, "step": 25400 }, { "epoch": 0.53, "grad_norm": 0.41796875, "learning_rate": 1.4837578124457742e-05, "loss": 2.137, "step": 25410 }, { "epoch": 0.53, "grad_norm": 0.453125, "learning_rate": 1.482726606668196e-05, "loss": 2.1394, "step": 25420 }, { "epoch": 0.53, "grad_norm": 0.431640625, "learning_rate": 1.4816954090553133e-05, "loss": 2.0968, "step": 25430 }, { "epoch": 0.53, "grad_norm": 0.435546875, "learning_rate": 1.480664220094548e-05, "loss": 2.1152, "step": 25440 }, { "epoch": 0.53, "grad_norm": 0.4296875, "learning_rate": 1.4796330402733166e-05, "loss": 2.1262, "step": 25450 }, { "epoch": 0.53, "grad_norm": 0.4375, "learning_rate": 1.4786018700790325e-05, "loss": 2.0961, "step": 25460 }, { "epoch": 0.53, "grad_norm": 1.0625, "learning_rate": 1.4775707099991027e-05, "loss": 2.1108, "step": 25470 }, { "epoch": 0.53, "grad_norm": 0.4296875, "learning_rate": 1.4765395605209309e-05, "loss": 2.1273, "step": 25480 }, { "epoch": 0.53, "grad_norm": 0.4453125, "learning_rate": 1.4755084221319164e-05, "loss": 2.1276, "step": 25490 }, { "epoch": 0.53, "grad_norm": 0.42578125, "learning_rate": 1.4744772953194504e-05, "loss": 2.1573, "step": 25500 }, { "epoch": 0.53, "eval_accuracy": 0.5589853536722607, "eval_loss": 1.9936455488204956, "eval_runtime": 16.4618, "eval_samples_per_second": 36.144, "eval_steps_per_second": 1.154, "step": 25500 }, { "epoch": 0.53, "grad_norm": 0.439453125, "learning_rate": 1.4734461805709214e-05, "loss": 2.1088, "step": 25510 }, { "epoch": 0.53, "grad_norm": 0.443359375, "learning_rate": 1.472415078373712e-05, "loss": 2.1284, "step": 25520 }, { "epoch": 0.53, "grad_norm": 0.42578125, "learning_rate": 1.4713839892151968e-05, "loss": 2.115, "step": 25530 }, { "epoch": 0.53, "grad_norm": 0.4296875, "learning_rate": 1.4703529135827459e-05, "loss": 2.1055, "step": 25540 }, { "epoch": 0.53, "grad_norm": 0.462890625, "learning_rate": 1.4693218519637238e-05, "loss": 2.1428, "step": 25550 }, { "epoch": 0.53, "grad_norm": 0.4296875, "learning_rate": 1.4682908048454855e-05, "loss": 2.1101, "step": 25560 }, { "epoch": 0.53, "grad_norm": 0.46875, "learning_rate": 1.4672597727153824e-05, "loss": 2.1126, "step": 25570 }, { "epoch": 0.53, "grad_norm": 0.50390625, "learning_rate": 1.4662287560607565e-05, "loss": 2.1322, "step": 25580 }, { "epoch": 0.53, "grad_norm": 0.42578125, "learning_rate": 1.4651977553689436e-05, "loss": 2.1119, "step": 25590 }, { "epoch": 0.53, "grad_norm": 0.443359375, "learning_rate": 1.4641667711272711e-05, "loss": 2.1098, "step": 25600 }, { "epoch": 0.53, "grad_norm": 0.482421875, "learning_rate": 1.4631358038230602e-05, "loss": 2.1481, "step": 25610 }, { "epoch": 0.53, "grad_norm": 0.431640625, "learning_rate": 1.4621048539436221e-05, "loss": 2.1433, "step": 25620 }, { "epoch": 0.53, "grad_norm": 0.427734375, "learning_rate": 1.4610739219762609e-05, "loss": 2.1147, "step": 25630 }, { "epoch": 0.53, "grad_norm": 0.435546875, "learning_rate": 1.4600430084082722e-05, "loss": 2.1207, "step": 25640 }, { "epoch": 0.53, "grad_norm": 0.455078125, "learning_rate": 1.4590121137269436e-05, "loss": 2.0961, "step": 25650 }, { "epoch": 0.53, "grad_norm": 0.5703125, "learning_rate": 1.4579812384195515e-05, "loss": 2.1353, "step": 25660 }, { "epoch": 0.53, "grad_norm": 0.470703125, "learning_rate": 1.4569503829733658e-05, "loss": 2.1375, "step": 25670 }, { "epoch": 0.53, "grad_norm": 0.4375, "learning_rate": 1.4559195478756451e-05, "loss": 2.1226, "step": 25680 }, { "epoch": 0.53, "grad_norm": 0.447265625, "learning_rate": 1.4548887336136394e-05, "loss": 2.1184, "step": 25690 }, { "epoch": 0.53, "grad_norm": 0.44140625, "learning_rate": 1.4538579406745878e-05, "loss": 2.0869, "step": 25700 }, { "epoch": 0.53, "grad_norm": 0.4765625, "learning_rate": 1.4528271695457215e-05, "loss": 2.1118, "step": 25710 }, { "epoch": 0.53, "grad_norm": 0.41796875, "learning_rate": 1.4517964207142584e-05, "loss": 2.0815, "step": 25720 }, { "epoch": 0.53, "grad_norm": 0.4609375, "learning_rate": 1.450765694667408e-05, "loss": 2.1251, "step": 25730 }, { "epoch": 0.54, "grad_norm": 0.419921875, "learning_rate": 1.4497349918923693e-05, "loss": 2.1044, "step": 25740 }, { "epoch": 0.54, "grad_norm": 0.416015625, "learning_rate": 1.4487043128763278e-05, "loss": 2.1204, "step": 25750 }, { "epoch": 0.54, "grad_norm": 0.421875, "learning_rate": 1.4476736581064603e-05, "loss": 2.1083, "step": 25760 }, { "epoch": 0.54, "grad_norm": 0.447265625, "learning_rate": 1.446643028069932e-05, "loss": 2.1267, "step": 25770 }, { "epoch": 0.54, "grad_norm": 0.44921875, "learning_rate": 1.4456124232538941e-05, "loss": 2.0993, "step": 25780 }, { "epoch": 0.54, "grad_norm": 0.44921875, "learning_rate": 1.4445818441454882e-05, "loss": 2.1483, "step": 25790 }, { "epoch": 0.54, "grad_norm": 0.453125, "learning_rate": 1.4435512912318436e-05, "loss": 2.1093, "step": 25800 }, { "epoch": 0.54, "grad_norm": 0.44140625, "learning_rate": 1.4425207650000752e-05, "loss": 2.1248, "step": 25810 }, { "epoch": 0.54, "grad_norm": 0.435546875, "learning_rate": 1.441490265937288e-05, "loss": 2.1181, "step": 25820 }, { "epoch": 0.54, "grad_norm": 0.447265625, "learning_rate": 1.440459794530572e-05, "loss": 2.0949, "step": 25830 }, { "epoch": 0.54, "grad_norm": 0.431640625, "learning_rate": 1.4394293512670055e-05, "loss": 2.1193, "step": 25840 }, { "epoch": 0.54, "grad_norm": 0.458984375, "learning_rate": 1.4383989366336523e-05, "loss": 2.1579, "step": 25850 }, { "epoch": 0.54, "grad_norm": 0.4296875, "learning_rate": 1.4373685511175639e-05, "loss": 2.1036, "step": 25860 }, { "epoch": 0.54, "grad_norm": 0.478515625, "learning_rate": 1.4363381952057779e-05, "loss": 2.1403, "step": 25870 }, { "epoch": 0.54, "grad_norm": 0.4296875, "learning_rate": 1.4353078693853162e-05, "loss": 2.129, "step": 25880 }, { "epoch": 0.54, "grad_norm": 0.427734375, "learning_rate": 1.4342775741431885e-05, "loss": 2.0931, "step": 25890 }, { "epoch": 0.54, "grad_norm": 0.443359375, "learning_rate": 1.43324730996639e-05, "loss": 2.1306, "step": 25900 }, { "epoch": 0.54, "grad_norm": 0.447265625, "learning_rate": 1.432217077341899e-05, "loss": 2.1432, "step": 25910 }, { "epoch": 0.54, "grad_norm": 0.42578125, "learning_rate": 1.4311868767566813e-05, "loss": 2.1181, "step": 25920 }, { "epoch": 0.54, "grad_norm": 0.66015625, "learning_rate": 1.4301567086976874e-05, "loss": 2.1151, "step": 25930 }, { "epoch": 0.54, "grad_norm": 0.435546875, "learning_rate": 1.4291265736518498e-05, "loss": 2.1105, "step": 25940 }, { "epoch": 0.54, "grad_norm": 0.4375, "learning_rate": 1.428096472106089e-05, "loss": 2.1074, "step": 25950 }, { "epoch": 0.54, "grad_norm": 0.4375, "learning_rate": 1.427066404547307e-05, "loss": 2.1439, "step": 25960 }, { "epoch": 0.54, "grad_norm": 0.447265625, "learning_rate": 1.4260363714623912e-05, "loss": 2.1512, "step": 25970 }, { "epoch": 0.54, "grad_norm": 0.44140625, "learning_rate": 1.4250063733382115e-05, "loss": 2.1296, "step": 25980 }, { "epoch": 0.54, "grad_norm": 0.458984375, "learning_rate": 1.423976410661623e-05, "loss": 2.1344, "step": 25990 }, { "epoch": 0.54, "grad_norm": 0.435546875, "learning_rate": 1.422946483919462e-05, "loss": 2.1674, "step": 26000 }, { "epoch": 0.54, "eval_accuracy": 0.5589360670954598, "eval_loss": 1.9935729503631592, "eval_runtime": 16.4394, "eval_samples_per_second": 36.194, "eval_steps_per_second": 1.156, "step": 26000 }, { "epoch": 0.54, "grad_norm": 0.443359375, "learning_rate": 1.4219165935985491e-05, "loss": 2.1371, "step": 26010 }, { "epoch": 0.54, "grad_norm": 0.466796875, "learning_rate": 1.4208867401856876e-05, "loss": 2.15, "step": 26020 }, { "epoch": 0.54, "grad_norm": 0.47265625, "learning_rate": 1.4198569241676638e-05, "loss": 2.1126, "step": 26030 }, { "epoch": 0.54, "grad_norm": 0.4453125, "learning_rate": 1.4188271460312442e-05, "loss": 2.1363, "step": 26040 }, { "epoch": 0.54, "grad_norm": 0.455078125, "learning_rate": 1.41779740626318e-05, "loss": 2.1472, "step": 26050 }, { "epoch": 0.54, "grad_norm": 0.4609375, "learning_rate": 1.416767705350203e-05, "loss": 2.1341, "step": 26060 }, { "epoch": 0.54, "grad_norm": 0.46875, "learning_rate": 1.4157380437790266e-05, "loss": 2.1106, "step": 26070 }, { "epoch": 0.54, "grad_norm": 0.51953125, "learning_rate": 1.4147084220363453e-05, "loss": 2.1236, "step": 26080 }, { "epoch": 0.54, "grad_norm": 0.43359375, "learning_rate": 1.4136788406088365e-05, "loss": 2.1035, "step": 26090 }, { "epoch": 0.54, "grad_norm": 0.47265625, "learning_rate": 1.412649299983156e-05, "loss": 2.1264, "step": 26100 }, { "epoch": 0.54, "grad_norm": 0.466796875, "learning_rate": 1.4116198006459417e-05, "loss": 2.1115, "step": 26110 }, { "epoch": 0.54, "grad_norm": 0.41796875, "learning_rate": 1.4105903430838135e-05, "loss": 2.0964, "step": 26120 }, { "epoch": 0.54, "grad_norm": 0.42578125, "learning_rate": 1.409560927783368e-05, "loss": 2.0703, "step": 26130 }, { "epoch": 0.54, "grad_norm": 0.4375, "learning_rate": 1.4085315552311846e-05, "loss": 2.0989, "step": 26140 }, { "epoch": 0.54, "grad_norm": 0.44140625, "learning_rate": 1.4075022259138223e-05, "loss": 2.1273, "step": 26150 }, { "epoch": 0.54, "grad_norm": 0.4375, "learning_rate": 1.4064729403178178e-05, "loss": 2.1313, "step": 26160 }, { "epoch": 0.54, "grad_norm": 0.4609375, "learning_rate": 1.4054436989296888e-05, "loss": 2.1341, "step": 26170 }, { "epoch": 0.54, "grad_norm": 0.478515625, "learning_rate": 1.4044145022359325e-05, "loss": 2.1349, "step": 26180 }, { "epoch": 0.54, "grad_norm": 0.458984375, "learning_rate": 1.403385350723023e-05, "loss": 2.1089, "step": 26190 }, { "epoch": 0.54, "grad_norm": 0.423828125, "learning_rate": 1.402356244877415e-05, "loss": 2.0889, "step": 26200 }, { "epoch": 0.54, "grad_norm": 0.427734375, "learning_rate": 1.40132718518554e-05, "loss": 2.1778, "step": 26210 }, { "epoch": 0.55, "grad_norm": 0.43359375, "learning_rate": 1.4002981721338097e-05, "loss": 2.1216, "step": 26220 }, { "epoch": 0.55, "grad_norm": 0.44140625, "learning_rate": 1.3992692062086111e-05, "loss": 2.0964, "step": 26230 }, { "epoch": 0.55, "grad_norm": 0.4453125, "learning_rate": 1.3982402878963114e-05, "loss": 2.1148, "step": 26240 }, { "epoch": 0.55, "grad_norm": 0.42578125, "learning_rate": 1.3972114176832542e-05, "loss": 2.1037, "step": 26250 }, { "epoch": 0.55, "grad_norm": 0.447265625, "learning_rate": 1.3961825960557598e-05, "loss": 2.1185, "step": 26260 }, { "epoch": 0.55, "grad_norm": 0.4296875, "learning_rate": 1.3951538235001262e-05, "loss": 2.1135, "step": 26270 }, { "epoch": 0.55, "grad_norm": 0.43359375, "learning_rate": 1.3941251005026294e-05, "loss": 2.1159, "step": 26280 }, { "epoch": 0.55, "grad_norm": 0.46484375, "learning_rate": 1.393096427549519e-05, "loss": 2.1127, "step": 26290 }, { "epoch": 0.55, "grad_norm": 0.4140625, "learning_rate": 1.3920678051270239e-05, "loss": 2.1071, "step": 26300 }, { "epoch": 0.55, "grad_norm": 0.4296875, "learning_rate": 1.3910392337213479e-05, "loss": 2.1105, "step": 26310 }, { "epoch": 0.55, "grad_norm": 0.42578125, "learning_rate": 1.3900107138186697e-05, "loss": 2.0986, "step": 26320 }, { "epoch": 0.55, "grad_norm": 0.427734375, "learning_rate": 1.3889822459051456e-05, "loss": 2.0751, "step": 26330 }, { "epoch": 0.55, "grad_norm": 0.439453125, "learning_rate": 1.3879538304669063e-05, "loss": 2.0935, "step": 26340 }, { "epoch": 0.55, "grad_norm": 0.4453125, "learning_rate": 1.3869254679900572e-05, "loss": 2.1099, "step": 26350 }, { "epoch": 0.55, "grad_norm": 0.4765625, "learning_rate": 1.3858971589606793e-05, "loss": 2.1489, "step": 26360 }, { "epoch": 0.55, "grad_norm": 0.453125, "learning_rate": 1.3848689038648292e-05, "loss": 2.1276, "step": 26370 }, { "epoch": 0.55, "grad_norm": 0.470703125, "learning_rate": 1.3838407031885358e-05, "loss": 2.0907, "step": 26380 }, { "epoch": 0.55, "grad_norm": 0.42578125, "learning_rate": 1.382812557417804e-05, "loss": 2.1162, "step": 26390 }, { "epoch": 0.55, "grad_norm": 0.439453125, "learning_rate": 1.3817844670386126e-05, "loss": 2.1265, "step": 26400 }, { "epoch": 0.55, "grad_norm": 0.44921875, "learning_rate": 1.3807564325369143e-05, "loss": 2.1161, "step": 26410 }, { "epoch": 0.55, "grad_norm": 0.43359375, "learning_rate": 1.379728454398634e-05, "loss": 2.133, "step": 26420 }, { "epoch": 0.55, "grad_norm": 0.43359375, "learning_rate": 1.3787005331096716e-05, "loss": 2.0751, "step": 26430 }, { "epoch": 0.55, "grad_norm": 0.423828125, "learning_rate": 1.3776726691558995e-05, "loss": 2.1407, "step": 26440 }, { "epoch": 0.55, "grad_norm": 0.431640625, "learning_rate": 1.3766448630231628e-05, "loss": 2.1342, "step": 26450 }, { "epoch": 0.55, "grad_norm": 0.41796875, "learning_rate": 1.375617115197279e-05, "loss": 2.0845, "step": 26460 }, { "epoch": 0.55, "grad_norm": 0.453125, "learning_rate": 1.3745894261640397e-05, "loss": 2.0888, "step": 26470 }, { "epoch": 0.55, "grad_norm": 0.443359375, "learning_rate": 1.3735617964092063e-05, "loss": 2.1236, "step": 26480 }, { "epoch": 0.55, "grad_norm": 0.43359375, "learning_rate": 1.372534226418514e-05, "loss": 2.0925, "step": 26490 }, { "epoch": 0.55, "grad_norm": 0.4296875, "learning_rate": 1.3715067166776694e-05, "loss": 2.1184, "step": 26500 }, { "epoch": 0.55, "eval_accuracy": 0.559028068705488, "eval_loss": 1.9934953451156616, "eval_runtime": 15.4554, "eval_samples_per_second": 38.498, "eval_steps_per_second": 1.229, "step": 26500 }, { "epoch": 0.55, "grad_norm": 0.44140625, "learning_rate": 1.3704792676723496e-05, "loss": 2.1057, "step": 26510 }, { "epoch": 0.55, "grad_norm": 0.439453125, "learning_rate": 1.3694518798882041e-05, "loss": 2.1311, "step": 26520 }, { "epoch": 0.55, "grad_norm": 0.439453125, "learning_rate": 1.3684245538108539e-05, "loss": 2.1151, "step": 26530 }, { "epoch": 0.55, "grad_norm": 0.5, "learning_rate": 1.3673972899258886e-05, "loss": 2.13, "step": 26540 }, { "epoch": 0.55, "grad_norm": 0.427734375, "learning_rate": 1.3663700887188708e-05, "loss": 2.1299, "step": 26550 }, { "epoch": 0.55, "grad_norm": 0.435546875, "learning_rate": 1.3653429506753329e-05, "loss": 2.1389, "step": 26560 }, { "epoch": 0.55, "grad_norm": 0.44921875, "learning_rate": 1.3643158762807759e-05, "loss": 2.1371, "step": 26570 }, { "epoch": 0.55, "grad_norm": 0.421875, "learning_rate": 1.3632888660206729e-05, "loss": 2.1261, "step": 26580 }, { "epoch": 0.55, "grad_norm": 0.63671875, "learning_rate": 1.3622619203804651e-05, "loss": 2.1256, "step": 26590 }, { "epoch": 0.55, "grad_norm": 0.44140625, "learning_rate": 1.361235039845564e-05, "loss": 2.1222, "step": 26600 }, { "epoch": 0.55, "grad_norm": 0.439453125, "learning_rate": 1.3602082249013498e-05, "loss": 2.1045, "step": 26610 }, { "epoch": 0.55, "grad_norm": 0.4375, "learning_rate": 1.359181476033172e-05, "loss": 2.1442, "step": 26620 }, { "epoch": 0.55, "grad_norm": 0.443359375, "learning_rate": 1.3581547937263497e-05, "loss": 2.1355, "step": 26630 }, { "epoch": 0.55, "grad_norm": 0.4609375, "learning_rate": 1.3571281784661683e-05, "loss": 2.1196, "step": 26640 }, { "epoch": 0.55, "grad_norm": 0.4375, "learning_rate": 1.356101630737883e-05, "loss": 2.1325, "step": 26650 }, { "epoch": 0.55, "grad_norm": 0.451171875, "learning_rate": 1.3550751510267186e-05, "loss": 2.1362, "step": 26660 }, { "epoch": 0.55, "grad_norm": 0.4296875, "learning_rate": 1.3540487398178641e-05, "loss": 2.1411, "step": 26670 }, { "epoch": 0.55, "grad_norm": 0.439453125, "learning_rate": 1.3530223975964793e-05, "loss": 2.1057, "step": 26680 }, { "epoch": 0.55, "grad_norm": 0.427734375, "learning_rate": 1.35199612484769e-05, "loss": 2.1329, "step": 26690 }, { "epoch": 0.56, "grad_norm": 0.44921875, "learning_rate": 1.350969922056589e-05, "loss": 2.1321, "step": 26700 }, { "epoch": 0.56, "grad_norm": 0.435546875, "learning_rate": 1.3499437897082363e-05, "loss": 2.1413, "step": 26710 }, { "epoch": 0.56, "grad_norm": 0.44140625, "learning_rate": 1.3489177282876599e-05, "loss": 2.0994, "step": 26720 }, { "epoch": 0.56, "grad_norm": 0.423828125, "learning_rate": 1.3478917382798514e-05, "loss": 2.0977, "step": 26730 }, { "epoch": 0.56, "grad_norm": 0.4453125, "learning_rate": 1.3468658201697715e-05, "loss": 2.1497, "step": 26740 }, { "epoch": 0.56, "grad_norm": 0.462890625, "learning_rate": 1.3458399744423456e-05, "loss": 2.094, "step": 26750 }, { "epoch": 0.56, "grad_norm": 0.431640625, "learning_rate": 1.3448142015824645e-05, "loss": 2.1285, "step": 26760 }, { "epoch": 0.56, "grad_norm": 0.4296875, "learning_rate": 1.3437885020749852e-05, "loss": 2.1076, "step": 26770 }, { "epoch": 0.56, "grad_norm": 0.43359375, "learning_rate": 1.3427628764047308e-05, "loss": 2.1319, "step": 26780 }, { "epoch": 0.56, "grad_norm": 0.416015625, "learning_rate": 1.3417373250564874e-05, "loss": 2.1056, "step": 26790 }, { "epoch": 0.56, "grad_norm": 0.453125, "learning_rate": 1.3407118485150077e-05, "loss": 2.1156, "step": 26800 }, { "epoch": 0.56, "grad_norm": 0.423828125, "learning_rate": 1.3396864472650087e-05, "loss": 2.1381, "step": 26810 }, { "epoch": 0.56, "grad_norm": 0.41015625, "learning_rate": 1.3386611217911718e-05, "loss": 2.111, "step": 26820 }, { "epoch": 0.56, "grad_norm": 0.443359375, "learning_rate": 1.3376358725781419e-05, "loss": 2.0966, "step": 26830 }, { "epoch": 0.56, "grad_norm": 0.423828125, "learning_rate": 1.3366107001105283e-05, "loss": 2.1492, "step": 26840 }, { "epoch": 0.56, "grad_norm": 0.443359375, "learning_rate": 1.3355856048729052e-05, "loss": 2.1115, "step": 26850 }, { "epoch": 0.56, "grad_norm": 0.46875, "learning_rate": 1.3345605873498075e-05, "loss": 2.1053, "step": 26860 }, { "epoch": 0.56, "grad_norm": 0.4609375, "learning_rate": 1.333535648025736e-05, "loss": 2.0999, "step": 26870 }, { "epoch": 0.56, "grad_norm": 0.42578125, "learning_rate": 1.3325107873851541e-05, "loss": 2.0953, "step": 26880 }, { "epoch": 0.56, "grad_norm": 0.42578125, "learning_rate": 1.3314860059124866e-05, "loss": 2.1491, "step": 26890 }, { "epoch": 0.56, "grad_norm": 0.46484375, "learning_rate": 1.3304613040921217e-05, "loss": 2.0849, "step": 26900 }, { "epoch": 0.56, "grad_norm": 0.451171875, "learning_rate": 1.3294366824084114e-05, "loss": 2.1227, "step": 26910 }, { "epoch": 0.56, "grad_norm": 0.439453125, "learning_rate": 1.3284121413456668e-05, "loss": 2.1092, "step": 26920 }, { "epoch": 0.56, "grad_norm": 0.458984375, "learning_rate": 1.327387681388163e-05, "loss": 2.1102, "step": 26930 }, { "epoch": 0.56, "grad_norm": 0.4296875, "learning_rate": 1.3263633030201376e-05, "loss": 2.1417, "step": 26940 }, { "epoch": 0.56, "grad_norm": 0.68359375, "learning_rate": 1.3253390067257865e-05, "loss": 2.1315, "step": 26950 }, { "epoch": 0.56, "grad_norm": 0.44921875, "learning_rate": 1.3243147929892697e-05, "loss": 2.1116, "step": 26960 }, { "epoch": 0.56, "grad_norm": 0.42578125, "learning_rate": 1.323290662294707e-05, "loss": 2.0774, "step": 26970 }, { "epoch": 0.56, "grad_norm": 0.443359375, "learning_rate": 1.322266615126179e-05, "loss": 2.1073, "step": 26980 }, { "epoch": 0.56, "grad_norm": 0.4453125, "learning_rate": 1.3212426519677265e-05, "loss": 2.1148, "step": 26990 }, { "epoch": 0.56, "grad_norm": 0.462890625, "learning_rate": 1.3202187733033512e-05, "loss": 2.1424, "step": 27000 }, { "epoch": 0.56, "eval_accuracy": 0.5590198542760213, "eval_loss": 1.9934771060943604, "eval_runtime": 16.4588, "eval_samples_per_second": 36.151, "eval_steps_per_second": 1.154, "step": 27000 }, { "epoch": 0.56, "grad_norm": 0.4453125, "learning_rate": 1.3191949796170156e-05, "loss": 2.0874, "step": 27010 }, { "epoch": 0.56, "grad_norm": 0.451171875, "learning_rate": 1.3181712713926398e-05, "loss": 2.1289, "step": 27020 }, { "epoch": 0.56, "grad_norm": 0.431640625, "learning_rate": 1.317147649114105e-05, "loss": 2.1437, "step": 27030 }, { "epoch": 0.56, "grad_norm": 0.443359375, "learning_rate": 1.3161241132652526e-05, "loss": 2.142, "step": 27040 }, { "epoch": 0.56, "grad_norm": 0.4375, "learning_rate": 1.3151006643298807e-05, "loss": 2.1415, "step": 27050 }, { "epoch": 0.56, "grad_norm": 0.47265625, "learning_rate": 1.3140773027917488e-05, "loss": 2.0835, "step": 27060 }, { "epoch": 0.56, "grad_norm": 0.439453125, "learning_rate": 1.3130540291345734e-05, "loss": 2.1002, "step": 27070 }, { "epoch": 0.56, "grad_norm": 0.443359375, "learning_rate": 1.3120308438420304e-05, "loss": 2.1329, "step": 27080 }, { "epoch": 0.56, "grad_norm": 0.4609375, "learning_rate": 1.311007747397753e-05, "loss": 2.123, "step": 27090 }, { "epoch": 0.56, "grad_norm": 0.44921875, "learning_rate": 1.3099847402853345e-05, "loss": 2.1045, "step": 27100 }, { "epoch": 0.56, "grad_norm": 0.4453125, "learning_rate": 1.3089618229883224e-05, "loss": 2.1065, "step": 27110 }, { "epoch": 0.56, "grad_norm": 0.44140625, "learning_rate": 1.3079389959902251e-05, "loss": 2.1408, "step": 27120 }, { "epoch": 0.56, "grad_norm": 0.421875, "learning_rate": 1.3069162597745074e-05, "loss": 2.0833, "step": 27130 }, { "epoch": 0.56, "grad_norm": 0.419921875, "learning_rate": 1.3058936148245899e-05, "loss": 2.1175, "step": 27140 }, { "epoch": 0.56, "grad_norm": 0.48046875, "learning_rate": 1.3048710616238512e-05, "loss": 2.1104, "step": 27150 }, { "epoch": 0.56, "grad_norm": 0.4453125, "learning_rate": 1.3038486006556273e-05, "loss": 2.115, "step": 27160 }, { "epoch": 0.56, "grad_norm": 0.435546875, "learning_rate": 1.3028262324032083e-05, "loss": 2.1183, "step": 27170 }, { "epoch": 0.57, "grad_norm": 0.431640625, "learning_rate": 1.3018039573498422e-05, "loss": 2.0974, "step": 27180 }, { "epoch": 0.57, "grad_norm": 0.419921875, "learning_rate": 1.3007817759787333e-05, "loss": 2.1345, "step": 27190 }, { "epoch": 0.57, "grad_norm": 0.47265625, "learning_rate": 1.2997596887730402e-05, "loss": 2.0943, "step": 27200 }, { "epoch": 0.57, "grad_norm": 0.443359375, "learning_rate": 1.2987376962158778e-05, "loss": 2.1167, "step": 27210 }, { "epoch": 0.57, "grad_norm": 0.4296875, "learning_rate": 1.297715798790316e-05, "loss": 2.1245, "step": 27220 }, { "epoch": 0.57, "grad_norm": 0.470703125, "learning_rate": 1.2966939969793804e-05, "loss": 2.1178, "step": 27230 }, { "epoch": 0.57, "grad_norm": 0.455078125, "learning_rate": 1.29567229126605e-05, "loss": 2.1221, "step": 27240 }, { "epoch": 0.57, "grad_norm": 0.462890625, "learning_rate": 1.2946506821332599e-05, "loss": 2.1227, "step": 27250 }, { "epoch": 0.57, "grad_norm": 0.451171875, "learning_rate": 1.2936291700638993e-05, "loss": 2.1038, "step": 27260 }, { "epoch": 0.57, "grad_norm": 0.4375, "learning_rate": 1.2926077555408098e-05, "loss": 2.1094, "step": 27270 }, { "epoch": 0.57, "grad_norm": 0.44921875, "learning_rate": 1.291586439046789e-05, "loss": 2.126, "step": 27280 }, { "epoch": 0.57, "grad_norm": 0.427734375, "learning_rate": 1.2905652210645882e-05, "loss": 2.1133, "step": 27290 }, { "epoch": 0.57, "grad_norm": 0.421875, "learning_rate": 1.2895441020769096e-05, "loss": 2.1307, "step": 27300 }, { "epoch": 0.57, "grad_norm": 0.443359375, "learning_rate": 1.2885230825664112e-05, "loss": 2.1296, "step": 27310 }, { "epoch": 0.57, "grad_norm": 0.431640625, "learning_rate": 1.2875021630157038e-05, "loss": 2.1107, "step": 27320 }, { "epoch": 0.57, "grad_norm": 0.421875, "learning_rate": 1.2864813439073489e-05, "loss": 2.105, "step": 27330 }, { "epoch": 0.57, "grad_norm": 0.423828125, "learning_rate": 1.2854606257238628e-05, "loss": 2.1143, "step": 27340 }, { "epoch": 0.57, "grad_norm": 0.419921875, "learning_rate": 1.2844400089477129e-05, "loss": 2.1339, "step": 27350 }, { "epoch": 0.57, "grad_norm": 0.61328125, "learning_rate": 1.2834194940613188e-05, "loss": 2.1097, "step": 27360 }, { "epoch": 0.57, "grad_norm": 0.439453125, "learning_rate": 1.282399081547052e-05, "loss": 2.081, "step": 27370 }, { "epoch": 0.57, "grad_norm": 0.44140625, "learning_rate": 1.2813787718872358e-05, "loss": 2.1473, "step": 27380 }, { "epoch": 0.57, "grad_norm": 0.435546875, "learning_rate": 1.2803585655641456e-05, "loss": 2.1149, "step": 27390 }, { "epoch": 0.57, "grad_norm": 0.42578125, "learning_rate": 1.2793384630600056e-05, "loss": 2.1518, "step": 27400 }, { "epoch": 0.57, "grad_norm": 0.435546875, "learning_rate": 1.2783184648569932e-05, "loss": 2.0944, "step": 27410 }, { "epoch": 0.57, "grad_norm": 0.4609375, "learning_rate": 1.2772985714372365e-05, "loss": 2.0976, "step": 27420 }, { "epoch": 0.57, "grad_norm": 0.453125, "learning_rate": 1.276278783282812e-05, "loss": 2.1313, "step": 27430 }, { "epoch": 0.57, "grad_norm": 0.4296875, "learning_rate": 1.2752591008757486e-05, "loss": 2.1634, "step": 27440 }, { "epoch": 0.57, "grad_norm": 0.455078125, "learning_rate": 1.2742395246980243e-05, "loss": 2.1583, "step": 27450 }, { "epoch": 0.57, "grad_norm": 0.435546875, "learning_rate": 1.2732200552315668e-05, "loss": 2.0877, "step": 27460 }, { "epoch": 0.57, "grad_norm": 0.435546875, "learning_rate": 1.2722006929582533e-05, "loss": 2.1509, "step": 27470 }, { "epoch": 0.57, "grad_norm": 0.435546875, "learning_rate": 1.2711814383599113e-05, "loss": 2.134, "step": 27480 }, { "epoch": 0.57, "grad_norm": 0.419921875, "learning_rate": 1.2701622919183155e-05, "loss": 2.1218, "step": 27490 }, { "epoch": 0.57, "grad_norm": 0.431640625, "learning_rate": 1.2691432541151916e-05, "loss": 2.1437, "step": 27500 }, { "epoch": 0.57, "eval_accuracy": 0.5590083540747678, "eval_loss": 1.9934571981430054, "eval_runtime": 16.4953, "eval_samples_per_second": 36.071, "eval_steps_per_second": 1.152, "step": 27500 }, { "epoch": 0.57, "grad_norm": 0.44921875, "learning_rate": 1.2681243254322133e-05, "loss": 2.124, "step": 27510 }, { "epoch": 0.57, "grad_norm": 0.431640625, "learning_rate": 1.2671055063510012e-05, "loss": 2.115, "step": 27520 }, { "epoch": 0.57, "grad_norm": 0.474609375, "learning_rate": 1.266086797353126e-05, "loss": 2.1179, "step": 27530 }, { "epoch": 0.57, "grad_norm": 0.4375, "learning_rate": 1.2650681989201062e-05, "loss": 2.1293, "step": 27540 }, { "epoch": 0.57, "grad_norm": 0.462890625, "learning_rate": 1.2640497115334065e-05, "loss": 2.1372, "step": 27550 }, { "epoch": 0.57, "grad_norm": 0.44140625, "learning_rate": 1.2630313356744404e-05, "loss": 2.1186, "step": 27560 }, { "epoch": 0.57, "grad_norm": 0.458984375, "learning_rate": 1.262013071824569e-05, "loss": 2.1205, "step": 27570 }, { "epoch": 0.57, "grad_norm": 0.43359375, "learning_rate": 1.2609949204650995e-05, "loss": 2.1183, "step": 27580 }, { "epoch": 0.57, "grad_norm": 0.421875, "learning_rate": 1.2599768820772864e-05, "loss": 2.1243, "step": 27590 }, { "epoch": 0.57, "grad_norm": 0.439453125, "learning_rate": 1.25895895714233e-05, "loss": 2.1108, "step": 27600 }, { "epoch": 0.57, "grad_norm": 0.486328125, "learning_rate": 1.2579411461413792e-05, "loss": 2.1356, "step": 27610 }, { "epoch": 0.57, "grad_norm": 0.412109375, "learning_rate": 1.2569234495555257e-05, "loss": 2.1289, "step": 27620 }, { "epoch": 0.57, "grad_norm": 0.451171875, "learning_rate": 1.2559058678658098e-05, "loss": 2.1485, "step": 27630 }, { "epoch": 0.57, "grad_norm": 0.4296875, "learning_rate": 1.2548884015532174e-05, "loss": 2.1122, "step": 27640 }, { "epoch": 0.57, "grad_norm": 0.439453125, "learning_rate": 1.2538710510986776e-05, "loss": 2.0887, "step": 27650 }, { "epoch": 0.58, "grad_norm": 0.4375, "learning_rate": 1.252853816983067e-05, "loss": 2.1251, "step": 27660 }, { "epoch": 0.58, "grad_norm": 0.44140625, "learning_rate": 1.251836699687207e-05, "loss": 2.1284, "step": 27670 }, { "epoch": 0.58, "grad_norm": 0.421875, "learning_rate": 1.2508196996918618e-05, "loss": 2.1318, "step": 27680 }, { "epoch": 0.58, "grad_norm": 0.4375, "learning_rate": 1.2498028174777428e-05, "loss": 2.1294, "step": 27690 }, { "epoch": 0.58, "grad_norm": 0.5625, "learning_rate": 1.2487860535255042e-05, "loss": 2.1145, "step": 27700 }, { "epoch": 0.58, "grad_norm": 0.451171875, "learning_rate": 1.2477694083157445e-05, "loss": 2.0956, "step": 27710 }, { "epoch": 0.58, "grad_norm": 0.443359375, "learning_rate": 1.2467528823290057e-05, "loss": 2.1108, "step": 27720 }, { "epoch": 0.58, "grad_norm": 0.42578125, "learning_rate": 1.2457364760457752e-05, "loss": 2.122, "step": 27730 }, { "epoch": 0.58, "grad_norm": 0.451171875, "learning_rate": 1.2447201899464811e-05, "loss": 2.0908, "step": 27740 }, { "epoch": 0.58, "grad_norm": 0.4375, "learning_rate": 1.2437040245114966e-05, "loss": 2.0817, "step": 27750 }, { "epoch": 0.58, "grad_norm": 0.42578125, "learning_rate": 1.2426879802211378e-05, "loss": 2.1055, "step": 27760 }, { "epoch": 0.58, "grad_norm": 0.416015625, "learning_rate": 1.2416720575556635e-05, "loss": 2.0944, "step": 27770 }, { "epoch": 0.58, "grad_norm": 0.43359375, "learning_rate": 1.2406562569952736e-05, "loss": 2.1228, "step": 27780 }, { "epoch": 0.58, "grad_norm": 0.4375, "learning_rate": 1.2396405790201115e-05, "loss": 2.1159, "step": 27790 }, { "epoch": 0.58, "grad_norm": 0.447265625, "learning_rate": 1.2386250241102635e-05, "loss": 2.1282, "step": 27800 }, { "epoch": 0.58, "grad_norm": 0.439453125, "learning_rate": 1.2376095927457553e-05, "loss": 2.1084, "step": 27810 }, { "epoch": 0.58, "grad_norm": 0.41796875, "learning_rate": 1.236594285406557e-05, "loss": 2.0959, "step": 27820 }, { "epoch": 0.58, "grad_norm": 0.42578125, "learning_rate": 1.2355791025725776e-05, "loss": 2.1166, "step": 27830 }, { "epoch": 0.58, "grad_norm": 0.458984375, "learning_rate": 1.234564044723669e-05, "loss": 2.1539, "step": 27840 }, { "epoch": 0.58, "grad_norm": 0.451171875, "learning_rate": 1.2335491123396228e-05, "loss": 2.1456, "step": 27850 }, { "epoch": 0.58, "grad_norm": 0.439453125, "learning_rate": 1.232534305900173e-05, "loss": 2.1151, "step": 27860 }, { "epoch": 0.58, "grad_norm": 0.447265625, "learning_rate": 1.2315196258849914e-05, "loss": 2.1129, "step": 27870 }, { "epoch": 0.58, "grad_norm": 0.43359375, "learning_rate": 1.2305050727736924e-05, "loss": 2.1252, "step": 27880 }, { "epoch": 0.58, "grad_norm": 0.451171875, "learning_rate": 1.2294906470458302e-05, "loss": 2.0908, "step": 27890 }, { "epoch": 0.58, "grad_norm": 0.44921875, "learning_rate": 1.2284763491808968e-05, "loss": 2.1296, "step": 27900 }, { "epoch": 0.58, "grad_norm": 0.44140625, "learning_rate": 1.227462179658326e-05, "loss": 2.0943, "step": 27910 }, { "epoch": 0.58, "grad_norm": 0.451171875, "learning_rate": 1.2264481389574903e-05, "loss": 2.1286, "step": 27920 }, { "epoch": 0.58, "grad_norm": 0.470703125, "learning_rate": 1.2254342275577005e-05, "loss": 2.151, "step": 27930 }, { "epoch": 0.58, "grad_norm": 0.435546875, "learning_rate": 1.224420445938207e-05, "loss": 2.1285, "step": 27940 }, { "epoch": 0.58, "grad_norm": 0.4140625, "learning_rate": 1.2234067945781991e-05, "loss": 2.1218, "step": 27950 }, { "epoch": 0.58, "grad_norm": 1.078125, "learning_rate": 1.2223932739568038e-05, "loss": 2.0688, "step": 27960 }, { "epoch": 0.58, "grad_norm": 0.45703125, "learning_rate": 1.2213798845530871e-05, "loss": 2.1179, "step": 27970 }, { "epoch": 0.58, "grad_norm": 0.423828125, "learning_rate": 1.2203666268460516e-05, "loss": 2.1146, "step": 27980 }, { "epoch": 0.58, "grad_norm": 0.455078125, "learning_rate": 1.2193535013146402e-05, "loss": 2.1362, "step": 27990 }, { "epoch": 0.58, "grad_norm": 0.4453125, "learning_rate": 1.2183405084377297e-05, "loss": 2.1244, "step": 28000 }, { "epoch": 0.58, "eval_accuracy": 0.5591069272283693, "eval_loss": 1.9933356046676636, "eval_runtime": 16.4523, "eval_samples_per_second": 36.165, "eval_steps_per_second": 1.155, "step": 28000 }, { "epoch": 0.58, "grad_norm": 0.478515625, "learning_rate": 1.2173276486941376e-05, "loss": 2.1222, "step": 28010 }, { "epoch": 0.58, "grad_norm": 0.462890625, "learning_rate": 1.2163149225626171e-05, "loss": 2.1199, "step": 28020 }, { "epoch": 0.58, "grad_norm": 0.431640625, "learning_rate": 1.2153023305218575e-05, "loss": 2.1549, "step": 28030 }, { "epoch": 0.58, "grad_norm": 0.447265625, "learning_rate": 1.2142898730504856e-05, "loss": 2.1344, "step": 28040 }, { "epoch": 0.58, "grad_norm": 0.451171875, "learning_rate": 1.2132775506270652e-05, "loss": 2.1078, "step": 28050 }, { "epoch": 0.58, "grad_norm": 0.4453125, "learning_rate": 1.2122653637300945e-05, "loss": 2.1093, "step": 28060 }, { "epoch": 0.58, "grad_norm": 0.41796875, "learning_rate": 1.2112533128380093e-05, "loss": 2.1161, "step": 28070 }, { "epoch": 0.58, "grad_norm": 0.435546875, "learning_rate": 1.2102413984291804e-05, "loss": 2.1284, "step": 28080 }, { "epoch": 0.58, "grad_norm": 0.41796875, "learning_rate": 1.2092296209819138e-05, "loss": 2.0985, "step": 28090 }, { "epoch": 0.58, "grad_norm": 0.43359375, "learning_rate": 1.208217980974451e-05, "loss": 2.133, "step": 28100 }, { "epoch": 0.58, "grad_norm": 0.44140625, "learning_rate": 1.2072064788849697e-05, "loss": 2.1152, "step": 28110 }, { "epoch": 0.58, "grad_norm": 0.4375, "learning_rate": 1.20619511519158e-05, "loss": 2.125, "step": 28120 }, { "epoch": 0.58, "grad_norm": 0.435546875, "learning_rate": 1.2051838903723286e-05, "loss": 2.1047, "step": 28130 }, { "epoch": 0.59, "grad_norm": 0.4375, "learning_rate": 1.204172804905196e-05, "loss": 2.1467, "step": 28140 }, { "epoch": 0.59, "grad_norm": 0.5859375, "learning_rate": 1.2031618592680969e-05, "loss": 2.0989, "step": 28150 }, { "epoch": 0.59, "grad_norm": 0.435546875, "learning_rate": 1.2021510539388788e-05, "loss": 2.1089, "step": 28160 }, { "epoch": 0.59, "grad_norm": 0.451171875, "learning_rate": 1.2011403893953246e-05, "loss": 2.1107, "step": 28170 }, { "epoch": 0.59, "grad_norm": 0.4453125, "learning_rate": 1.20012986611515e-05, "loss": 2.125, "step": 28180 }, { "epoch": 0.59, "grad_norm": 0.435546875, "learning_rate": 1.1991194845760031e-05, "loss": 2.0944, "step": 28190 }, { "epoch": 0.59, "grad_norm": 0.44140625, "learning_rate": 1.1981092452554665e-05, "loss": 2.1525, "step": 28200 }, { "epoch": 0.59, "grad_norm": 0.45703125, "learning_rate": 1.1970991486310543e-05, "loss": 2.1026, "step": 28210 }, { "epoch": 0.59, "grad_norm": 0.4296875, "learning_rate": 1.1960891951802137e-05, "loss": 2.1265, "step": 28220 }, { "epoch": 0.59, "grad_norm": 0.419921875, "learning_rate": 1.1950793853803238e-05, "loss": 2.1504, "step": 28230 }, { "epoch": 0.59, "grad_norm": 0.447265625, "learning_rate": 1.194069719708697e-05, "loss": 2.125, "step": 28240 }, { "epoch": 0.59, "grad_norm": 0.431640625, "learning_rate": 1.1930601986425758e-05, "loss": 2.1218, "step": 28250 }, { "epoch": 0.59, "grad_norm": 0.4765625, "learning_rate": 1.1920508226591358e-05, "loss": 2.1364, "step": 28260 }, { "epoch": 0.59, "grad_norm": 0.435546875, "learning_rate": 1.1910415922354838e-05, "loss": 2.0988, "step": 28270 }, { "epoch": 0.59, "grad_norm": 0.41796875, "learning_rate": 1.1900325078486564e-05, "loss": 2.1102, "step": 28280 }, { "epoch": 0.59, "grad_norm": 0.455078125, "learning_rate": 1.1890235699756233e-05, "loss": 2.1155, "step": 28290 }, { "epoch": 0.59, "grad_norm": 0.421875, "learning_rate": 1.1880147790932839e-05, "loss": 2.1141, "step": 28300 }, { "epoch": 0.59, "grad_norm": 0.4453125, "learning_rate": 1.1870061356784674e-05, "loss": 2.122, "step": 28310 }, { "epoch": 0.59, "grad_norm": 0.470703125, "learning_rate": 1.1859976402079346e-05, "loss": 2.1254, "step": 28320 }, { "epoch": 0.59, "grad_norm": 0.435546875, "learning_rate": 1.1849892931583755e-05, "loss": 2.1076, "step": 28330 }, { "epoch": 0.59, "grad_norm": 0.419921875, "learning_rate": 1.183981095006411e-05, "loss": 2.1195, "step": 28340 }, { "epoch": 0.59, "grad_norm": 0.435546875, "learning_rate": 1.1829730462285895e-05, "loss": 2.1006, "step": 28350 }, { "epoch": 0.59, "grad_norm": 0.43359375, "learning_rate": 1.181965147301391e-05, "loss": 2.1458, "step": 28360 }, { "epoch": 0.59, "grad_norm": 0.431640625, "learning_rate": 1.1809573987012246e-05, "loss": 2.0965, "step": 28370 }, { "epoch": 0.59, "grad_norm": 0.4296875, "learning_rate": 1.1799498009044258e-05, "loss": 2.1124, "step": 28380 }, { "epoch": 0.59, "grad_norm": 0.439453125, "learning_rate": 1.1789423543872616e-05, "loss": 2.1053, "step": 28390 }, { "epoch": 0.59, "grad_norm": 0.431640625, "learning_rate": 1.1779350596259271e-05, "loss": 2.1208, "step": 28400 }, { "epoch": 0.59, "grad_norm": 0.427734375, "learning_rate": 1.1769279170965439e-05, "loss": 2.0985, "step": 28410 }, { "epoch": 0.59, "grad_norm": 0.42578125, "learning_rate": 1.175920927275163e-05, "loss": 2.1096, "step": 28420 }, { "epoch": 0.59, "grad_norm": 0.439453125, "learning_rate": 1.1749140906377641e-05, "loss": 2.1216, "step": 28430 }, { "epoch": 0.59, "grad_norm": 0.451171875, "learning_rate": 1.1739074076602519e-05, "loss": 2.1419, "step": 28440 }, { "epoch": 0.59, "grad_norm": 0.43359375, "learning_rate": 1.172900878818461e-05, "loss": 2.1376, "step": 28450 }, { "epoch": 0.59, "grad_norm": 0.44921875, "learning_rate": 1.171894504588152e-05, "loss": 2.1519, "step": 28460 }, { "epoch": 0.59, "grad_norm": 0.43359375, "learning_rate": 1.1708882854450123e-05, "loss": 2.0907, "step": 28470 }, { "epoch": 0.59, "grad_norm": 0.427734375, "learning_rate": 1.1698822218646558e-05, "loss": 2.095, "step": 28480 }, { "epoch": 0.59, "grad_norm": 0.4375, "learning_rate": 1.1688763143226247e-05, "loss": 2.1459, "step": 28490 }, { "epoch": 0.59, "grad_norm": 0.4453125, "learning_rate": 1.1678705632943844e-05, "loss": 2.0767, "step": 28500 }, { "epoch": 0.59, "eval_accuracy": 0.5589377099813533, "eval_loss": 1.9933265447616577, "eval_runtime": 16.4552, "eval_samples_per_second": 36.159, "eval_steps_per_second": 1.155, "step": 28500 }, { "epoch": 0.59, "grad_norm": 0.447265625, "learning_rate": 1.1668649692553287e-05, "loss": 2.1222, "step": 28510 }, { "epoch": 0.59, "grad_norm": 0.423828125, "learning_rate": 1.1658595326807766e-05, "loss": 2.1317, "step": 28520 }, { "epoch": 0.59, "grad_norm": 0.455078125, "learning_rate": 1.1648542540459731e-05, "loss": 2.1365, "step": 28530 }, { "epoch": 0.59, "grad_norm": 0.431640625, "learning_rate": 1.1638491338260869e-05, "loss": 2.1198, "step": 28540 }, { "epoch": 0.59, "grad_norm": 0.458984375, "learning_rate": 1.162844172496213e-05, "loss": 2.155, "step": 28550 }, { "epoch": 0.59, "grad_norm": 0.4921875, "learning_rate": 1.1618393705313723e-05, "loss": 2.1436, "step": 28560 }, { "epoch": 0.59, "grad_norm": 0.439453125, "learning_rate": 1.160834728406508e-05, "loss": 2.1108, "step": 28570 }, { "epoch": 0.59, "grad_norm": 0.421875, "learning_rate": 1.1598302465964897e-05, "loss": 2.123, "step": 28580 }, { "epoch": 0.59, "grad_norm": 0.427734375, "learning_rate": 1.1588259255761105e-05, "loss": 2.1177, "step": 28590 }, { "epoch": 0.59, "grad_norm": 0.4375, "learning_rate": 1.1578217658200875e-05, "loss": 2.112, "step": 28600 }, { "epoch": 0.59, "grad_norm": 0.470703125, "learning_rate": 1.1568177678030612e-05, "loss": 2.1178, "step": 28610 }, { "epoch": 0.6, "grad_norm": 0.4296875, "learning_rate": 1.1558139319995971e-05, "loss": 2.1127, "step": 28620 }, { "epoch": 0.6, "grad_norm": 0.439453125, "learning_rate": 1.1548102588841817e-05, "loss": 2.1346, "step": 28630 }, { "epoch": 0.6, "grad_norm": 0.439453125, "learning_rate": 1.1538067489312264e-05, "loss": 2.1185, "step": 28640 }, { "epoch": 0.6, "grad_norm": 0.431640625, "learning_rate": 1.1528034026150658e-05, "loss": 2.1163, "step": 28650 }, { "epoch": 0.6, "grad_norm": 0.443359375, "learning_rate": 1.151800220409955e-05, "loss": 2.075, "step": 28660 }, { "epoch": 0.6, "grad_norm": 0.4296875, "learning_rate": 1.1507972027900736e-05, "loss": 2.1443, "step": 28670 }, { "epoch": 0.6, "grad_norm": 0.447265625, "learning_rate": 1.149794350229523e-05, "loss": 2.1195, "step": 28680 }, { "epoch": 0.6, "grad_norm": 0.458984375, "learning_rate": 1.1487916632023252e-05, "loss": 2.1143, "step": 28690 }, { "epoch": 0.6, "grad_norm": 0.412109375, "learning_rate": 1.147789142182426e-05, "loss": 2.0925, "step": 28700 }, { "epoch": 0.6, "grad_norm": 0.447265625, "learning_rate": 1.1467867876436909e-05, "loss": 2.1458, "step": 28710 }, { "epoch": 0.6, "grad_norm": 0.451171875, "learning_rate": 1.1457846000599086e-05, "loss": 2.1353, "step": 28720 }, { "epoch": 0.6, "grad_norm": 0.439453125, "learning_rate": 1.1447825799047868e-05, "loss": 2.0754, "step": 28730 }, { "epoch": 0.6, "grad_norm": 0.416015625, "learning_rate": 1.143780727651955e-05, "loss": 2.1034, "step": 28740 }, { "epoch": 0.6, "grad_norm": 0.439453125, "learning_rate": 1.1427790437749648e-05, "loss": 2.1467, "step": 28750 }, { "epoch": 0.6, "grad_norm": 0.4296875, "learning_rate": 1.1417775287472853e-05, "loss": 2.1286, "step": 28760 }, { "epoch": 0.6, "grad_norm": 0.443359375, "learning_rate": 1.140776183042308e-05, "loss": 2.1096, "step": 28770 }, { "epoch": 0.6, "grad_norm": 0.46875, "learning_rate": 1.139775007133344e-05, "loss": 2.1491, "step": 28780 }, { "epoch": 0.6, "grad_norm": 0.4453125, "learning_rate": 1.1387740014936229e-05, "loss": 2.1065, "step": 28790 }, { "epoch": 0.6, "grad_norm": 0.44140625, "learning_rate": 1.1377731665962954e-05, "loss": 2.1321, "step": 28800 }, { "epoch": 0.6, "grad_norm": 0.69140625, "learning_rate": 1.1367725029144315e-05, "loss": 2.101, "step": 28810 }, { "epoch": 0.6, "grad_norm": 0.4375, "learning_rate": 1.135772010921018e-05, "loss": 2.103, "step": 28820 }, { "epoch": 0.6, "grad_norm": 0.427734375, "learning_rate": 1.1347716910889635e-05, "loss": 2.115, "step": 28830 }, { "epoch": 0.6, "grad_norm": 0.443359375, "learning_rate": 1.1337715438910934e-05, "loss": 2.1064, "step": 28840 }, { "epoch": 0.6, "grad_norm": 0.435546875, "learning_rate": 1.1327715698001518e-05, "loss": 2.0888, "step": 28850 }, { "epoch": 0.6, "grad_norm": 0.447265625, "learning_rate": 1.1317717692888014e-05, "loss": 2.0912, "step": 28860 }, { "epoch": 0.6, "grad_norm": 0.427734375, "learning_rate": 1.1307721428296229e-05, "loss": 2.133, "step": 28870 }, { "epoch": 0.6, "grad_norm": 0.443359375, "learning_rate": 1.1297726908951137e-05, "loss": 2.098, "step": 28880 }, { "epoch": 0.6, "grad_norm": 0.4375, "learning_rate": 1.1287734139576898e-05, "loss": 2.081, "step": 28890 }, { "epoch": 0.6, "grad_norm": 0.453125, "learning_rate": 1.1277743124896842e-05, "loss": 2.1387, "step": 28900 }, { "epoch": 0.6, "grad_norm": 0.455078125, "learning_rate": 1.1267753869633476e-05, "loss": 2.1141, "step": 28910 }, { "epoch": 0.6, "grad_norm": 0.44921875, "learning_rate": 1.1257766378508454e-05, "loss": 2.1073, "step": 28920 }, { "epoch": 0.6, "grad_norm": 0.4375, "learning_rate": 1.1247780656242618e-05, "loss": 2.117, "step": 28930 }, { "epoch": 0.6, "grad_norm": 0.447265625, "learning_rate": 1.1237796707555972e-05, "loss": 2.1142, "step": 28940 }, { "epoch": 0.6, "grad_norm": 0.50390625, "learning_rate": 1.1227814537167666e-05, "loss": 2.0913, "step": 28950 }, { "epoch": 0.6, "grad_norm": 0.451171875, "learning_rate": 1.1217834149796028e-05, "loss": 2.1046, "step": 28960 }, { "epoch": 0.6, "grad_norm": 0.43359375, "learning_rate": 1.1207855550158526e-05, "loss": 2.1097, "step": 28970 }, { "epoch": 0.6, "grad_norm": 0.44921875, "learning_rate": 1.1197878742971799e-05, "loss": 2.1279, "step": 28980 }, { "epoch": 0.6, "grad_norm": 0.42578125, "learning_rate": 1.1187903732951624e-05, "loss": 2.1096, "step": 28990 }, { "epoch": 0.6, "grad_norm": 0.443359375, "learning_rate": 1.1177930524812946e-05, "loss": 2.1182, "step": 29000 }, { "epoch": 0.6, "eval_accuracy": 0.5591019985706893, "eval_loss": 1.993354082107544, "eval_runtime": 16.4416, "eval_samples_per_second": 36.189, "eval_steps_per_second": 1.156, "step": 29000 }, { "epoch": 0.6, "grad_norm": 0.44140625, "learning_rate": 1.1167959123269838e-05, "loss": 2.0716, "step": 29010 }, { "epoch": 0.6, "grad_norm": 0.4375, "learning_rate": 1.1157989533035534e-05, "loss": 2.0974, "step": 29020 }, { "epoch": 0.6, "grad_norm": 0.443359375, "learning_rate": 1.1148021758822412e-05, "loss": 2.1374, "step": 29030 }, { "epoch": 0.6, "grad_norm": 0.470703125, "learning_rate": 1.1138055805341977e-05, "loss": 2.1611, "step": 29040 }, { "epoch": 0.6, "grad_norm": 0.451171875, "learning_rate": 1.1128091677304888e-05, "loss": 2.1267, "step": 29050 }, { "epoch": 0.6, "grad_norm": 0.42578125, "learning_rate": 1.1118129379420942e-05, "loss": 2.1332, "step": 29060 }, { "epoch": 0.6, "grad_norm": 0.435546875, "learning_rate": 1.1108168916399054e-05, "loss": 2.0888, "step": 29070 }, { "epoch": 0.6, "grad_norm": 0.4375, "learning_rate": 1.109821029294729e-05, "loss": 2.1189, "step": 29080 }, { "epoch": 0.6, "grad_norm": 0.453125, "learning_rate": 1.1088253513772837e-05, "loss": 2.127, "step": 29090 }, { "epoch": 0.6, "grad_norm": 0.4375, "learning_rate": 1.107829858358202e-05, "loss": 2.1072, "step": 29100 }, { "epoch": 0.61, "grad_norm": 0.4296875, "learning_rate": 1.106834550708027e-05, "loss": 2.1126, "step": 29110 }, { "epoch": 0.61, "grad_norm": 0.458984375, "learning_rate": 1.105839428897216e-05, "loss": 2.1192, "step": 29120 }, { "epoch": 0.61, "grad_norm": 0.419921875, "learning_rate": 1.1048444933961387e-05, "loss": 2.0924, "step": 29130 }, { "epoch": 0.61, "grad_norm": 0.46484375, "learning_rate": 1.1038497446750746e-05, "loss": 2.1034, "step": 29140 }, { "epoch": 0.61, "grad_norm": 0.46875, "learning_rate": 1.1028551832042169e-05, "loss": 2.1216, "step": 29150 }, { "epoch": 0.61, "grad_norm": 0.43359375, "learning_rate": 1.1018608094536704e-05, "loss": 2.0961, "step": 29160 }, { "epoch": 0.61, "grad_norm": 0.4375, "learning_rate": 1.1008666238934492e-05, "loss": 2.1109, "step": 29170 }, { "epoch": 0.61, "grad_norm": 0.43359375, "learning_rate": 1.09987262699348e-05, "loss": 2.1036, "step": 29180 }, { "epoch": 0.61, "grad_norm": 0.44140625, "learning_rate": 1.0988788192236007e-05, "loss": 2.1045, "step": 29190 }, { "epoch": 0.61, "grad_norm": 0.431640625, "learning_rate": 1.097885201053558e-05, "loss": 2.1096, "step": 29200 }, { "epoch": 0.61, "grad_norm": 0.43359375, "learning_rate": 1.096891772953011e-05, "loss": 2.1355, "step": 29210 }, { "epoch": 0.61, "grad_norm": 0.4140625, "learning_rate": 1.0958985353915275e-05, "loss": 2.1055, "step": 29220 }, { "epoch": 0.61, "grad_norm": 0.50390625, "learning_rate": 1.0949054888385862e-05, "loss": 2.1157, "step": 29230 }, { "epoch": 0.61, "grad_norm": 0.49609375, "learning_rate": 1.0939126337635745e-05, "loss": 2.1231, "step": 29240 }, { "epoch": 0.61, "grad_norm": 0.41796875, "learning_rate": 1.0929199706357906e-05, "loss": 2.1276, "step": 29250 }, { "epoch": 0.61, "grad_norm": 0.427734375, "learning_rate": 1.0919274999244405e-05, "loss": 2.1238, "step": 29260 }, { "epoch": 0.61, "grad_norm": 0.4296875, "learning_rate": 1.0909352220986401e-05, "loss": 2.1075, "step": 29270 }, { "epoch": 0.61, "grad_norm": 0.43359375, "learning_rate": 1.0899431376274147e-05, "loss": 2.1133, "step": 29280 }, { "epoch": 0.61, "grad_norm": 0.451171875, "learning_rate": 1.0889512469796976e-05, "loss": 2.0715, "step": 29290 }, { "epoch": 0.61, "grad_norm": 0.5, "learning_rate": 1.0879595506243291e-05, "loss": 2.1133, "step": 29300 }, { "epoch": 0.61, "grad_norm": 0.44140625, "learning_rate": 1.0869680490300606e-05, "loss": 2.1344, "step": 29310 }, { "epoch": 0.61, "grad_norm": 0.4765625, "learning_rate": 1.0859767426655488e-05, "loss": 2.1221, "step": 29320 }, { "epoch": 0.61, "grad_norm": 0.451171875, "learning_rate": 1.0849856319993595e-05, "loss": 2.0857, "step": 29330 }, { "epoch": 0.61, "grad_norm": 0.431640625, "learning_rate": 1.0839947174999653e-05, "loss": 2.0728, "step": 29340 }, { "epoch": 0.61, "grad_norm": 0.421875, "learning_rate": 1.0830039996357473e-05, "loss": 2.1091, "step": 29350 }, { "epoch": 0.61, "grad_norm": 0.439453125, "learning_rate": 1.0820134788749916e-05, "loss": 2.1013, "step": 29360 }, { "epoch": 0.61, "grad_norm": 0.43359375, "learning_rate": 1.081023155685893e-05, "loss": 2.0933, "step": 29370 }, { "epoch": 0.61, "grad_norm": 0.443359375, "learning_rate": 1.0800330305365526e-05, "loss": 2.1147, "step": 29380 }, { "epoch": 0.61, "grad_norm": 0.455078125, "learning_rate": 1.0790431038949764e-05, "loss": 2.1455, "step": 29390 }, { "epoch": 0.61, "grad_norm": 0.46875, "learning_rate": 1.0780533762290782e-05, "loss": 2.1274, "step": 29400 }, { "epoch": 0.61, "grad_norm": 0.431640625, "learning_rate": 1.0770638480066782e-05, "loss": 2.1096, "step": 29410 }, { "epoch": 0.61, "grad_norm": 0.48828125, "learning_rate": 1.0760745196954998e-05, "loss": 2.1219, "step": 29420 }, { "epoch": 0.61, "grad_norm": 0.4765625, "learning_rate": 1.075085391763174e-05, "loss": 2.1362, "step": 29430 }, { "epoch": 0.61, "grad_norm": 0.4453125, "learning_rate": 1.0740964646772377e-05, "loss": 2.1168, "step": 29440 }, { "epoch": 0.61, "grad_norm": 0.4453125, "learning_rate": 1.07310773890513e-05, "loss": 2.1243, "step": 29450 }, { "epoch": 0.61, "grad_norm": 0.435546875, "learning_rate": 1.0721192149141976e-05, "loss": 2.1261, "step": 29460 }, { "epoch": 0.61, "grad_norm": 0.447265625, "learning_rate": 1.0711308931716903e-05, "loss": 2.1282, "step": 29470 }, { "epoch": 0.61, "grad_norm": 0.494140625, "learning_rate": 1.0701427741447635e-05, "loss": 2.1249, "step": 29480 }, { "epoch": 0.61, "grad_norm": 0.427734375, "learning_rate": 1.0691548583004752e-05, "loss": 2.1242, "step": 29490 }, { "epoch": 0.61, "grad_norm": 0.4375, "learning_rate": 1.0681671461057884e-05, "loss": 2.1277, "step": 29500 }, { "epoch": 0.61, "eval_accuracy": 0.5591184274296229, "eval_loss": 1.9932727813720703, "eval_runtime": 16.4505, "eval_samples_per_second": 36.169, "eval_steps_per_second": 1.155, "step": 29500 }, { "epoch": 0.61, "grad_norm": 0.46484375, "learning_rate": 1.0671796380275705e-05, "loss": 2.1436, "step": 29510 }, { "epoch": 0.61, "grad_norm": 0.4375, "learning_rate": 1.0661923345325905e-05, "loss": 2.1217, "step": 29520 }, { "epoch": 0.61, "grad_norm": 0.4375, "learning_rate": 1.0652052360875222e-05, "loss": 2.1073, "step": 29530 }, { "epoch": 0.61, "grad_norm": 0.435546875, "learning_rate": 1.0642183431589429e-05, "loss": 2.1244, "step": 29540 }, { "epoch": 0.61, "grad_norm": 0.44140625, "learning_rate": 1.0632316562133306e-05, "loss": 2.1187, "step": 29550 }, { "epoch": 0.61, "grad_norm": 0.42578125, "learning_rate": 1.0622451757170674e-05, "loss": 2.1234, "step": 29560 }, { "epoch": 0.61, "grad_norm": 0.44140625, "learning_rate": 1.0612589021364392e-05, "loss": 2.1303, "step": 29570 }, { "epoch": 0.61, "grad_norm": 0.4375, "learning_rate": 1.0602728359376307e-05, "loss": 2.0886, "step": 29580 }, { "epoch": 0.62, "grad_norm": 0.443359375, "learning_rate": 1.0592869775867316e-05, "loss": 2.1196, "step": 29590 }, { "epoch": 0.62, "grad_norm": 0.4375, "learning_rate": 1.0583013275497318e-05, "loss": 2.1159, "step": 29600 }, { "epoch": 0.62, "grad_norm": 0.44921875, "learning_rate": 1.0573158862925233e-05, "loss": 2.1427, "step": 29610 }, { "epoch": 0.62, "grad_norm": 0.474609375, "learning_rate": 1.0563306542808988e-05, "loss": 2.1101, "step": 29620 }, { "epoch": 0.62, "grad_norm": 0.43359375, "learning_rate": 1.0553456319805535e-05, "loss": 2.1227, "step": 29630 }, { "epoch": 0.62, "grad_norm": 0.451171875, "learning_rate": 1.0543608198570815e-05, "loss": 2.1621, "step": 29640 }, { "epoch": 0.62, "grad_norm": 0.44140625, "learning_rate": 1.0533762183759788e-05, "loss": 2.1057, "step": 29650 }, { "epoch": 0.62, "grad_norm": 0.5546875, "learning_rate": 1.052391828002642e-05, "loss": 2.1247, "step": 29660 }, { "epoch": 0.62, "grad_norm": 0.42578125, "learning_rate": 1.0514076492023678e-05, "loss": 2.1071, "step": 29670 }, { "epoch": 0.62, "grad_norm": 0.462890625, "learning_rate": 1.0504236824403517e-05, "loss": 2.1102, "step": 29680 }, { "epoch": 0.62, "grad_norm": 0.455078125, "learning_rate": 1.0494399281816907e-05, "loss": 2.1103, "step": 29690 }, { "epoch": 0.62, "grad_norm": 0.470703125, "learning_rate": 1.0484563868913804e-05, "loss": 2.1005, "step": 29700 }, { "epoch": 0.62, "grad_norm": 0.4375, "learning_rate": 1.0474730590343155e-05, "loss": 2.1277, "step": 29710 }, { "epoch": 0.62, "grad_norm": 0.42578125, "learning_rate": 1.0464899450752905e-05, "loss": 2.1271, "step": 29720 }, { "epoch": 0.62, "grad_norm": 0.431640625, "learning_rate": 1.045507045478999e-05, "loss": 2.125, "step": 29730 }, { "epoch": 0.62, "grad_norm": 0.45703125, "learning_rate": 1.0445243607100315e-05, "loss": 2.1203, "step": 29740 }, { "epoch": 0.62, "grad_norm": 0.4453125, "learning_rate": 1.0435418912328789e-05, "loss": 2.1179, "step": 29750 }, { "epoch": 0.62, "grad_norm": 0.419921875, "learning_rate": 1.0425596375119306e-05, "loss": 2.1003, "step": 29760 }, { "epoch": 0.62, "grad_norm": 0.462890625, "learning_rate": 1.0415776000114716e-05, "loss": 2.0789, "step": 29770 }, { "epoch": 0.62, "grad_norm": 0.416015625, "learning_rate": 1.0405957791956867e-05, "loss": 2.1184, "step": 29780 }, { "epoch": 0.62, "grad_norm": 0.439453125, "learning_rate": 1.0396141755286586e-05, "loss": 2.1058, "step": 29790 }, { "epoch": 0.62, "grad_norm": 0.4375, "learning_rate": 1.0386327894743653e-05, "loss": 2.1393, "step": 29800 }, { "epoch": 0.62, "grad_norm": 0.4296875, "learning_rate": 1.0376516214966835e-05, "loss": 2.1409, "step": 29810 }, { "epoch": 0.62, "grad_norm": 0.4375, "learning_rate": 1.0366706720593874e-05, "loss": 2.1125, "step": 29820 }, { "epoch": 0.62, "grad_norm": 0.4296875, "learning_rate": 1.0356899416261457e-05, "loss": 2.1123, "step": 29830 }, { "epoch": 0.62, "grad_norm": 0.4375, "learning_rate": 1.0347094306605257e-05, "loss": 2.102, "step": 29840 }, { "epoch": 0.62, "grad_norm": 0.421875, "learning_rate": 1.0337291396259897e-05, "loss": 2.1292, "step": 29850 }, { "epoch": 0.62, "grad_norm": 0.435546875, "learning_rate": 1.0327490689858974e-05, "loss": 2.1375, "step": 29860 }, { "epoch": 0.62, "grad_norm": 0.439453125, "learning_rate": 1.0317692192035016e-05, "loss": 2.1037, "step": 29870 }, { "epoch": 0.62, "grad_norm": 0.447265625, "learning_rate": 1.030789590741954e-05, "loss": 2.1209, "step": 29880 }, { "epoch": 0.62, "grad_norm": 0.5390625, "learning_rate": 1.0298101840643003e-05, "loss": 2.1366, "step": 29890 }, { "epoch": 0.62, "grad_norm": 0.494140625, "learning_rate": 1.0288309996334802e-05, "loss": 2.0962, "step": 29900 }, { "epoch": 0.62, "grad_norm": 0.43359375, "learning_rate": 1.0278520379123297e-05, "loss": 2.135, "step": 29910 }, { "epoch": 0.62, "grad_norm": 0.423828125, "learning_rate": 1.0268732993635803e-05, "loss": 2.1327, "step": 29920 }, { "epoch": 0.62, "grad_norm": 0.43359375, "learning_rate": 1.0258947844498555e-05, "loss": 2.1022, "step": 29930 }, { "epoch": 0.62, "grad_norm": 0.451171875, "learning_rate": 1.0249164936336752e-05, "loss": 2.1302, "step": 29940 }, { "epoch": 0.62, "grad_norm": 0.431640625, "learning_rate": 1.0239384273774532e-05, "loss": 2.1288, "step": 29950 }, { "epoch": 0.62, "grad_norm": 0.443359375, "learning_rate": 1.0229605861434957e-05, "loss": 2.1141, "step": 29960 }, { "epoch": 0.62, "grad_norm": 0.419921875, "learning_rate": 1.0219829703940047e-05, "loss": 2.1062, "step": 29970 }, { "epoch": 0.62, "grad_norm": 0.439453125, "learning_rate": 1.0210055805910734e-05, "loss": 2.1297, "step": 29980 }, { "epoch": 0.62, "grad_norm": 0.42578125, "learning_rate": 1.0200284171966896e-05, "loss": 2.1158, "step": 29990 }, { "epoch": 0.62, "grad_norm": 0.43359375, "learning_rate": 1.0190514806727339e-05, "loss": 2.1407, "step": 30000 }, { "epoch": 0.62, "eval_accuracy": 0.5591019985706893, "eval_loss": 1.9932328462600708, "eval_runtime": 16.4688, "eval_samples_per_second": 36.129, "eval_steps_per_second": 1.154, "step": 30000 }, { "epoch": 0.62, "grad_norm": 0.470703125, "learning_rate": 1.0180747714809794e-05, "loss": 2.1311, "step": 30010 }, { "epoch": 0.62, "grad_norm": 0.474609375, "learning_rate": 1.0170982900830914e-05, "loss": 2.0843, "step": 30020 }, { "epoch": 0.62, "grad_norm": 0.458984375, "learning_rate": 1.0161220369406283e-05, "loss": 2.0854, "step": 30030 }, { "epoch": 0.62, "grad_norm": 0.447265625, "learning_rate": 1.0151460125150402e-05, "loss": 2.1222, "step": 30040 }, { "epoch": 0.62, "grad_norm": 0.435546875, "learning_rate": 1.01417021726767e-05, "loss": 2.1272, "step": 30050 }, { "epoch": 0.62, "grad_norm": 0.44140625, "learning_rate": 1.01319465165975e-05, "loss": 2.1244, "step": 30060 }, { "epoch": 0.63, "grad_norm": 0.43359375, "learning_rate": 1.0122193161524064e-05, "loss": 2.1287, "step": 30070 }, { "epoch": 0.63, "grad_norm": 0.44140625, "learning_rate": 1.0112442112066552e-05, "loss": 2.1257, "step": 30080 }, { "epoch": 0.63, "grad_norm": 0.427734375, "learning_rate": 1.0102693372834042e-05, "loss": 2.1363, "step": 30090 }, { "epoch": 0.63, "grad_norm": 0.44921875, "learning_rate": 1.009294694843451e-05, "loss": 2.1155, "step": 30100 }, { "epoch": 0.63, "grad_norm": 0.431640625, "learning_rate": 1.0083202843474852e-05, "loss": 2.0924, "step": 30110 }, { "epoch": 0.63, "grad_norm": 0.42578125, "learning_rate": 1.0073461062560853e-05, "loss": 2.1383, "step": 30120 }, { "epoch": 0.63, "grad_norm": 0.44921875, "learning_rate": 1.0063721610297207e-05, "loss": 2.1162, "step": 30130 }, { "epoch": 0.63, "grad_norm": 0.421875, "learning_rate": 1.0053984491287517e-05, "loss": 2.1221, "step": 30140 }, { "epoch": 0.63, "grad_norm": 0.421875, "learning_rate": 1.004424971013426e-05, "loss": 2.1075, "step": 30150 }, { "epoch": 0.63, "grad_norm": 0.42578125, "learning_rate": 1.0034517271438826e-05, "loss": 2.1451, "step": 30160 }, { "epoch": 0.63, "grad_norm": 0.431640625, "learning_rate": 1.00247871798015e-05, "loss": 2.1068, "step": 30170 }, { "epoch": 0.63, "grad_norm": 0.4296875, "learning_rate": 1.0015059439821434e-05, "loss": 2.1205, "step": 30180 }, { "epoch": 0.63, "grad_norm": 0.458984375, "learning_rate": 1.0005334056096695e-05, "loss": 2.1149, "step": 30190 }, { "epoch": 0.63, "grad_norm": 0.431640625, "learning_rate": 9.995611033224234e-06, "loss": 2.1312, "step": 30200 }, { "epoch": 0.63, "grad_norm": 0.4296875, "learning_rate": 9.985890375799858e-06, "loss": 2.1447, "step": 30210 }, { "epoch": 0.63, "grad_norm": 0.44140625, "learning_rate": 9.976172088418291e-06, "loss": 2.1118, "step": 30220 }, { "epoch": 0.63, "grad_norm": 0.435546875, "learning_rate": 9.966456175673114e-06, "loss": 2.1096, "step": 30230 }, { "epoch": 0.63, "grad_norm": 0.419921875, "learning_rate": 9.956742642156803e-06, "loss": 2.1126, "step": 30240 }, { "epoch": 0.63, "grad_norm": 0.466796875, "learning_rate": 9.947031492460686e-06, "loss": 2.1153, "step": 30250 }, { "epoch": 0.63, "grad_norm": 0.431640625, "learning_rate": 9.937322731174985e-06, "loss": 2.1039, "step": 30260 }, { "epoch": 0.63, "grad_norm": 0.42578125, "learning_rate": 9.927616362888795e-06, "loss": 2.1111, "step": 30270 }, { "epoch": 0.63, "grad_norm": 0.455078125, "learning_rate": 9.917912392190051e-06, "loss": 2.129, "step": 30280 }, { "epoch": 0.63, "grad_norm": 0.44140625, "learning_rate": 9.908210823665587e-06, "loss": 2.0921, "step": 30290 }, { "epoch": 0.63, "grad_norm": 0.42578125, "learning_rate": 9.898511661901095e-06, "loss": 2.0968, "step": 30300 }, { "epoch": 0.63, "grad_norm": 0.423828125, "learning_rate": 9.888814911481105e-06, "loss": 2.1397, "step": 30310 }, { "epoch": 0.63, "grad_norm": 0.45703125, "learning_rate": 9.879120576989045e-06, "loss": 2.099, "step": 30320 }, { "epoch": 0.63, "grad_norm": 0.462890625, "learning_rate": 9.86942866300717e-06, "loss": 2.1389, "step": 30330 }, { "epoch": 0.63, "grad_norm": 0.421875, "learning_rate": 9.859739174116606e-06, "loss": 2.1263, "step": 30340 }, { "epoch": 0.63, "grad_norm": 0.44140625, "learning_rate": 9.850052114897325e-06, "loss": 2.1329, "step": 30350 }, { "epoch": 0.63, "grad_norm": 0.421875, "learning_rate": 9.840367489928166e-06, "loss": 2.0913, "step": 30360 }, { "epoch": 0.63, "grad_norm": 0.443359375, "learning_rate": 9.830685303786792e-06, "loss": 2.1319, "step": 30370 }, { "epoch": 0.63, "grad_norm": 0.44921875, "learning_rate": 9.821005561049735e-06, "loss": 2.1326, "step": 30380 }, { "epoch": 0.63, "grad_norm": 0.443359375, "learning_rate": 9.811328266292373e-06, "loss": 2.096, "step": 30390 }, { "epoch": 0.63, "grad_norm": 0.423828125, "learning_rate": 9.8016534240889e-06, "loss": 2.1223, "step": 30400 }, { "epoch": 0.63, "grad_norm": 0.578125, "learning_rate": 9.791981039012381e-06, "loss": 2.1531, "step": 30410 }, { "epoch": 0.63, "grad_norm": 0.439453125, "learning_rate": 9.782311115634706e-06, "loss": 2.1453, "step": 30420 }, { "epoch": 0.63, "grad_norm": 0.46875, "learning_rate": 9.772643658526608e-06, "loss": 2.1339, "step": 30430 }, { "epoch": 0.63, "grad_norm": 0.4296875, "learning_rate": 9.762978672257639e-06, "loss": 2.1072, "step": 30440 }, { "epoch": 0.63, "grad_norm": 0.4453125, "learning_rate": 9.753316161396201e-06, "loss": 2.0828, "step": 30450 }, { "epoch": 0.63, "grad_norm": 0.43359375, "learning_rate": 9.743656130509518e-06, "loss": 2.1321, "step": 30460 }, { "epoch": 0.63, "grad_norm": 0.435546875, "learning_rate": 9.733998584163641e-06, "loss": 2.1196, "step": 30470 }, { "epoch": 0.63, "grad_norm": 0.419921875, "learning_rate": 9.724343526923442e-06, "loss": 2.1043, "step": 30480 }, { "epoch": 0.63, "grad_norm": 0.421875, "learning_rate": 9.714690963352637e-06, "loss": 2.1051, "step": 30490 }, { "epoch": 0.63, "grad_norm": 0.427734375, "learning_rate": 9.705040898013733e-06, "loss": 2.1222, "step": 30500 }, { "epoch": 0.63, "eval_accuracy": 0.5590510691079951, "eval_loss": 1.9932135343551636, "eval_runtime": 16.4428, "eval_samples_per_second": 36.186, "eval_steps_per_second": 1.156, "step": 30500 }, { "epoch": 0.63, "grad_norm": 0.4296875, "learning_rate": 9.695393335468078e-06, "loss": 2.1234, "step": 30510 }, { "epoch": 0.63, "grad_norm": 0.435546875, "learning_rate": 9.685748280275837e-06, "loss": 2.1049, "step": 30520 }, { "epoch": 0.63, "grad_norm": 0.4375, "learning_rate": 9.676105736995972e-06, "loss": 2.1076, "step": 30530 }, { "epoch": 0.63, "grad_norm": 0.43359375, "learning_rate": 9.666465710186276e-06, "loss": 2.1172, "step": 30540 }, { "epoch": 0.64, "grad_norm": 0.43359375, "learning_rate": 9.656828204403352e-06, "loss": 2.1073, "step": 30550 }, { "epoch": 0.64, "grad_norm": 0.421875, "learning_rate": 9.647193224202591e-06, "loss": 2.1068, "step": 30560 }, { "epoch": 0.64, "grad_norm": 0.421875, "learning_rate": 9.637560774138213e-06, "loss": 2.0964, "step": 30570 }, { "epoch": 0.64, "grad_norm": 0.447265625, "learning_rate": 9.627930858763237e-06, "loss": 2.1041, "step": 30580 }, { "epoch": 0.64, "grad_norm": 0.455078125, "learning_rate": 9.618303482629472e-06, "loss": 2.1592, "step": 30590 }, { "epoch": 0.64, "grad_norm": 0.43359375, "learning_rate": 9.608678650287541e-06, "loss": 2.1262, "step": 30600 }, { "epoch": 0.64, "grad_norm": 0.453125, "learning_rate": 9.599056366286854e-06, "loss": 2.1262, "step": 30610 }, { "epoch": 0.64, "grad_norm": 0.4375, "learning_rate": 9.58943663517563e-06, "loss": 2.1378, "step": 30620 }, { "epoch": 0.64, "grad_norm": 0.435546875, "learning_rate": 9.579819461500861e-06, "loss": 2.1316, "step": 30630 }, { "epoch": 0.64, "grad_norm": 0.435546875, "learning_rate": 9.570204849808345e-06, "loss": 2.1083, "step": 30640 }, { "epoch": 0.64, "grad_norm": 0.484375, "learning_rate": 9.560592804642678e-06, "loss": 2.1088, "step": 30650 }, { "epoch": 0.64, "grad_norm": 0.443359375, "learning_rate": 9.550983330547212e-06, "loss": 2.1397, "step": 30660 }, { "epoch": 0.64, "grad_norm": 0.4765625, "learning_rate": 9.541376432064113e-06, "loss": 2.1238, "step": 30670 }, { "epoch": 0.64, "grad_norm": 0.451171875, "learning_rate": 9.531772113734325e-06, "loss": 2.1097, "step": 30680 }, { "epoch": 0.64, "grad_norm": 0.46875, "learning_rate": 9.52217038009755e-06, "loss": 2.1322, "step": 30690 }, { "epoch": 0.64, "grad_norm": 0.443359375, "learning_rate": 9.512571235692296e-06, "loss": 2.1074, "step": 30700 }, { "epoch": 0.64, "grad_norm": 0.46484375, "learning_rate": 9.502974685055835e-06, "loss": 2.1342, "step": 30710 }, { "epoch": 0.64, "grad_norm": 0.46875, "learning_rate": 9.493380732724211e-06, "loss": 2.137, "step": 30720 }, { "epoch": 0.64, "grad_norm": 0.43359375, "learning_rate": 9.483789383232241e-06, "loss": 2.1163, "step": 30730 }, { "epoch": 0.64, "grad_norm": 0.43359375, "learning_rate": 9.47420064111352e-06, "loss": 2.1196, "step": 30740 }, { "epoch": 0.64, "grad_norm": 0.423828125, "learning_rate": 9.464614510900396e-06, "loss": 2.1264, "step": 30750 }, { "epoch": 0.64, "grad_norm": 0.4375, "learning_rate": 9.455030997123993e-06, "loss": 2.0968, "step": 30760 }, { "epoch": 0.64, "grad_norm": 0.439453125, "learning_rate": 9.4454501043142e-06, "loss": 2.1388, "step": 30770 }, { "epoch": 0.64, "grad_norm": 0.435546875, "learning_rate": 9.435871836999657e-06, "loss": 2.0876, "step": 30780 }, { "epoch": 0.64, "grad_norm": 0.4609375, "learning_rate": 9.426296199707769e-06, "loss": 2.1163, "step": 30790 }, { "epoch": 0.64, "grad_norm": 0.455078125, "learning_rate": 9.4167231969647e-06, "loss": 2.1536, "step": 30800 }, { "epoch": 0.64, "grad_norm": 0.419921875, "learning_rate": 9.407152833295373e-06, "loss": 2.0823, "step": 30810 }, { "epoch": 0.64, "grad_norm": 0.4375, "learning_rate": 9.397585113223441e-06, "loss": 2.1023, "step": 30820 }, { "epoch": 0.64, "grad_norm": 0.439453125, "learning_rate": 9.388020041271338e-06, "loss": 2.1229, "step": 30830 }, { "epoch": 0.64, "grad_norm": 0.431640625, "learning_rate": 9.378457621960224e-06, "loss": 2.0858, "step": 30840 }, { "epoch": 0.64, "grad_norm": 0.478515625, "learning_rate": 9.368897859810015e-06, "loss": 2.1202, "step": 30850 }, { "epoch": 0.64, "grad_norm": 0.462890625, "learning_rate": 9.359340759339365e-06, "loss": 2.1152, "step": 30860 }, { "epoch": 0.64, "grad_norm": 0.439453125, "learning_rate": 9.349786325065682e-06, "loss": 2.0813, "step": 30870 }, { "epoch": 0.64, "grad_norm": 0.41015625, "learning_rate": 9.340234561505094e-06, "loss": 2.1322, "step": 30880 }, { "epoch": 0.64, "grad_norm": 0.416015625, "learning_rate": 9.330685473172485e-06, "loss": 2.1322, "step": 30890 }, { "epoch": 0.64, "grad_norm": 0.435546875, "learning_rate": 9.321139064581473e-06, "loss": 2.107, "step": 30900 }, { "epoch": 0.64, "grad_norm": 0.43359375, "learning_rate": 9.311595340244392e-06, "loss": 2.1386, "step": 30910 }, { "epoch": 0.64, "grad_norm": 0.458984375, "learning_rate": 9.302054304672324e-06, "loss": 2.1254, "step": 30920 }, { "epoch": 0.64, "grad_norm": 0.451171875, "learning_rate": 9.292515962375081e-06, "loss": 2.1013, "step": 30930 }, { "epoch": 0.64, "grad_norm": 0.439453125, "learning_rate": 9.282980317861188e-06, "loss": 2.0934, "step": 30940 }, { "epoch": 0.64, "grad_norm": 0.427734375, "learning_rate": 9.273447375637903e-06, "loss": 2.1306, "step": 30950 }, { "epoch": 0.64, "grad_norm": 0.50390625, "learning_rate": 9.263917140211218e-06, "loss": 2.1026, "step": 30960 }, { "epoch": 0.64, "grad_norm": 0.4453125, "learning_rate": 9.254389616085822e-06, "loss": 2.0771, "step": 30970 }, { "epoch": 0.64, "grad_norm": 0.443359375, "learning_rate": 9.244864807765137e-06, "loss": 2.0995, "step": 30980 }, { "epoch": 0.64, "grad_norm": 0.443359375, "learning_rate": 9.235342719751303e-06, "loss": 2.1427, "step": 30990 }, { "epoch": 0.64, "grad_norm": 0.4375, "learning_rate": 9.22582335654517e-06, "loss": 2.1146, "step": 31000 }, { "epoch": 0.64, "eval_accuracy": 0.5590855697117557, "eval_loss": 1.9931367635726929, "eval_runtime": 16.4389, "eval_samples_per_second": 36.195, "eval_steps_per_second": 1.156, "step": 31000 }, { "epoch": 0.64, "grad_norm": 0.427734375, "learning_rate": 9.216306722646294e-06, "loss": 2.1145, "step": 31010 }, { "epoch": 0.64, "grad_norm": 0.49609375, "learning_rate": 9.20679282255295e-06, "loss": 2.1444, "step": 31020 }, { "epoch": 0.65, "grad_norm": 0.46484375, "learning_rate": 9.197281660762127e-06, "loss": 2.0652, "step": 31030 }, { "epoch": 0.65, "grad_norm": 0.423828125, "learning_rate": 9.187773241769498e-06, "loss": 2.114, "step": 31040 }, { "epoch": 0.65, "grad_norm": 0.41796875, "learning_rate": 9.178267570069455e-06, "loss": 2.1009, "step": 31050 }, { "epoch": 0.65, "grad_norm": 0.4453125, "learning_rate": 9.168764650155096e-06, "loss": 2.119, "step": 31060 }, { "epoch": 0.65, "grad_norm": 0.451171875, "learning_rate": 9.1592644865182e-06, "loss": 2.138, "step": 31070 }, { "epoch": 0.65, "grad_norm": 0.4296875, "learning_rate": 9.149767083649265e-06, "loss": 2.1119, "step": 31080 }, { "epoch": 0.65, "grad_norm": 0.447265625, "learning_rate": 9.140272446037467e-06, "loss": 2.1347, "step": 31090 }, { "epoch": 0.65, "grad_norm": 0.416015625, "learning_rate": 9.130780578170681e-06, "loss": 2.0966, "step": 31100 }, { "epoch": 0.65, "grad_norm": 0.447265625, "learning_rate": 9.12129148453547e-06, "loss": 2.1309, "step": 31110 }, { "epoch": 0.65, "grad_norm": 0.43359375, "learning_rate": 9.111805169617101e-06, "loss": 2.1442, "step": 31120 }, { "epoch": 0.65, "grad_norm": 0.43359375, "learning_rate": 9.102321637899498e-06, "loss": 2.0949, "step": 31130 }, { "epoch": 0.65, "grad_norm": 0.4296875, "learning_rate": 9.092840893865297e-06, "loss": 2.1191, "step": 31140 }, { "epoch": 0.65, "grad_norm": 0.431640625, "learning_rate": 9.08336294199581e-06, "loss": 2.1242, "step": 31150 }, { "epoch": 0.65, "grad_norm": 0.46875, "learning_rate": 9.073887786771011e-06, "loss": 2.115, "step": 31160 }, { "epoch": 0.65, "grad_norm": 0.443359375, "learning_rate": 9.064415432669576e-06, "loss": 2.1381, "step": 31170 }, { "epoch": 0.65, "grad_norm": 0.421875, "learning_rate": 9.054945884168844e-06, "loss": 2.0843, "step": 31180 }, { "epoch": 0.65, "grad_norm": 0.44921875, "learning_rate": 9.04547914574484e-06, "loss": 2.0895, "step": 31190 }, { "epoch": 0.65, "grad_norm": 0.423828125, "learning_rate": 9.03601522187224e-06, "loss": 2.1387, "step": 31200 }, { "epoch": 0.65, "grad_norm": 0.42578125, "learning_rate": 9.026554117024405e-06, "loss": 2.0937, "step": 31210 }, { "epoch": 0.65, "grad_norm": 0.427734375, "learning_rate": 9.017095835673363e-06, "loss": 2.1074, "step": 31220 }, { "epoch": 0.65, "grad_norm": 0.4375, "learning_rate": 9.007640382289804e-06, "loss": 2.1211, "step": 31230 }, { "epoch": 0.65, "grad_norm": 0.439453125, "learning_rate": 8.998187761343075e-06, "loss": 2.1179, "step": 31240 }, { "epoch": 0.65, "grad_norm": 0.421875, "learning_rate": 8.9887379773012e-06, "loss": 2.0998, "step": 31250 }, { "epoch": 0.65, "grad_norm": 0.431640625, "learning_rate": 8.979291034630845e-06, "loss": 2.1055, "step": 31260 }, { "epoch": 0.65, "grad_norm": 0.4296875, "learning_rate": 8.969846937797346e-06, "loss": 2.0696, "step": 31270 }, { "epoch": 0.65, "grad_norm": 0.43359375, "learning_rate": 8.960405691264691e-06, "loss": 2.1269, "step": 31280 }, { "epoch": 0.65, "grad_norm": 0.4375, "learning_rate": 8.95096729949551e-06, "loss": 2.1051, "step": 31290 }, { "epoch": 0.65, "grad_norm": 0.4375, "learning_rate": 8.941531766951097e-06, "loss": 2.1113, "step": 31300 }, { "epoch": 0.65, "grad_norm": 0.439453125, "learning_rate": 8.932099098091395e-06, "loss": 2.1291, "step": 31310 }, { "epoch": 0.65, "grad_norm": 0.439453125, "learning_rate": 8.922669297374976e-06, "loss": 2.1434, "step": 31320 }, { "epoch": 0.65, "grad_norm": 0.43359375, "learning_rate": 8.913242369259076e-06, "loss": 2.0872, "step": 31330 }, { "epoch": 0.65, "grad_norm": 0.41796875, "learning_rate": 8.903818318199569e-06, "loss": 2.0831, "step": 31340 }, { "epoch": 0.65, "grad_norm": 0.4296875, "learning_rate": 8.894397148650958e-06, "loss": 2.1078, "step": 31350 }, { "epoch": 0.65, "grad_norm": 0.412109375, "learning_rate": 8.88497886506639e-06, "loss": 2.0866, "step": 31360 }, { "epoch": 0.65, "grad_norm": 0.435546875, "learning_rate": 8.875563471897663e-06, "loss": 2.1092, "step": 31370 }, { "epoch": 0.65, "grad_norm": 0.447265625, "learning_rate": 8.866150973595179e-06, "loss": 2.1217, "step": 31380 }, { "epoch": 0.65, "grad_norm": 0.42578125, "learning_rate": 8.856741374607992e-06, "loss": 2.1097, "step": 31390 }, { "epoch": 0.65, "grad_norm": 0.439453125, "learning_rate": 8.847334679383789e-06, "loss": 2.0865, "step": 31400 }, { "epoch": 0.65, "grad_norm": 0.46875, "learning_rate": 8.837930892368876e-06, "loss": 2.1002, "step": 31410 }, { "epoch": 0.65, "grad_norm": 0.6875, "learning_rate": 8.828530018008175e-06, "loss": 2.1029, "step": 31420 }, { "epoch": 0.65, "grad_norm": 0.435546875, "learning_rate": 8.819132060745248e-06, "loss": 2.0935, "step": 31430 }, { "epoch": 0.65, "grad_norm": 0.46484375, "learning_rate": 8.809737025022279e-06, "loss": 2.1301, "step": 31440 }, { "epoch": 0.65, "grad_norm": 0.4296875, "learning_rate": 8.80034491528005e-06, "loss": 2.1212, "step": 31450 }, { "epoch": 0.65, "grad_norm": 0.439453125, "learning_rate": 8.790955735957984e-06, "loss": 2.117, "step": 31460 }, { "epoch": 0.65, "grad_norm": 0.4296875, "learning_rate": 8.781569491494104e-06, "loss": 2.1244, "step": 31470 }, { "epoch": 0.65, "grad_norm": 0.451171875, "learning_rate": 8.772186186325053e-06, "loss": 2.1206, "step": 31480 }, { "epoch": 0.65, "grad_norm": 0.439453125, "learning_rate": 8.762805824886077e-06, "loss": 2.1079, "step": 31490 }, { "epoch": 0.65, "grad_norm": 0.453125, "learning_rate": 8.753428411611046e-06, "loss": 2.1441, "step": 31500 }, { "epoch": 0.65, "eval_accuracy": 0.5591299276308764, "eval_loss": 1.9931890964508057, "eval_runtime": 16.4439, "eval_samples_per_second": 36.184, "eval_steps_per_second": 1.155, "step": 31500 }, { "epoch": 0.66, "grad_norm": 0.4296875, "learning_rate": 8.744053950932412e-06, "loss": 2.1286, "step": 31510 }, { "epoch": 0.66, "grad_norm": 0.458984375, "learning_rate": 8.73468244728125e-06, "loss": 2.1114, "step": 31520 }, { "epoch": 0.66, "grad_norm": 0.4453125, "learning_rate": 8.725313905087245e-06, "loss": 2.1194, "step": 31530 }, { "epoch": 0.66, "grad_norm": 0.490234375, "learning_rate": 8.715948328778651e-06, "loss": 2.1349, "step": 31540 }, { "epoch": 0.66, "grad_norm": 0.4296875, "learning_rate": 8.706585722782351e-06, "loss": 2.1535, "step": 31550 }, { "epoch": 0.66, "grad_norm": 0.515625, "learning_rate": 8.697226091523814e-06, "loss": 2.108, "step": 31560 }, { "epoch": 0.66, "grad_norm": 0.431640625, "learning_rate": 8.687869439427092e-06, "loss": 2.0905, "step": 31570 }, { "epoch": 0.66, "grad_norm": 0.49609375, "learning_rate": 8.67851577091484e-06, "loss": 2.1003, "step": 31580 }, { "epoch": 0.66, "grad_norm": 0.431640625, "learning_rate": 8.669165090408303e-06, "loss": 2.1145, "step": 31590 }, { "epoch": 0.66, "grad_norm": 0.4375, "learning_rate": 8.659817402327317e-06, "loss": 2.1379, "step": 31600 }, { "epoch": 0.66, "grad_norm": 0.43359375, "learning_rate": 8.650472711090288e-06, "loss": 2.1164, "step": 31610 }, { "epoch": 0.66, "grad_norm": 0.447265625, "learning_rate": 8.641131021114216e-06, "loss": 2.1077, "step": 31620 }, { "epoch": 0.66, "grad_norm": 0.47265625, "learning_rate": 8.631792336814692e-06, "loss": 2.1436, "step": 31630 }, { "epoch": 0.66, "grad_norm": 0.458984375, "learning_rate": 8.62245666260586e-06, "loss": 2.1146, "step": 31640 }, { "epoch": 0.66, "grad_norm": 0.474609375, "learning_rate": 8.613124002900472e-06, "loss": 2.1544, "step": 31650 }, { "epoch": 0.66, "grad_norm": 0.578125, "learning_rate": 8.60379436210983e-06, "loss": 2.1234, "step": 31660 }, { "epoch": 0.66, "grad_norm": 0.4375, "learning_rate": 8.594467744643827e-06, "loss": 2.1442, "step": 31670 }, { "epoch": 0.66, "grad_norm": 0.431640625, "learning_rate": 8.585144154910909e-06, "loss": 2.1102, "step": 31680 }, { "epoch": 0.66, "grad_norm": 0.4375, "learning_rate": 8.575823597318113e-06, "loss": 2.1153, "step": 31690 }, { "epoch": 0.66, "grad_norm": 0.416015625, "learning_rate": 8.566506076271019e-06, "loss": 2.1011, "step": 31700 }, { "epoch": 0.66, "grad_norm": 0.453125, "learning_rate": 8.55719159617379e-06, "loss": 2.1486, "step": 31710 }, { "epoch": 0.66, "grad_norm": 0.431640625, "learning_rate": 8.54788016142915e-06, "loss": 2.1039, "step": 31720 }, { "epoch": 0.66, "grad_norm": 0.43359375, "learning_rate": 8.538571776438367e-06, "loss": 2.1376, "step": 31730 }, { "epoch": 0.66, "grad_norm": 0.42578125, "learning_rate": 8.529266445601286e-06, "loss": 2.1132, "step": 31740 }, { "epoch": 0.66, "grad_norm": 0.50390625, "learning_rate": 8.519964173316307e-06, "loss": 2.1408, "step": 31750 }, { "epoch": 0.66, "grad_norm": 0.427734375, "learning_rate": 8.510664963980367e-06, "loss": 2.1182, "step": 31760 }, { "epoch": 0.66, "grad_norm": 0.423828125, "learning_rate": 8.501368821988972e-06, "loss": 2.0857, "step": 31770 }, { "epoch": 0.66, "grad_norm": 0.443359375, "learning_rate": 8.492075751736175e-06, "loss": 2.0936, "step": 31780 }, { "epoch": 0.66, "grad_norm": 0.4453125, "learning_rate": 8.48278575761458e-06, "loss": 2.1202, "step": 31790 }, { "epoch": 0.66, "grad_norm": 0.42578125, "learning_rate": 8.47349884401532e-06, "loss": 2.1272, "step": 31800 }, { "epoch": 0.66, "grad_norm": 0.5078125, "learning_rate": 8.464215015328092e-06, "loss": 2.1261, "step": 31810 }, { "epoch": 0.66, "grad_norm": 0.48828125, "learning_rate": 8.454934275941129e-06, "loss": 2.157, "step": 31820 }, { "epoch": 0.66, "grad_norm": 0.42578125, "learning_rate": 8.445656630241196e-06, "loss": 2.1043, "step": 31830 }, { "epoch": 0.66, "grad_norm": 0.451171875, "learning_rate": 8.436382082613604e-06, "loss": 2.1176, "step": 31840 }, { "epoch": 0.66, "grad_norm": 0.42578125, "learning_rate": 8.427110637442202e-06, "loss": 2.1065, "step": 31850 }, { "epoch": 0.66, "grad_norm": 0.91796875, "learning_rate": 8.417842299109356e-06, "loss": 2.095, "step": 31860 }, { "epoch": 0.66, "grad_norm": 0.447265625, "learning_rate": 8.408577071995984e-06, "loss": 2.0945, "step": 31870 }, { "epoch": 0.66, "grad_norm": 0.4375, "learning_rate": 8.399314960481527e-06, "loss": 2.0827, "step": 31880 }, { "epoch": 0.66, "grad_norm": 0.416015625, "learning_rate": 8.390055968943941e-06, "loss": 2.1257, "step": 31890 }, { "epoch": 0.66, "grad_norm": 0.46875, "learning_rate": 8.380800101759727e-06, "loss": 2.1136, "step": 31900 }, { "epoch": 0.66, "grad_norm": 0.474609375, "learning_rate": 8.37154736330389e-06, "loss": 2.1352, "step": 31910 }, { "epoch": 0.66, "grad_norm": 0.431640625, "learning_rate": 8.362297757949979e-06, "loss": 2.1248, "step": 31920 }, { "epoch": 0.66, "grad_norm": 0.453125, "learning_rate": 8.353051290070028e-06, "loss": 2.1162, "step": 31930 }, { "epoch": 0.66, "grad_norm": 0.53515625, "learning_rate": 8.34380796403463e-06, "loss": 2.1407, "step": 31940 }, { "epoch": 0.66, "grad_norm": 0.416015625, "learning_rate": 8.334567784212855e-06, "loss": 2.1092, "step": 31950 }, { "epoch": 0.66, "grad_norm": 0.427734375, "learning_rate": 8.325330754972311e-06, "loss": 2.1082, "step": 31960 }, { "epoch": 0.66, "grad_norm": 0.443359375, "learning_rate": 8.316096880679102e-06, "loss": 2.0892, "step": 31970 }, { "epoch": 0.66, "grad_norm": 0.48046875, "learning_rate": 8.30686616569786e-06, "loss": 2.1229, "step": 31980 }, { "epoch": 0.67, "grad_norm": 0.423828125, "learning_rate": 8.297638614391693e-06, "loss": 2.0932, "step": 31990 }, { "epoch": 0.67, "grad_norm": 0.435546875, "learning_rate": 8.288414231122242e-06, "loss": 2.1224, "step": 32000 }, { "epoch": 0.67, "eval_accuracy": 0.5589754963569006, "eval_loss": 1.9931281805038452, "eval_runtime": 16.4271, "eval_samples_per_second": 36.221, "eval_steps_per_second": 1.157, "step": 32000 }, { "epoch": 0.67, "grad_norm": 0.4609375, "learning_rate": 8.279193020249642e-06, "loss": 2.1093, "step": 32010 }, { "epoch": 0.67, "grad_norm": 0.48046875, "learning_rate": 8.269974986132514e-06, "loss": 2.1047, "step": 32020 }, { "epoch": 0.67, "grad_norm": 0.435546875, "learning_rate": 8.260760133128e-06, "loss": 2.1104, "step": 32030 }, { "epoch": 0.67, "grad_norm": 0.44921875, "learning_rate": 8.25154846559173e-06, "loss": 2.1044, "step": 32040 }, { "epoch": 0.67, "grad_norm": 0.458984375, "learning_rate": 8.242339987877813e-06, "loss": 2.1089, "step": 32050 }, { "epoch": 0.67, "grad_norm": 0.431640625, "learning_rate": 8.233134704338874e-06, "loss": 2.0808, "step": 32060 }, { "epoch": 0.67, "grad_norm": 0.462890625, "learning_rate": 8.223932619326022e-06, "loss": 2.1162, "step": 32070 }, { "epoch": 0.67, "grad_norm": 0.4453125, "learning_rate": 8.214733737188835e-06, "loss": 2.1038, "step": 32080 }, { "epoch": 0.67, "grad_norm": 0.44140625, "learning_rate": 8.205538062275402e-06, "loss": 2.0947, "step": 32090 }, { "epoch": 0.67, "grad_norm": 0.447265625, "learning_rate": 8.196345598932289e-06, "loss": 2.1025, "step": 32100 }, { "epoch": 0.67, "grad_norm": 0.447265625, "learning_rate": 8.18715635150453e-06, "loss": 2.1128, "step": 32110 }, { "epoch": 0.67, "grad_norm": 0.431640625, "learning_rate": 8.177970324335658e-06, "loss": 2.11, "step": 32120 }, { "epoch": 0.67, "grad_norm": 0.443359375, "learning_rate": 8.168787521767677e-06, "loss": 2.1063, "step": 32130 }, { "epoch": 0.67, "grad_norm": 0.46875, "learning_rate": 8.159607948141061e-06, "loss": 2.1135, "step": 32140 }, { "epoch": 0.67, "grad_norm": 0.451171875, "learning_rate": 8.150431607794765e-06, "loss": 2.1076, "step": 32150 }, { "epoch": 0.67, "grad_norm": 0.4375, "learning_rate": 8.141258505066207e-06, "loss": 2.1104, "step": 32160 }, { "epoch": 0.67, "grad_norm": 0.4453125, "learning_rate": 8.13208864429129e-06, "loss": 2.1295, "step": 32170 }, { "epoch": 0.67, "grad_norm": 0.4453125, "learning_rate": 8.122922029804365e-06, "loss": 2.1304, "step": 32180 }, { "epoch": 0.67, "grad_norm": 0.435546875, "learning_rate": 8.113758665938264e-06, "loss": 2.1188, "step": 32190 }, { "epoch": 0.67, "grad_norm": 0.435546875, "learning_rate": 8.104598557024284e-06, "loss": 2.0994, "step": 32200 }, { "epoch": 0.67, "grad_norm": 0.43359375, "learning_rate": 8.095441707392164e-06, "loss": 2.1194, "step": 32210 }, { "epoch": 0.67, "grad_norm": 0.44921875, "learning_rate": 8.086288121370122e-06, "loss": 2.109, "step": 32220 }, { "epoch": 0.67, "grad_norm": 0.4140625, "learning_rate": 8.077137803284833e-06, "loss": 2.1332, "step": 32230 }, { "epoch": 0.67, "grad_norm": 0.4453125, "learning_rate": 8.067990757461406e-06, "loss": 2.1041, "step": 32240 }, { "epoch": 0.67, "grad_norm": 0.439453125, "learning_rate": 8.05884698822343e-06, "loss": 2.1283, "step": 32250 }, { "epoch": 0.67, "grad_norm": 0.453125, "learning_rate": 8.049706499892931e-06, "loss": 2.1538, "step": 32260 }, { "epoch": 0.67, "grad_norm": 0.419921875, "learning_rate": 8.040569296790383e-06, "loss": 2.1161, "step": 32270 }, { "epoch": 0.67, "grad_norm": 0.41796875, "learning_rate": 8.031435383234714e-06, "loss": 2.1173, "step": 32280 }, { "epoch": 0.67, "grad_norm": 0.416015625, "learning_rate": 8.022304763543297e-06, "loss": 2.1175, "step": 32290 }, { "epoch": 0.67, "grad_norm": 0.44140625, "learning_rate": 8.013177442031935e-06, "loss": 2.168, "step": 32300 }, { "epoch": 0.67, "grad_norm": 0.419921875, "learning_rate": 8.00405342301489e-06, "loss": 2.1067, "step": 32310 }, { "epoch": 0.67, "grad_norm": 0.44140625, "learning_rate": 7.994932710804858e-06, "loss": 2.109, "step": 32320 }, { "epoch": 0.67, "grad_norm": 0.435546875, "learning_rate": 7.985815309712959e-06, "loss": 2.0894, "step": 32330 }, { "epoch": 0.67, "grad_norm": 0.41796875, "learning_rate": 7.976701224048763e-06, "loss": 2.1172, "step": 32340 }, { "epoch": 0.67, "grad_norm": 0.443359375, "learning_rate": 7.967590458120267e-06, "loss": 2.1222, "step": 32350 }, { "epoch": 0.67, "grad_norm": 0.42578125, "learning_rate": 7.95848301623391e-06, "loss": 2.1209, "step": 32360 }, { "epoch": 0.67, "grad_norm": 0.42578125, "learning_rate": 7.94937890269453e-06, "loss": 2.1348, "step": 32370 }, { "epoch": 0.67, "grad_norm": 0.41796875, "learning_rate": 7.940278121805422e-06, "loss": 2.1401, "step": 32380 }, { "epoch": 0.67, "grad_norm": 0.435546875, "learning_rate": 7.931180677868302e-06, "loss": 2.1679, "step": 32390 }, { "epoch": 0.67, "grad_norm": 0.453125, "learning_rate": 7.922086575183286e-06, "loss": 2.1095, "step": 32400 }, { "epoch": 0.67, "grad_norm": 0.421875, "learning_rate": 7.912995818048942e-06, "loss": 2.1217, "step": 32410 }, { "epoch": 0.67, "grad_norm": 0.435546875, "learning_rate": 7.903908410762225e-06, "loss": 2.0845, "step": 32420 }, { "epoch": 0.67, "grad_norm": 0.458984375, "learning_rate": 7.894824357618537e-06, "loss": 2.158, "step": 32430 }, { "epoch": 0.67, "grad_norm": 0.43359375, "learning_rate": 7.88574366291167e-06, "loss": 2.0827, "step": 32440 }, { "epoch": 0.67, "grad_norm": 0.42578125, "learning_rate": 7.876666330933849e-06, "loss": 2.1272, "step": 32450 }, { "epoch": 0.67, "grad_norm": 0.45703125, "learning_rate": 7.867592365975685e-06, "loss": 2.1443, "step": 32460 }, { "epoch": 0.68, "grad_norm": 0.47265625, "learning_rate": 7.858521772326219e-06, "loss": 2.0929, "step": 32470 }, { "epoch": 0.68, "grad_norm": 0.419921875, "learning_rate": 7.8494545542729e-06, "loss": 2.1117, "step": 32480 }, { "epoch": 0.68, "grad_norm": 0.455078125, "learning_rate": 7.840390716101557e-06, "loss": 2.1476, "step": 32490 }, { "epoch": 0.68, "grad_norm": 0.427734375, "learning_rate": 7.831330262096444e-06, "loss": 2.0878, "step": 32500 }, { "epoch": 0.68, "eval_accuracy": 0.5590691408528221, "eval_loss": 1.993199110031128, "eval_runtime": 16.4617, "eval_samples_per_second": 36.145, "eval_steps_per_second": 1.154, "step": 32500 }, { "epoch": 0.68, "grad_norm": 0.44921875, "learning_rate": 7.822273196540217e-06, "loss": 2.0906, "step": 32510 }, { "epoch": 0.68, "grad_norm": 0.447265625, "learning_rate": 7.813219523713905e-06, "loss": 2.127, "step": 32520 }, { "epoch": 0.68, "grad_norm": 0.435546875, "learning_rate": 7.804169247896964e-06, "loss": 2.1301, "step": 32530 }, { "epoch": 0.68, "grad_norm": 0.44921875, "learning_rate": 7.79512237336722e-06, "loss": 2.1127, "step": 32540 }, { "epoch": 0.68, "grad_norm": 0.453125, "learning_rate": 7.786078904400916e-06, "loss": 2.1312, "step": 32550 }, { "epoch": 0.68, "grad_norm": 0.42578125, "learning_rate": 7.777038845272656e-06, "loss": 2.1347, "step": 32560 }, { "epoch": 0.68, "grad_norm": 0.4453125, "learning_rate": 7.768002200255454e-06, "loss": 2.1091, "step": 32570 }, { "epoch": 0.68, "grad_norm": 0.427734375, "learning_rate": 7.758968973620709e-06, "loss": 2.1449, "step": 32580 }, { "epoch": 0.68, "grad_norm": 0.4296875, "learning_rate": 7.749939169638187e-06, "loss": 2.1581, "step": 32590 }, { "epoch": 0.68, "grad_norm": 0.419921875, "learning_rate": 7.740912792576052e-06, "loss": 2.0935, "step": 32600 }, { "epoch": 0.68, "grad_norm": 0.439453125, "learning_rate": 7.731889846700856e-06, "loss": 2.0973, "step": 32610 }, { "epoch": 0.68, "grad_norm": 0.4765625, "learning_rate": 7.722870336277497e-06, "loss": 2.1215, "step": 32620 }, { "epoch": 0.68, "grad_norm": 0.431640625, "learning_rate": 7.713854265569283e-06, "loss": 2.1051, "step": 32630 }, { "epoch": 0.68, "grad_norm": 0.4296875, "learning_rate": 7.704841638837883e-06, "loss": 2.1251, "step": 32640 }, { "epoch": 0.68, "grad_norm": 0.443359375, "learning_rate": 7.695832460343331e-06, "loss": 2.1171, "step": 32650 }, { "epoch": 0.68, "grad_norm": 0.43359375, "learning_rate": 7.686826734344045e-06, "loss": 2.1084, "step": 32660 }, { "epoch": 0.68, "grad_norm": 0.44140625, "learning_rate": 7.677824465096794e-06, "loss": 2.1094, "step": 32670 }, { "epoch": 0.68, "grad_norm": 0.4375, "learning_rate": 7.668825656856736e-06, "loss": 2.1289, "step": 32680 }, { "epoch": 0.68, "grad_norm": 0.439453125, "learning_rate": 7.659830313877367e-06, "loss": 2.0985, "step": 32690 }, { "epoch": 0.68, "grad_norm": 0.4453125, "learning_rate": 7.650838440410566e-06, "loss": 2.0966, "step": 32700 }, { "epoch": 0.68, "grad_norm": 0.451171875, "learning_rate": 7.641850040706562e-06, "loss": 2.0937, "step": 32710 }, { "epoch": 0.68, "grad_norm": 0.42578125, "learning_rate": 7.63286511901394e-06, "loss": 2.091, "step": 32720 }, { "epoch": 0.68, "grad_norm": 0.466796875, "learning_rate": 7.623883679579651e-06, "loss": 2.1084, "step": 32730 }, { "epoch": 0.68, "grad_norm": 0.455078125, "learning_rate": 7.614905726649e-06, "loss": 2.1007, "step": 32740 }, { "epoch": 0.68, "grad_norm": 0.46875, "learning_rate": 7.605931264465624e-06, "loss": 2.1229, "step": 32750 }, { "epoch": 0.68, "grad_norm": 0.48828125, "learning_rate": 7.596960297271531e-06, "loss": 2.0927, "step": 32760 }, { "epoch": 0.68, "grad_norm": 0.439453125, "learning_rate": 7.587992829307079e-06, "loss": 2.1181, "step": 32770 }, { "epoch": 0.68, "grad_norm": 0.44921875, "learning_rate": 7.579028864810948e-06, "loss": 2.0925, "step": 32780 }, { "epoch": 0.68, "grad_norm": 0.427734375, "learning_rate": 7.570068408020184e-06, "loss": 2.1103, "step": 32790 }, { "epoch": 0.68, "grad_norm": 0.435546875, "learning_rate": 7.561111463170176e-06, "loss": 2.0649, "step": 32800 }, { "epoch": 0.68, "grad_norm": 0.466796875, "learning_rate": 7.55215803449463e-06, "loss": 2.1158, "step": 32810 }, { "epoch": 0.68, "grad_norm": 0.44921875, "learning_rate": 7.543208126225616e-06, "loss": 2.1479, "step": 32820 }, { "epoch": 0.68, "grad_norm": 0.44921875, "learning_rate": 7.534261742593529e-06, "loss": 2.1205, "step": 32830 }, { "epoch": 0.68, "grad_norm": 0.439453125, "learning_rate": 7.5253188878270914e-06, "loss": 2.1224, "step": 32840 }, { "epoch": 0.68, "grad_norm": 0.44140625, "learning_rate": 7.516379566153369e-06, "loss": 2.1551, "step": 32850 }, { "epoch": 0.68, "grad_norm": 0.41015625, "learning_rate": 7.507443781797759e-06, "loss": 2.12, "step": 32860 }, { "epoch": 0.68, "grad_norm": 0.427734375, "learning_rate": 7.498511538983971e-06, "loss": 2.1026, "step": 32870 }, { "epoch": 0.68, "grad_norm": 0.4296875, "learning_rate": 7.4895828419340505e-06, "loss": 2.1311, "step": 32880 }, { "epoch": 0.68, "grad_norm": 0.50390625, "learning_rate": 7.480657694868378e-06, "loss": 2.1483, "step": 32890 }, { "epoch": 0.68, "grad_norm": 0.427734375, "learning_rate": 7.471736102005631e-06, "loss": 2.0904, "step": 32900 }, { "epoch": 0.68, "grad_norm": 0.478515625, "learning_rate": 7.462818067562832e-06, "loss": 2.1419, "step": 32910 }, { "epoch": 0.68, "grad_norm": 0.44140625, "learning_rate": 7.4539035957553e-06, "loss": 2.1459, "step": 32920 }, { "epoch": 0.68, "grad_norm": 0.44140625, "learning_rate": 7.444992690796691e-06, "loss": 2.1515, "step": 32930 }, { "epoch": 0.68, "grad_norm": 0.4296875, "learning_rate": 7.4360853568989515e-06, "loss": 2.086, "step": 32940 }, { "epoch": 0.69, "grad_norm": 0.443359375, "learning_rate": 7.427181598272361e-06, "loss": 2.1231, "step": 32950 }, { "epoch": 0.69, "grad_norm": 0.451171875, "learning_rate": 7.418281419125507e-06, "loss": 2.0773, "step": 32960 }, { "epoch": 0.69, "grad_norm": 0.41015625, "learning_rate": 7.4093848236652676e-06, "loss": 2.0991, "step": 32970 }, { "epoch": 0.69, "grad_norm": 0.4296875, "learning_rate": 7.400491816096841e-06, "loss": 2.106, "step": 32980 }, { "epoch": 0.69, "grad_norm": 0.4609375, "learning_rate": 7.391602400623736e-06, "loss": 2.1078, "step": 32990 }, { "epoch": 0.69, "grad_norm": 0.447265625, "learning_rate": 7.382716581447743e-06, "loss": 2.1172, "step": 33000 }, { "epoch": 0.69, "eval_accuracy": 0.5589935681017275, "eval_loss": 1.9931697845458984, "eval_runtime": 16.4436, "eval_samples_per_second": 36.184, "eval_steps_per_second": 1.155, "step": 33000 }, { "epoch": 0.69, "grad_norm": 0.439453125, "learning_rate": 7.373834362768968e-06, "loss": 2.1184, "step": 33010 }, { "epoch": 0.69, "grad_norm": 0.435546875, "learning_rate": 7.364955748785817e-06, "loss": 2.1333, "step": 33020 }, { "epoch": 0.69, "grad_norm": 0.4375, "learning_rate": 7.356080743694976e-06, "loss": 2.1396, "step": 33030 }, { "epoch": 0.69, "grad_norm": 0.431640625, "learning_rate": 7.347209351691439e-06, "loss": 2.1666, "step": 33040 }, { "epoch": 0.69, "grad_norm": 0.42578125, "learning_rate": 7.338341576968498e-06, "loss": 2.1363, "step": 33050 }, { "epoch": 0.69, "grad_norm": 0.431640625, "learning_rate": 7.329477423717712e-06, "loss": 2.1047, "step": 33060 }, { "epoch": 0.69, "grad_norm": 0.4375, "learning_rate": 7.3206168961289465e-06, "loss": 2.1478, "step": 33070 }, { "epoch": 0.69, "grad_norm": 0.43359375, "learning_rate": 7.3117599983903585e-06, "loss": 2.1274, "step": 33080 }, { "epoch": 0.69, "grad_norm": 0.427734375, "learning_rate": 7.302906734688363e-06, "loss": 2.1379, "step": 33090 }, { "epoch": 0.69, "grad_norm": 0.423828125, "learning_rate": 7.294057109207684e-06, "loss": 2.0885, "step": 33100 }, { "epoch": 0.69, "grad_norm": 0.482421875, "learning_rate": 7.285211126131313e-06, "loss": 2.1811, "step": 33110 }, { "epoch": 0.69, "grad_norm": 0.455078125, "learning_rate": 7.276368789640529e-06, "loss": 2.1439, "step": 33120 }, { "epoch": 0.69, "grad_norm": 0.431640625, "learning_rate": 7.26753010391487e-06, "loss": 2.0994, "step": 33130 }, { "epoch": 0.69, "grad_norm": 0.439453125, "learning_rate": 7.258695073132171e-06, "loss": 2.128, "step": 33140 }, { "epoch": 0.69, "grad_norm": 0.4609375, "learning_rate": 7.2498637014685144e-06, "loss": 2.1156, "step": 33150 }, { "epoch": 0.69, "grad_norm": 0.455078125, "learning_rate": 7.241035993098279e-06, "loss": 2.1207, "step": 33160 }, { "epoch": 0.69, "grad_norm": 0.43359375, "learning_rate": 7.232211952194086e-06, "loss": 2.0954, "step": 33170 }, { "epoch": 0.69, "grad_norm": 0.443359375, "learning_rate": 7.22339158292685e-06, "loss": 2.1122, "step": 33180 }, { "epoch": 0.69, "grad_norm": 0.4375, "learning_rate": 7.2145748894657235e-06, "loss": 2.1424, "step": 33190 }, { "epoch": 0.69, "grad_norm": 0.4453125, "learning_rate": 7.205761875978144e-06, "loss": 2.1173, "step": 33200 }, { "epoch": 0.69, "grad_norm": 0.431640625, "learning_rate": 7.196952546629799e-06, "loss": 2.1047, "step": 33210 }, { "epoch": 0.69, "grad_norm": 0.443359375, "learning_rate": 7.18814690558463e-06, "loss": 2.1304, "step": 33220 }, { "epoch": 0.69, "grad_norm": 0.447265625, "learning_rate": 7.179344957004843e-06, "loss": 2.1111, "step": 33230 }, { "epoch": 0.69, "grad_norm": 0.4453125, "learning_rate": 7.170546705050905e-06, "loss": 2.1318, "step": 33240 }, { "epoch": 0.69, "grad_norm": 0.431640625, "learning_rate": 7.161752153881514e-06, "loss": 2.135, "step": 33250 }, { "epoch": 0.69, "grad_norm": 0.4375, "learning_rate": 7.152961307653638e-06, "loss": 2.1171, "step": 33260 }, { "epoch": 0.69, "grad_norm": 0.43359375, "learning_rate": 7.144174170522494e-06, "loss": 2.1379, "step": 33270 }, { "epoch": 0.69, "grad_norm": 0.455078125, "learning_rate": 7.135390746641527e-06, "loss": 2.122, "step": 33280 }, { "epoch": 0.69, "grad_norm": 0.4453125, "learning_rate": 7.126611040162446e-06, "loss": 2.1015, "step": 33290 }, { "epoch": 0.69, "grad_norm": 0.447265625, "learning_rate": 7.117835055235195e-06, "loss": 2.1353, "step": 33300 }, { "epoch": 0.69, "grad_norm": 0.412109375, "learning_rate": 7.109062796007967e-06, "loss": 2.0953, "step": 33310 }, { "epoch": 0.69, "grad_norm": 0.578125, "learning_rate": 7.100294266627175e-06, "loss": 2.0993, "step": 33320 }, { "epoch": 0.69, "grad_norm": 0.490234375, "learning_rate": 7.091529471237487e-06, "loss": 2.1095, "step": 33330 }, { "epoch": 0.69, "grad_norm": 0.423828125, "learning_rate": 7.082768413981808e-06, "loss": 2.1414, "step": 33340 }, { "epoch": 0.69, "grad_norm": 0.4453125, "learning_rate": 7.074011099001252e-06, "loss": 2.1345, "step": 33350 }, { "epoch": 0.69, "grad_norm": 0.439453125, "learning_rate": 7.065257530435188e-06, "loss": 2.1595, "step": 33360 }, { "epoch": 0.69, "grad_norm": 0.4453125, "learning_rate": 7.056507712421214e-06, "loss": 2.1433, "step": 33370 }, { "epoch": 0.69, "grad_norm": 0.431640625, "learning_rate": 7.047761649095135e-06, "loss": 2.1021, "step": 33380 }, { "epoch": 0.69, "grad_norm": 0.46484375, "learning_rate": 7.039019344591e-06, "loss": 2.1227, "step": 33390 }, { "epoch": 0.69, "grad_norm": 0.435546875, "learning_rate": 7.030280803041081e-06, "loss": 2.1248, "step": 33400 }, { "epoch": 0.69, "grad_norm": 0.50390625, "learning_rate": 7.021546028575854e-06, "loss": 2.1323, "step": 33410 }, { "epoch": 0.69, "grad_norm": 0.447265625, "learning_rate": 7.012815025324036e-06, "loss": 2.1227, "step": 33420 }, { "epoch": 0.7, "grad_norm": 0.474609375, "learning_rate": 7.0040877974125416e-06, "loss": 2.1408, "step": 33430 }, { "epoch": 0.7, "grad_norm": 0.44140625, "learning_rate": 6.995364348966521e-06, "loss": 2.0959, "step": 33440 }, { "epoch": 0.7, "grad_norm": 0.45703125, "learning_rate": 6.986644684109318e-06, "loss": 2.1235, "step": 33450 }, { "epoch": 0.7, "grad_norm": 0.4296875, "learning_rate": 6.9779288069625075e-06, "loss": 2.1302, "step": 33460 }, { "epoch": 0.7, "grad_norm": 0.431640625, "learning_rate": 6.969216721645853e-06, "loss": 2.0938, "step": 33470 }, { "epoch": 0.7, "grad_norm": 0.439453125, "learning_rate": 6.960508432277345e-06, "loss": 2.1156, "step": 33480 }, { "epoch": 0.7, "grad_norm": 0.4453125, "learning_rate": 6.951803942973166e-06, "loss": 2.099, "step": 33490 }, { "epoch": 0.7, "grad_norm": 0.4296875, "learning_rate": 6.943103257847719e-06, "loss": 2.1166, "step": 33500 }, { "epoch": 0.7, "eval_accuracy": 0.5592005717242909, "eval_loss": 1.9930750131607056, "eval_runtime": 16.4841, "eval_samples_per_second": 36.095, "eval_steps_per_second": 1.153, "step": 33500 }, { "epoch": 0.7, "grad_norm": 0.44921875, "learning_rate": 6.934406381013584e-06, "loss": 2.1251, "step": 33510 }, { "epoch": 0.7, "grad_norm": 0.41796875, "learning_rate": 6.925713316581562e-06, "loss": 2.079, "step": 33520 }, { "epoch": 0.7, "grad_norm": 0.447265625, "learning_rate": 6.917024068660649e-06, "loss": 2.1075, "step": 33530 }, { "epoch": 0.7, "grad_norm": 0.455078125, "learning_rate": 6.908338641358021e-06, "loss": 2.112, "step": 33540 }, { "epoch": 0.7, "grad_norm": 0.435546875, "learning_rate": 6.899657038779067e-06, "loss": 2.0905, "step": 33550 }, { "epoch": 0.7, "grad_norm": 0.43359375, "learning_rate": 6.890979265027365e-06, "loss": 2.1401, "step": 33560 }, { "epoch": 0.7, "grad_norm": 0.46875, "learning_rate": 6.882305324204669e-06, "loss": 2.11, "step": 33570 }, { "epoch": 0.7, "grad_norm": 0.453125, "learning_rate": 6.873635220410938e-06, "loss": 2.1147, "step": 33580 }, { "epoch": 0.7, "grad_norm": 0.42578125, "learning_rate": 6.864968957744314e-06, "loss": 2.1247, "step": 33590 }, { "epoch": 0.7, "grad_norm": 0.4609375, "learning_rate": 6.85630654030111e-06, "loss": 2.1491, "step": 33600 }, { "epoch": 0.7, "grad_norm": 0.435546875, "learning_rate": 6.8476479721758375e-06, "loss": 2.1205, "step": 33610 }, { "epoch": 0.7, "grad_norm": 0.515625, "learning_rate": 6.838993257461189e-06, "loss": 2.1041, "step": 33620 }, { "epoch": 0.7, "grad_norm": 0.4375, "learning_rate": 6.830342400248017e-06, "loss": 2.1365, "step": 33630 }, { "epoch": 0.7, "grad_norm": 0.4375, "learning_rate": 6.821695404625369e-06, "loss": 2.1081, "step": 33640 }, { "epoch": 0.7, "grad_norm": 0.447265625, "learning_rate": 6.813052274680465e-06, "loss": 2.0924, "step": 33650 }, { "epoch": 0.7, "grad_norm": 0.48046875, "learning_rate": 6.804413014498685e-06, "loss": 2.0904, "step": 33660 }, { "epoch": 0.7, "grad_norm": 0.41015625, "learning_rate": 6.795777628163599e-06, "loss": 2.145, "step": 33670 }, { "epoch": 0.7, "grad_norm": 0.431640625, "learning_rate": 6.787146119756926e-06, "loss": 2.0912, "step": 33680 }, { "epoch": 0.7, "grad_norm": 0.431640625, "learning_rate": 6.778518493358571e-06, "loss": 2.1224, "step": 33690 }, { "epoch": 0.7, "grad_norm": 0.427734375, "learning_rate": 6.7698947530465835e-06, "loss": 2.1197, "step": 33700 }, { "epoch": 0.7, "grad_norm": 0.439453125, "learning_rate": 6.761274902897194e-06, "loss": 2.0811, "step": 33710 }, { "epoch": 0.7, "grad_norm": 0.439453125, "learning_rate": 6.752658946984793e-06, "loss": 2.1076, "step": 33720 }, { "epoch": 0.7, "grad_norm": 0.42578125, "learning_rate": 6.744046889381914e-06, "loss": 2.1044, "step": 33730 }, { "epoch": 0.7, "grad_norm": 0.43359375, "learning_rate": 6.735438734159263e-06, "loss": 2.1192, "step": 33740 }, { "epoch": 0.7, "grad_norm": 0.443359375, "learning_rate": 6.726834485385706e-06, "loss": 2.1285, "step": 33750 }, { "epoch": 0.7, "grad_norm": 0.443359375, "learning_rate": 6.71823414712824e-06, "loss": 2.097, "step": 33760 }, { "epoch": 0.7, "grad_norm": 0.4375, "learning_rate": 6.709637723452033e-06, "loss": 2.1062, "step": 33770 }, { "epoch": 0.7, "grad_norm": 0.4375, "learning_rate": 6.701045218420403e-06, "loss": 2.0759, "step": 33780 }, { "epoch": 0.7, "grad_norm": 0.439453125, "learning_rate": 6.692456636094799e-06, "loss": 2.1428, "step": 33790 }, { "epoch": 0.7, "grad_norm": 0.4453125, "learning_rate": 6.683871980534833e-06, "loss": 2.1225, "step": 33800 }, { "epoch": 0.7, "grad_norm": 0.42578125, "learning_rate": 6.675291255798259e-06, "loss": 2.1032, "step": 33810 }, { "epoch": 0.7, "grad_norm": 0.443359375, "learning_rate": 6.6667144659409565e-06, "loss": 2.1256, "step": 33820 }, { "epoch": 0.7, "grad_norm": 0.4296875, "learning_rate": 6.658141615016966e-06, "loss": 2.1025, "step": 33830 }, { "epoch": 0.7, "grad_norm": 0.427734375, "learning_rate": 6.649572707078458e-06, "loss": 2.1004, "step": 33840 }, { "epoch": 0.7, "grad_norm": 0.4296875, "learning_rate": 6.641007746175733e-06, "loss": 2.1044, "step": 33850 }, { "epoch": 0.7, "grad_norm": 0.416015625, "learning_rate": 6.632446736357233e-06, "loss": 2.1147, "step": 33860 }, { "epoch": 0.7, "grad_norm": 0.45703125, "learning_rate": 6.623889681669531e-06, "loss": 2.096, "step": 33870 }, { "epoch": 0.7, "grad_norm": 0.427734375, "learning_rate": 6.6153365861573384e-06, "loss": 2.1515, "step": 33880 }, { "epoch": 0.7, "grad_norm": 0.443359375, "learning_rate": 6.606787453863472e-06, "loss": 2.0919, "step": 33890 }, { "epoch": 0.7, "grad_norm": 0.4375, "learning_rate": 6.598242288828906e-06, "loss": 2.1363, "step": 33900 }, { "epoch": 0.7, "grad_norm": 0.455078125, "learning_rate": 6.589701095092712e-06, "loss": 2.0989, "step": 33910 }, { "epoch": 0.71, "grad_norm": 0.462890625, "learning_rate": 6.581163876692103e-06, "loss": 2.1017, "step": 33920 }, { "epoch": 0.71, "grad_norm": 0.41796875, "learning_rate": 6.5726306376624e-06, "loss": 2.0953, "step": 33930 }, { "epoch": 0.71, "grad_norm": 0.421875, "learning_rate": 6.564101382037057e-06, "loss": 2.1325, "step": 33940 }, { "epoch": 0.71, "grad_norm": 0.447265625, "learning_rate": 6.5555761138476306e-06, "loss": 2.0983, "step": 33950 }, { "epoch": 0.71, "grad_norm": 0.412109375, "learning_rate": 6.5470548371238006e-06, "loss": 2.1065, "step": 33960 }, { "epoch": 0.71, "grad_norm": 0.443359375, "learning_rate": 6.538537555893365e-06, "loss": 2.0793, "step": 33970 }, { "epoch": 0.71, "grad_norm": 0.4375, "learning_rate": 6.530024274182218e-06, "loss": 2.0884, "step": 33980 }, { "epoch": 0.71, "grad_norm": 0.43359375, "learning_rate": 6.521514996014378e-06, "loss": 2.1178, "step": 33990 }, { "epoch": 0.71, "grad_norm": 0.43359375, "learning_rate": 6.513009725411971e-06, "loss": 2.1054, "step": 34000 }, { "epoch": 0.71, "eval_accuracy": 0.559082283939969, "eval_loss": 1.9931409358978271, "eval_runtime": 16.4535, "eval_samples_per_second": 36.162, "eval_steps_per_second": 1.155, "step": 34000 }, { "epoch": 0.71, "grad_norm": 0.462890625, "learning_rate": 6.504508466395211e-06, "loss": 2.1161, "step": 34010 }, { "epoch": 0.71, "grad_norm": 0.462890625, "learning_rate": 6.496011222982434e-06, "loss": 2.1408, "step": 34020 }, { "epoch": 0.71, "grad_norm": 0.42578125, "learning_rate": 6.487517999190078e-06, "loss": 2.1301, "step": 34030 }, { "epoch": 0.71, "grad_norm": 0.427734375, "learning_rate": 6.479028799032664e-06, "loss": 2.1109, "step": 34040 }, { "epoch": 0.71, "grad_norm": 0.421875, "learning_rate": 6.47054362652283e-06, "loss": 2.1177, "step": 34050 }, { "epoch": 0.71, "grad_norm": 0.4296875, "learning_rate": 6.462062485671299e-06, "loss": 2.1432, "step": 34060 }, { "epoch": 0.71, "grad_norm": 0.455078125, "learning_rate": 6.453585380486896e-06, "loss": 2.1276, "step": 34070 }, { "epoch": 0.71, "grad_norm": 0.44140625, "learning_rate": 6.445112314976527e-06, "loss": 2.1064, "step": 34080 }, { "epoch": 0.71, "grad_norm": 0.423828125, "learning_rate": 6.4366432931452015e-06, "loss": 2.1064, "step": 34090 }, { "epoch": 0.71, "grad_norm": 0.453125, "learning_rate": 6.428178318996016e-06, "loss": 2.1184, "step": 34100 }, { "epoch": 0.71, "grad_norm": 0.427734375, "learning_rate": 6.419717396530137e-06, "loss": 2.0953, "step": 34110 }, { "epoch": 0.71, "grad_norm": 0.41796875, "learning_rate": 6.411260529746836e-06, "loss": 2.124, "step": 34120 }, { "epoch": 0.71, "grad_norm": 0.447265625, "learning_rate": 6.4028077226434644e-06, "loss": 2.1018, "step": 34130 }, { "epoch": 0.71, "grad_norm": 0.419921875, "learning_rate": 6.3943589792154425e-06, "loss": 2.1278, "step": 34140 }, { "epoch": 0.71, "grad_norm": 0.43359375, "learning_rate": 6.385914303456283e-06, "loss": 2.0978, "step": 34150 }, { "epoch": 0.71, "grad_norm": 0.470703125, "learning_rate": 6.377473699357565e-06, "loss": 2.1102, "step": 34160 }, { "epoch": 0.71, "grad_norm": 0.439453125, "learning_rate": 6.369037170908957e-06, "loss": 2.126, "step": 34170 }, { "epoch": 0.71, "grad_norm": 0.439453125, "learning_rate": 6.360604722098182e-06, "loss": 2.0955, "step": 34180 }, { "epoch": 0.71, "grad_norm": 0.431640625, "learning_rate": 6.352176356911056e-06, "loss": 2.1175, "step": 34190 }, { "epoch": 0.71, "grad_norm": 0.44140625, "learning_rate": 6.343752079331443e-06, "loss": 2.1395, "step": 34200 }, { "epoch": 0.71, "grad_norm": 0.423828125, "learning_rate": 6.335331893341294e-06, "loss": 2.1183, "step": 34210 }, { "epoch": 0.71, "grad_norm": 0.41015625, "learning_rate": 6.326915802920619e-06, "loss": 2.1076, "step": 34220 }, { "epoch": 0.71, "grad_norm": 0.431640625, "learning_rate": 6.318503812047483e-06, "loss": 2.1039, "step": 34230 }, { "epoch": 0.71, "grad_norm": 0.4453125, "learning_rate": 6.310095924698026e-06, "loss": 2.1221, "step": 34240 }, { "epoch": 0.71, "grad_norm": 0.455078125, "learning_rate": 6.301692144846444e-06, "loss": 2.1192, "step": 34250 }, { "epoch": 0.71, "grad_norm": 0.43359375, "learning_rate": 6.293292476464997e-06, "loss": 2.1157, "step": 34260 }, { "epoch": 0.71, "grad_norm": 0.48828125, "learning_rate": 6.284896923523983e-06, "loss": 2.0826, "step": 34270 }, { "epoch": 0.71, "grad_norm": 0.484375, "learning_rate": 6.276505489991775e-06, "loss": 2.1246, "step": 34280 }, { "epoch": 0.71, "grad_norm": 0.41796875, "learning_rate": 6.268118179834796e-06, "loss": 2.0997, "step": 34290 }, { "epoch": 0.71, "grad_norm": 0.44921875, "learning_rate": 6.259734997017506e-06, "loss": 2.1264, "step": 34300 }, { "epoch": 0.71, "grad_norm": 0.43359375, "learning_rate": 6.2513559455024275e-06, "loss": 2.1287, "step": 34310 }, { "epoch": 0.71, "grad_norm": 0.443359375, "learning_rate": 6.242981029250131e-06, "loss": 2.1016, "step": 34320 }, { "epoch": 0.71, "grad_norm": 0.458984375, "learning_rate": 6.234610252219217e-06, "loss": 2.1278, "step": 34330 }, { "epoch": 0.71, "grad_norm": 0.462890625, "learning_rate": 6.226243618366347e-06, "loss": 2.1235, "step": 34340 }, { "epoch": 0.71, "grad_norm": 0.44921875, "learning_rate": 6.217881131646221e-06, "loss": 2.1028, "step": 34350 }, { "epoch": 0.71, "grad_norm": 0.423828125, "learning_rate": 6.209522796011566e-06, "loss": 2.137, "step": 34360 }, { "epoch": 0.71, "grad_norm": 0.4453125, "learning_rate": 6.20116861541316e-06, "loss": 2.0866, "step": 34370 }, { "epoch": 0.71, "grad_norm": 0.4453125, "learning_rate": 6.192818593799818e-06, "loss": 2.0927, "step": 34380 }, { "epoch": 0.71, "grad_norm": 0.451171875, "learning_rate": 6.184472735118374e-06, "loss": 2.1024, "step": 34390 }, { "epoch": 0.72, "grad_norm": 0.4453125, "learning_rate": 6.17613104331371e-06, "loss": 2.094, "step": 34400 }, { "epoch": 0.72, "grad_norm": 0.4296875, "learning_rate": 6.1677935223287365e-06, "loss": 2.1166, "step": 34410 }, { "epoch": 0.72, "grad_norm": 0.462890625, "learning_rate": 6.159460176104379e-06, "loss": 2.1187, "step": 34420 }, { "epoch": 0.72, "grad_norm": 0.435546875, "learning_rate": 6.15113100857961e-06, "loss": 2.1338, "step": 34430 }, { "epoch": 0.72, "grad_norm": 0.4296875, "learning_rate": 6.142806023691406e-06, "loss": 2.1129, "step": 34440 }, { "epoch": 0.72, "grad_norm": 0.44140625, "learning_rate": 6.134485225374787e-06, "loss": 2.1129, "step": 34450 }, { "epoch": 0.72, "grad_norm": 0.4375, "learning_rate": 6.126168617562774e-06, "loss": 2.1016, "step": 34460 }, { "epoch": 0.72, "grad_norm": 0.423828125, "learning_rate": 6.117856204186423e-06, "loss": 2.1158, "step": 34470 }, { "epoch": 0.72, "grad_norm": 0.43359375, "learning_rate": 6.109547989174806e-06, "loss": 2.1232, "step": 34480 }, { "epoch": 0.72, "grad_norm": 0.4375, "learning_rate": 6.101243976454995e-06, "loss": 2.1365, "step": 34490 }, { "epoch": 0.72, "grad_norm": 0.44921875, "learning_rate": 6.092944169952092e-06, "loss": 2.0972, "step": 34500 }, { "epoch": 0.72, "eval_accuracy": 0.5589804250145806, "eval_loss": 1.9931070804595947, "eval_runtime": 16.4414, "eval_samples_per_second": 36.189, "eval_steps_per_second": 1.156, "step": 34500 }, { "epoch": 0.72, "grad_norm": 0.427734375, "learning_rate": 6.084648573589212e-06, "loss": 2.1009, "step": 34510 }, { "epoch": 0.72, "grad_norm": 0.44140625, "learning_rate": 6.076357191287463e-06, "loss": 2.1011, "step": 34520 }, { "epoch": 0.72, "grad_norm": 0.455078125, "learning_rate": 6.068070026965976e-06, "loss": 2.1411, "step": 34530 }, { "epoch": 0.72, "grad_norm": 0.447265625, "learning_rate": 6.0597870845418895e-06, "loss": 2.1089, "step": 34540 }, { "epoch": 0.72, "grad_norm": 0.4453125, "learning_rate": 6.051508367930332e-06, "loss": 2.1593, "step": 34550 }, { "epoch": 0.72, "grad_norm": 0.423828125, "learning_rate": 6.043233881044445e-06, "loss": 2.1074, "step": 34560 }, { "epoch": 0.72, "grad_norm": 0.427734375, "learning_rate": 6.0349636277953785e-06, "loss": 2.1371, "step": 34570 }, { "epoch": 0.72, "grad_norm": 0.453125, "learning_rate": 6.026697612092259e-06, "loss": 2.0958, "step": 34580 }, { "epoch": 0.72, "grad_norm": 0.423828125, "learning_rate": 6.0184358378422296e-06, "loss": 2.149, "step": 34590 }, { "epoch": 0.72, "grad_norm": 0.439453125, "learning_rate": 6.010178308950428e-06, "loss": 2.1108, "step": 34600 }, { "epoch": 0.72, "grad_norm": 0.443359375, "learning_rate": 6.001925029319966e-06, "loss": 2.1496, "step": 34610 }, { "epoch": 0.72, "grad_norm": 0.443359375, "learning_rate": 5.993676002851967e-06, "loss": 2.1262, "step": 34620 }, { "epoch": 0.72, "grad_norm": 0.447265625, "learning_rate": 5.985431233445538e-06, "loss": 2.1116, "step": 34630 }, { "epoch": 0.72, "grad_norm": 0.498046875, "learning_rate": 5.977190724997777e-06, "loss": 2.0973, "step": 34640 }, { "epoch": 0.72, "grad_norm": 0.439453125, "learning_rate": 5.968954481403753e-06, "loss": 2.1023, "step": 34650 }, { "epoch": 0.72, "grad_norm": 0.4609375, "learning_rate": 5.960722506556543e-06, "loss": 2.1212, "step": 34660 }, { "epoch": 0.72, "grad_norm": 0.43359375, "learning_rate": 5.95249480434718e-06, "loss": 2.1311, "step": 34670 }, { "epoch": 0.72, "grad_norm": 0.435546875, "learning_rate": 5.944271378664703e-06, "loss": 2.1415, "step": 34680 }, { "epoch": 0.72, "grad_norm": 0.435546875, "learning_rate": 5.936052233396106e-06, "loss": 2.0964, "step": 34690 }, { "epoch": 0.72, "grad_norm": 0.439453125, "learning_rate": 5.927837372426381e-06, "loss": 2.1169, "step": 34700 }, { "epoch": 0.72, "grad_norm": 0.4453125, "learning_rate": 5.919626799638477e-06, "loss": 2.0898, "step": 34710 }, { "epoch": 0.72, "grad_norm": 0.443359375, "learning_rate": 5.911420518913327e-06, "loss": 2.1329, "step": 34720 }, { "epoch": 0.72, "grad_norm": 0.4453125, "learning_rate": 5.903218534129837e-06, "loss": 2.1129, "step": 34730 }, { "epoch": 0.72, "grad_norm": 0.423828125, "learning_rate": 5.89502084916487e-06, "loss": 2.1183, "step": 34740 }, { "epoch": 0.72, "grad_norm": 0.419921875, "learning_rate": 5.886827467893268e-06, "loss": 2.1374, "step": 34750 }, { "epoch": 0.72, "grad_norm": 0.44140625, "learning_rate": 5.87863839418784e-06, "loss": 2.1091, "step": 34760 }, { "epoch": 0.72, "grad_norm": 0.427734375, "learning_rate": 5.870453631919345e-06, "loss": 2.1035, "step": 34770 }, { "epoch": 0.72, "grad_norm": 0.431640625, "learning_rate": 5.862273184956515e-06, "loss": 2.1388, "step": 34780 }, { "epoch": 0.72, "grad_norm": 0.494140625, "learning_rate": 5.854097057166051e-06, "loss": 2.1034, "step": 34790 }, { "epoch": 0.72, "grad_norm": 0.474609375, "learning_rate": 5.845925252412588e-06, "loss": 2.1206, "step": 34800 }, { "epoch": 0.72, "grad_norm": 0.423828125, "learning_rate": 5.837757774558735e-06, "loss": 2.1278, "step": 34810 }, { "epoch": 0.72, "grad_norm": 0.51171875, "learning_rate": 5.829594627465056e-06, "loss": 2.092, "step": 34820 }, { "epoch": 0.72, "grad_norm": 0.453125, "learning_rate": 5.821435814990067e-06, "loss": 2.1259, "step": 34830 }, { "epoch": 0.72, "grad_norm": 0.419921875, "learning_rate": 5.8132813409902226e-06, "loss": 2.1105, "step": 34840 }, { "epoch": 0.72, "grad_norm": 0.435546875, "learning_rate": 5.805131209319941e-06, "loss": 2.1231, "step": 34850 }, { "epoch": 0.72, "grad_norm": 0.44921875, "learning_rate": 5.796985423831591e-06, "loss": 2.1064, "step": 34860 }, { "epoch": 0.72, "grad_norm": 0.451171875, "learning_rate": 5.7888439883754646e-06, "loss": 2.1156, "step": 34870 }, { "epoch": 0.73, "grad_norm": 0.455078125, "learning_rate": 5.780706906799822e-06, "loss": 2.1169, "step": 34880 }, { "epoch": 0.73, "grad_norm": 0.4375, "learning_rate": 5.772574182950857e-06, "loss": 2.1124, "step": 34890 }, { "epoch": 0.73, "grad_norm": 0.42578125, "learning_rate": 5.764445820672695e-06, "loss": 2.1061, "step": 34900 }, { "epoch": 0.73, "grad_norm": 0.494140625, "learning_rate": 5.756321823807419e-06, "loss": 2.1136, "step": 34910 }, { "epoch": 0.73, "grad_norm": 0.431640625, "learning_rate": 5.748202196195022e-06, "loss": 2.1088, "step": 34920 }, { "epoch": 0.73, "grad_norm": 0.4296875, "learning_rate": 5.74008694167346e-06, "loss": 2.1101, "step": 34930 }, { "epoch": 0.73, "grad_norm": 0.4296875, "learning_rate": 5.731976064078597e-06, "loss": 2.1189, "step": 34940 }, { "epoch": 0.73, "grad_norm": 0.453125, "learning_rate": 5.723869567244252e-06, "loss": 2.1191, "step": 34950 }, { "epoch": 0.73, "grad_norm": 0.4453125, "learning_rate": 5.715767455002149e-06, "loss": 2.0806, "step": 34960 }, { "epoch": 0.73, "grad_norm": 0.48046875, "learning_rate": 5.707669731181955e-06, "loss": 2.1283, "step": 34970 }, { "epoch": 0.73, "grad_norm": 0.44140625, "learning_rate": 5.699576399611267e-06, "loss": 2.1105, "step": 34980 }, { "epoch": 0.73, "grad_norm": 0.46484375, "learning_rate": 5.6914874641155886e-06, "loss": 2.0672, "step": 34990 }, { "epoch": 0.73, "grad_norm": 0.447265625, "learning_rate": 5.683402928518359e-06, "loss": 2.1228, "step": 35000 }, { "epoch": 0.73, "eval_accuracy": 0.5590444975644216, "eval_loss": 1.993085265159607, "eval_runtime": 16.4517, "eval_samples_per_second": 36.166, "eval_steps_per_second": 1.155, "step": 35000 }, { "epoch": 0.73, "grad_norm": 0.439453125, "learning_rate": 5.675322796640932e-06, "loss": 2.0986, "step": 35010 }, { "epoch": 0.73, "grad_norm": 0.439453125, "learning_rate": 5.667247072302589e-06, "loss": 2.1006, "step": 35020 }, { "epoch": 0.73, "grad_norm": 0.4453125, "learning_rate": 5.659175759320509e-06, "loss": 2.1034, "step": 35030 }, { "epoch": 0.73, "grad_norm": 0.498046875, "learning_rate": 5.6511088615098036e-06, "loss": 2.1207, "step": 35040 }, { "epoch": 0.73, "grad_norm": 0.4453125, "learning_rate": 5.643046382683497e-06, "loss": 2.126, "step": 35050 }, { "epoch": 0.73, "grad_norm": 0.466796875, "learning_rate": 5.634988326652507e-06, "loss": 2.1249, "step": 35060 }, { "epoch": 0.73, "grad_norm": 0.48828125, "learning_rate": 5.626934697225679e-06, "loss": 2.0962, "step": 35070 }, { "epoch": 0.73, "grad_norm": 0.4375, "learning_rate": 5.6188854982097646e-06, "loss": 2.1108, "step": 35080 }, { "epoch": 0.73, "grad_norm": 0.4453125, "learning_rate": 5.6108407334094074e-06, "loss": 2.1243, "step": 35090 }, { "epoch": 0.73, "grad_norm": 0.427734375, "learning_rate": 5.6028004066271686e-06, "loss": 2.1265, "step": 35100 }, { "epoch": 0.73, "grad_norm": 0.4296875, "learning_rate": 5.594764521663512e-06, "loss": 2.1318, "step": 35110 }, { "epoch": 0.73, "grad_norm": 0.421875, "learning_rate": 5.586733082316788e-06, "loss": 2.1226, "step": 35120 }, { "epoch": 0.73, "grad_norm": 0.45703125, "learning_rate": 5.57870609238326e-06, "loss": 2.125, "step": 35130 }, { "epoch": 0.73, "grad_norm": 0.439453125, "learning_rate": 5.570683555657087e-06, "loss": 2.1059, "step": 35140 }, { "epoch": 0.73, "grad_norm": 0.4375, "learning_rate": 5.5626654759303085e-06, "loss": 2.1077, "step": 35150 }, { "epoch": 0.73, "grad_norm": 0.423828125, "learning_rate": 5.554651856992882e-06, "loss": 2.1458, "step": 35160 }, { "epoch": 0.73, "grad_norm": 0.435546875, "learning_rate": 5.546642702632627e-06, "loss": 2.1169, "step": 35170 }, { "epoch": 0.73, "grad_norm": 0.427734375, "learning_rate": 5.538638016635284e-06, "loss": 2.1383, "step": 35180 }, { "epoch": 0.73, "grad_norm": 0.44921875, "learning_rate": 5.530637802784449e-06, "loss": 2.1674, "step": 35190 }, { "epoch": 0.73, "grad_norm": 0.435546875, "learning_rate": 5.522642064861633e-06, "loss": 2.1128, "step": 35200 }, { "epoch": 0.73, "grad_norm": 0.4375, "learning_rate": 5.514650806646218e-06, "loss": 2.1206, "step": 35210 }, { "epoch": 0.73, "grad_norm": 0.43359375, "learning_rate": 5.506664031915462e-06, "loss": 2.1029, "step": 35220 }, { "epoch": 0.73, "grad_norm": 0.44140625, "learning_rate": 5.498681744444518e-06, "loss": 2.1, "step": 35230 }, { "epoch": 0.73, "grad_norm": 0.57421875, "learning_rate": 5.490703948006415e-06, "loss": 2.1042, "step": 35240 }, { "epoch": 0.73, "grad_norm": 0.43359375, "learning_rate": 5.482730646372044e-06, "loss": 2.101, "step": 35250 }, { "epoch": 0.73, "grad_norm": 0.42578125, "learning_rate": 5.47476184331019e-06, "loss": 2.0874, "step": 35260 }, { "epoch": 0.73, "grad_norm": 0.447265625, "learning_rate": 5.466797542587509e-06, "loss": 2.1039, "step": 35270 }, { "epoch": 0.73, "grad_norm": 0.4453125, "learning_rate": 5.458837747968516e-06, "loss": 2.1162, "step": 35280 }, { "epoch": 0.73, "grad_norm": 0.439453125, "learning_rate": 5.450882463215607e-06, "loss": 2.1125, "step": 35290 }, { "epoch": 0.73, "grad_norm": 0.43359375, "learning_rate": 5.442931692089051e-06, "loss": 2.1358, "step": 35300 }, { "epoch": 0.73, "grad_norm": 0.44140625, "learning_rate": 5.434985438346966e-06, "loss": 2.1118, "step": 35310 }, { "epoch": 0.73, "grad_norm": 0.423828125, "learning_rate": 5.427043705745348e-06, "loss": 2.1036, "step": 35320 }, { "epoch": 0.73, "grad_norm": 0.451171875, "learning_rate": 5.419106498038063e-06, "loss": 2.1052, "step": 35330 }, { "epoch": 0.73, "grad_norm": 0.4140625, "learning_rate": 5.4111738189768124e-06, "loss": 2.1178, "step": 35340 }, { "epoch": 0.73, "grad_norm": 0.44140625, "learning_rate": 5.403245672311182e-06, "loss": 2.1325, "step": 35350 }, { "epoch": 0.74, "grad_norm": 0.455078125, "learning_rate": 5.39532206178861e-06, "loss": 2.142, "step": 35360 }, { "epoch": 0.74, "grad_norm": 0.458984375, "learning_rate": 5.387402991154377e-06, "loss": 2.1371, "step": 35370 }, { "epoch": 0.74, "grad_norm": 0.435546875, "learning_rate": 5.379488464151632e-06, "loss": 2.1593, "step": 35380 }, { "epoch": 0.74, "grad_norm": 0.53515625, "learning_rate": 5.3715784845213715e-06, "loss": 2.1418, "step": 35390 }, { "epoch": 0.74, "grad_norm": 0.443359375, "learning_rate": 5.3636730560024484e-06, "loss": 2.132, "step": 35400 }, { "epoch": 0.74, "grad_norm": 0.431640625, "learning_rate": 5.35577218233155e-06, "loss": 2.1306, "step": 35410 }, { "epoch": 0.74, "grad_norm": 0.46484375, "learning_rate": 5.347875867243227e-06, "loss": 2.1581, "step": 35420 }, { "epoch": 0.74, "grad_norm": 0.439453125, "learning_rate": 5.339984114469859e-06, "loss": 2.1423, "step": 35430 }, { "epoch": 0.74, "grad_norm": 0.451171875, "learning_rate": 5.332096927741687e-06, "loss": 2.109, "step": 35440 }, { "epoch": 0.74, "grad_norm": 0.58984375, "learning_rate": 5.324214310786777e-06, "loss": 2.1108, "step": 35450 }, { "epoch": 0.74, "grad_norm": 0.427734375, "learning_rate": 5.316336267331051e-06, "loss": 2.1136, "step": 35460 }, { "epoch": 0.74, "grad_norm": 0.462890625, "learning_rate": 5.308462801098251e-06, "loss": 2.1321, "step": 35470 }, { "epoch": 0.74, "grad_norm": 0.451171875, "learning_rate": 5.30059391580997e-06, "loss": 2.1358, "step": 35480 }, { "epoch": 0.74, "grad_norm": 0.423828125, "learning_rate": 5.292729615185637e-06, "loss": 2.15, "step": 35490 }, { "epoch": 0.74, "grad_norm": 0.43359375, "learning_rate": 5.284869902942497e-06, "loss": 2.1231, "step": 35500 }, { "epoch": 0.74, "eval_accuracy": 0.5592170005832245, "eval_loss": 1.9931291341781616, "eval_runtime": 16.439, "eval_samples_per_second": 36.194, "eval_steps_per_second": 1.156, "step": 35500 }, { "epoch": 0.74, "grad_norm": 0.421875, "learning_rate": 5.277014782795643e-06, "loss": 2.1469, "step": 35510 }, { "epoch": 0.74, "grad_norm": 0.4453125, "learning_rate": 5.269164258457997e-06, "loss": 2.1019, "step": 35520 }, { "epoch": 0.74, "grad_norm": 0.4453125, "learning_rate": 5.261318333640291e-06, "loss": 2.1239, "step": 35530 }, { "epoch": 0.74, "grad_norm": 0.44140625, "learning_rate": 5.253477012051105e-06, "loss": 2.1449, "step": 35540 }, { "epoch": 0.74, "grad_norm": 0.423828125, "learning_rate": 5.245640297396832e-06, "loss": 2.1256, "step": 35550 }, { "epoch": 0.74, "grad_norm": 0.42578125, "learning_rate": 5.2378081933816845e-06, "loss": 2.0897, "step": 35560 }, { "epoch": 0.74, "grad_norm": 0.4375, "learning_rate": 5.229980703707701e-06, "loss": 2.1485, "step": 35570 }, { "epoch": 0.74, "grad_norm": 0.44921875, "learning_rate": 5.222157832074741e-06, "loss": 2.111, "step": 35580 }, { "epoch": 0.74, "grad_norm": 0.4375, "learning_rate": 5.214339582180481e-06, "loss": 2.0669, "step": 35590 }, { "epoch": 0.74, "grad_norm": 0.431640625, "learning_rate": 5.206525957720399e-06, "loss": 2.1298, "step": 35600 }, { "epoch": 0.74, "grad_norm": 0.4609375, "learning_rate": 5.198716962387806e-06, "loss": 2.1181, "step": 35610 }, { "epoch": 0.74, "grad_norm": 0.458984375, "learning_rate": 5.190912599873818e-06, "loss": 2.1329, "step": 35620 }, { "epoch": 0.74, "grad_norm": 0.43359375, "learning_rate": 5.183112873867353e-06, "loss": 2.1173, "step": 35630 }, { "epoch": 0.74, "grad_norm": 0.42578125, "learning_rate": 5.175317788055149e-06, "loss": 2.1174, "step": 35640 }, { "epoch": 0.74, "grad_norm": 0.435546875, "learning_rate": 5.167527346121747e-06, "loss": 2.1275, "step": 35650 }, { "epoch": 0.74, "grad_norm": 0.4375, "learning_rate": 5.159741551749487e-06, "loss": 2.077, "step": 35660 }, { "epoch": 0.74, "grad_norm": 0.4375, "learning_rate": 5.151960408618524e-06, "loss": 2.0951, "step": 35670 }, { "epoch": 0.74, "grad_norm": 0.435546875, "learning_rate": 5.144183920406798e-06, "loss": 2.0967, "step": 35680 }, { "epoch": 0.74, "grad_norm": 0.431640625, "learning_rate": 5.1364120907900675e-06, "loss": 2.1244, "step": 35690 }, { "epoch": 0.74, "grad_norm": 0.453125, "learning_rate": 5.128644923441872e-06, "loss": 2.1489, "step": 35700 }, { "epoch": 0.74, "grad_norm": 0.451171875, "learning_rate": 5.120882422033562e-06, "loss": 2.1294, "step": 35710 }, { "epoch": 0.74, "grad_norm": 0.443359375, "learning_rate": 5.11312459023427e-06, "loss": 2.1346, "step": 35720 }, { "epoch": 0.74, "grad_norm": 0.439453125, "learning_rate": 5.105371431710929e-06, "loss": 2.1249, "step": 35730 }, { "epoch": 0.74, "grad_norm": 0.435546875, "learning_rate": 5.097622950128265e-06, "loss": 2.1315, "step": 35740 }, { "epoch": 0.74, "grad_norm": 0.4375, "learning_rate": 5.089879149148781e-06, "loss": 2.124, "step": 35750 }, { "epoch": 0.74, "grad_norm": 0.4296875, "learning_rate": 5.082140032432781e-06, "loss": 2.108, "step": 35760 }, { "epoch": 0.74, "grad_norm": 0.419921875, "learning_rate": 5.074405603638349e-06, "loss": 2.1048, "step": 35770 }, { "epoch": 0.74, "grad_norm": 0.48046875, "learning_rate": 5.0666758664213595e-06, "loss": 2.1274, "step": 35780 }, { "epoch": 0.74, "grad_norm": 0.4296875, "learning_rate": 5.058950824435452e-06, "loss": 2.1327, "step": 35790 }, { "epoch": 0.74, "grad_norm": 0.419921875, "learning_rate": 5.051230481332065e-06, "loss": 2.112, "step": 35800 }, { "epoch": 0.74, "grad_norm": 0.439453125, "learning_rate": 5.043514840760415e-06, "loss": 2.1442, "step": 35810 }, { "epoch": 0.74, "grad_norm": 0.4296875, "learning_rate": 5.035803906367478e-06, "loss": 2.1296, "step": 35820 }, { "epoch": 0.74, "grad_norm": 0.4453125, "learning_rate": 5.028097681798026e-06, "loss": 2.1406, "step": 35830 }, { "epoch": 0.75, "grad_norm": 0.4296875, "learning_rate": 5.020396170694598e-06, "loss": 2.0958, "step": 35840 }, { "epoch": 0.75, "grad_norm": 0.455078125, "learning_rate": 5.012699376697496e-06, "loss": 2.1102, "step": 35850 }, { "epoch": 0.75, "grad_norm": 0.439453125, "learning_rate": 5.005007303444804e-06, "loss": 2.1232, "step": 35860 }, { "epoch": 0.75, "grad_norm": 0.443359375, "learning_rate": 4.9973199545723755e-06, "loss": 2.1201, "step": 35870 }, { "epoch": 0.75, "grad_norm": 0.451171875, "learning_rate": 4.989637333713814e-06, "loss": 2.0996, "step": 35880 }, { "epoch": 0.75, "grad_norm": 0.423828125, "learning_rate": 4.981959444500509e-06, "loss": 2.1139, "step": 35890 }, { "epoch": 0.75, "grad_norm": 0.486328125, "learning_rate": 4.974286290561606e-06, "loss": 2.0859, "step": 35900 }, { "epoch": 0.75, "grad_norm": 0.423828125, "learning_rate": 4.966617875524003e-06, "loss": 2.1293, "step": 35910 }, { "epoch": 0.75, "grad_norm": 0.43359375, "learning_rate": 4.958954203012375e-06, "loss": 2.1068, "step": 35920 }, { "epoch": 0.75, "grad_norm": 0.44921875, "learning_rate": 4.9512952766491344e-06, "loss": 2.1348, "step": 35930 }, { "epoch": 0.75, "grad_norm": 0.451171875, "learning_rate": 4.943641100054476e-06, "loss": 2.1355, "step": 35940 }, { "epoch": 0.75, "grad_norm": 0.40625, "learning_rate": 4.935991676846321e-06, "loss": 2.0624, "step": 35950 }, { "epoch": 0.75, "grad_norm": 0.470703125, "learning_rate": 4.928347010640368e-06, "loss": 2.1113, "step": 35960 }, { "epoch": 0.75, "grad_norm": 0.44140625, "learning_rate": 4.920707105050059e-06, "loss": 2.1175, "step": 35970 }, { "epoch": 0.75, "grad_norm": 0.44921875, "learning_rate": 4.913071963686575e-06, "loss": 2.1449, "step": 35980 }, { "epoch": 0.75, "grad_norm": 0.421875, "learning_rate": 4.905441590158859e-06, "loss": 2.1075, "step": 35990 }, { "epoch": 0.75, "grad_norm": 0.4453125, "learning_rate": 4.8978159880736e-06, "loss": 2.0974, "step": 36000 }, { "epoch": 0.75, "eval_accuracy": 0.5590329973631681, "eval_loss": 1.9931119680404663, "eval_runtime": 16.4794, "eval_samples_per_second": 36.106, "eval_steps_per_second": 1.153, "step": 36000 }, { "epoch": 0.75, "grad_norm": 0.431640625, "learning_rate": 4.89019516103522e-06, "loss": 2.1181, "step": 36010 }, { "epoch": 0.75, "grad_norm": 0.431640625, "learning_rate": 4.882579112645893e-06, "loss": 2.1242, "step": 36020 }, { "epoch": 0.75, "grad_norm": 0.5, "learning_rate": 4.874967846505539e-06, "loss": 2.1335, "step": 36030 }, { "epoch": 0.75, "grad_norm": 0.4375, "learning_rate": 4.8673613662118015e-06, "loss": 2.123, "step": 36040 }, { "epoch": 0.75, "grad_norm": 0.439453125, "learning_rate": 4.8597596753600745e-06, "loss": 2.1119, "step": 36050 }, { "epoch": 0.75, "grad_norm": 0.421875, "learning_rate": 4.852162777543491e-06, "loss": 2.1142, "step": 36060 }, { "epoch": 0.75, "grad_norm": 0.51953125, "learning_rate": 4.844570676352903e-06, "loss": 2.1036, "step": 36070 }, { "epoch": 0.75, "grad_norm": 0.453125, "learning_rate": 4.83698337537691e-06, "loss": 2.1171, "step": 36080 }, { "epoch": 0.75, "grad_norm": 0.474609375, "learning_rate": 4.82940087820184e-06, "loss": 2.1225, "step": 36090 }, { "epoch": 0.75, "grad_norm": 0.443359375, "learning_rate": 4.821823188411741e-06, "loss": 2.1206, "step": 36100 }, { "epoch": 0.75, "grad_norm": 0.43359375, "learning_rate": 4.814250309588398e-06, "loss": 2.1136, "step": 36110 }, { "epoch": 0.75, "grad_norm": 0.435546875, "learning_rate": 4.806682245311324e-06, "loss": 2.117, "step": 36120 }, { "epoch": 0.75, "grad_norm": 0.5078125, "learning_rate": 4.799118999157744e-06, "loss": 2.122, "step": 36130 }, { "epoch": 0.75, "grad_norm": 0.43359375, "learning_rate": 4.791560574702614e-06, "loss": 2.1155, "step": 36140 }, { "epoch": 0.75, "grad_norm": 0.4375, "learning_rate": 4.784006975518619e-06, "loss": 2.0965, "step": 36150 }, { "epoch": 0.75, "grad_norm": 0.447265625, "learning_rate": 4.776458205176144e-06, "loss": 2.1077, "step": 36160 }, { "epoch": 0.75, "grad_norm": 0.431640625, "learning_rate": 4.768914267243309e-06, "loss": 2.1266, "step": 36170 }, { "epoch": 0.75, "grad_norm": 0.494140625, "learning_rate": 4.761375165285936e-06, "loss": 2.1078, "step": 36180 }, { "epoch": 0.75, "grad_norm": 0.458984375, "learning_rate": 4.753840902867575e-06, "loss": 2.1109, "step": 36190 }, { "epoch": 0.75, "grad_norm": 0.439453125, "learning_rate": 4.746311483549472e-06, "loss": 2.1165, "step": 36200 }, { "epoch": 0.75, "grad_norm": 0.451171875, "learning_rate": 4.7387869108906005e-06, "loss": 2.0883, "step": 36210 }, { "epoch": 0.75, "grad_norm": 0.43359375, "learning_rate": 4.731267188447637e-06, "loss": 2.1408, "step": 36220 }, { "epoch": 0.75, "grad_norm": 0.443359375, "learning_rate": 4.723752319774956e-06, "loss": 2.1222, "step": 36230 }, { "epoch": 0.75, "grad_norm": 0.435546875, "learning_rate": 4.716242308424652e-06, "loss": 2.1161, "step": 36240 }, { "epoch": 0.75, "grad_norm": 0.435546875, "learning_rate": 4.708737157946521e-06, "loss": 2.1181, "step": 36250 }, { "epoch": 0.75, "grad_norm": 0.439453125, "learning_rate": 4.7012368718880476e-06, "loss": 2.1157, "step": 36260 }, { "epoch": 0.75, "grad_norm": 0.42578125, "learning_rate": 4.693741453794433e-06, "loss": 2.1257, "step": 36270 }, { "epoch": 0.75, "grad_norm": 0.458984375, "learning_rate": 4.686250907208575e-06, "loss": 2.1204, "step": 36280 }, { "epoch": 0.75, "grad_norm": 0.427734375, "learning_rate": 4.6787652356710595e-06, "loss": 2.1552, "step": 36290 }, { "epoch": 0.75, "grad_norm": 0.458984375, "learning_rate": 4.671284442720174e-06, "loss": 2.137, "step": 36300 }, { "epoch": 0.75, "grad_norm": 0.46875, "learning_rate": 4.6638085318919094e-06, "loss": 2.055, "step": 36310 }, { "epoch": 0.76, "grad_norm": 0.4453125, "learning_rate": 4.656337506719929e-06, "loss": 2.1262, "step": 36320 }, { "epoch": 0.76, "grad_norm": 0.455078125, "learning_rate": 4.6488713707356e-06, "loss": 2.1164, "step": 36330 }, { "epoch": 0.76, "grad_norm": 0.44140625, "learning_rate": 4.6414101274679825e-06, "loss": 2.1551, "step": 36340 }, { "epoch": 0.76, "grad_norm": 0.443359375, "learning_rate": 4.633953780443807e-06, "loss": 2.108, "step": 36350 }, { "epoch": 0.76, "grad_norm": 0.427734375, "learning_rate": 4.626502333187506e-06, "loss": 2.1207, "step": 36360 }, { "epoch": 0.76, "grad_norm": 0.482421875, "learning_rate": 4.619055789221191e-06, "loss": 2.1213, "step": 36370 }, { "epoch": 0.76, "grad_norm": 0.435546875, "learning_rate": 4.611614152064658e-06, "loss": 2.1274, "step": 36380 }, { "epoch": 0.76, "grad_norm": 0.4296875, "learning_rate": 4.6041774252353715e-06, "loss": 2.0951, "step": 36390 }, { "epoch": 0.76, "grad_norm": 0.4375, "learning_rate": 4.596745612248488e-06, "loss": 2.161, "step": 36400 }, { "epoch": 0.76, "grad_norm": 0.42578125, "learning_rate": 4.589318716616844e-06, "loss": 2.1103, "step": 36410 }, { "epoch": 0.76, "grad_norm": 0.439453125, "learning_rate": 4.581896741850935e-06, "loss": 2.1117, "step": 36420 }, { "epoch": 0.76, "grad_norm": 0.43359375, "learning_rate": 4.574479691458952e-06, "loss": 2.1257, "step": 36430 }, { "epoch": 0.76, "grad_norm": 0.4921875, "learning_rate": 4.567067568946736e-06, "loss": 2.1078, "step": 36440 }, { "epoch": 0.76, "grad_norm": 0.4453125, "learning_rate": 4.559660377817818e-06, "loss": 2.1118, "step": 36450 }, { "epoch": 0.76, "grad_norm": 0.423828125, "learning_rate": 4.5522581215733825e-06, "loss": 2.1103, "step": 36460 }, { "epoch": 0.76, "grad_norm": 0.42578125, "learning_rate": 4.544860803712298e-06, "loss": 2.1457, "step": 36470 }, { "epoch": 0.76, "grad_norm": 0.44921875, "learning_rate": 4.537468427731079e-06, "loss": 2.109, "step": 36480 }, { "epoch": 0.76, "grad_norm": 0.451171875, "learning_rate": 4.5300809971239205e-06, "loss": 2.1253, "step": 36490 }, { "epoch": 0.76, "grad_norm": 0.447265625, "learning_rate": 4.522698515382679e-06, "loss": 2.1025, "step": 36500 }, { "epoch": 0.76, "eval_accuracy": 0.559105284342476, "eval_loss": 1.9931056499481201, "eval_runtime": 16.462, "eval_samples_per_second": 36.144, "eval_steps_per_second": 1.154, "step": 36500 }, { "epoch": 0.76, "grad_norm": 0.42578125, "learning_rate": 4.5153209859968544e-06, "loss": 2.1208, "step": 36510 }, { "epoch": 0.76, "grad_norm": 0.435546875, "learning_rate": 4.507948412453629e-06, "loss": 2.106, "step": 36520 }, { "epoch": 0.76, "grad_norm": 0.455078125, "learning_rate": 4.500580798237831e-06, "loss": 2.1105, "step": 36530 }, { "epoch": 0.76, "grad_norm": 0.423828125, "learning_rate": 4.493218146831938e-06, "loss": 2.1221, "step": 36540 }, { "epoch": 0.76, "grad_norm": 0.478515625, "learning_rate": 4.485860461716093e-06, "loss": 2.1242, "step": 36550 }, { "epoch": 0.76, "grad_norm": 0.435546875, "learning_rate": 4.47850774636809e-06, "loss": 2.113, "step": 36560 }, { "epoch": 0.76, "grad_norm": 0.486328125, "learning_rate": 4.4711600042633746e-06, "loss": 2.1126, "step": 36570 }, { "epoch": 0.76, "grad_norm": 0.453125, "learning_rate": 4.463817238875028e-06, "loss": 2.1519, "step": 36580 }, { "epoch": 0.76, "grad_norm": 0.462890625, "learning_rate": 4.456479453673796e-06, "loss": 2.1234, "step": 36590 }, { "epoch": 0.76, "grad_norm": 0.453125, "learning_rate": 4.449146652128069e-06, "loss": 2.1334, "step": 36600 }, { "epoch": 0.76, "grad_norm": 0.431640625, "learning_rate": 4.4418188377038675e-06, "loss": 2.1007, "step": 36610 }, { "epoch": 0.76, "grad_norm": 0.4453125, "learning_rate": 4.4344960138648675e-06, "loss": 2.0964, "step": 36620 }, { "epoch": 0.76, "grad_norm": 0.447265625, "learning_rate": 4.427178184072389e-06, "loss": 2.1164, "step": 36630 }, { "epoch": 0.76, "grad_norm": 0.470703125, "learning_rate": 4.419865351785373e-06, "loss": 2.1119, "step": 36640 }, { "epoch": 0.76, "grad_norm": 0.423828125, "learning_rate": 4.412557520460418e-06, "loss": 2.1117, "step": 36650 }, { "epoch": 0.76, "grad_norm": 0.419921875, "learning_rate": 4.405254693551754e-06, "loss": 2.098, "step": 36660 }, { "epoch": 0.76, "grad_norm": 0.435546875, "learning_rate": 4.397956874511234e-06, "loss": 2.1182, "step": 36670 }, { "epoch": 0.76, "grad_norm": 0.443359375, "learning_rate": 4.3906640667883616e-06, "loss": 2.1273, "step": 36680 }, { "epoch": 0.76, "grad_norm": 0.4375, "learning_rate": 4.3833762738302524e-06, "loss": 2.089, "step": 36690 }, { "epoch": 0.76, "grad_norm": 0.4921875, "learning_rate": 4.3760934990816715e-06, "loss": 2.1264, "step": 36700 }, { "epoch": 0.76, "grad_norm": 0.455078125, "learning_rate": 4.3688157459849945e-06, "loss": 2.1335, "step": 36710 }, { "epoch": 0.76, "grad_norm": 0.419921875, "learning_rate": 4.361543017980239e-06, "loss": 2.0725, "step": 36720 }, { "epoch": 0.76, "grad_norm": 0.421875, "learning_rate": 4.354275318505031e-06, "loss": 2.0855, "step": 36730 }, { "epoch": 0.76, "grad_norm": 0.44140625, "learning_rate": 4.347012650994631e-06, "loss": 2.123, "step": 36740 }, { "epoch": 0.76, "grad_norm": 0.4453125, "learning_rate": 4.339755018881922e-06, "loss": 2.0903, "step": 36750 }, { "epoch": 0.76, "grad_norm": 0.443359375, "learning_rate": 4.332502425597404e-06, "loss": 2.1431, "step": 36760 }, { "epoch": 0.76, "grad_norm": 0.416015625, "learning_rate": 4.325254874569189e-06, "loss": 2.1281, "step": 36770 }, { "epoch": 0.76, "grad_norm": 0.443359375, "learning_rate": 4.318012369223011e-06, "loss": 2.1016, "step": 36780 }, { "epoch": 0.76, "grad_norm": 0.427734375, "learning_rate": 4.310774912982227e-06, "loss": 2.1429, "step": 36790 }, { "epoch": 0.77, "grad_norm": 0.435546875, "learning_rate": 4.30354250926779e-06, "loss": 2.1594, "step": 36800 }, { "epoch": 0.77, "grad_norm": 0.431640625, "learning_rate": 4.296315161498277e-06, "loss": 2.1184, "step": 36810 }, { "epoch": 0.77, "grad_norm": 0.42578125, "learning_rate": 4.289092873089881e-06, "loss": 2.1207, "step": 36820 }, { "epoch": 0.77, "grad_norm": 0.419921875, "learning_rate": 4.281875647456381e-06, "loss": 2.0888, "step": 36830 }, { "epoch": 0.77, "grad_norm": 0.4375, "learning_rate": 4.274663488009185e-06, "loss": 2.1329, "step": 36840 }, { "epoch": 0.77, "grad_norm": 0.466796875, "learning_rate": 4.2674563981573015e-06, "loss": 2.1339, "step": 36850 }, { "epoch": 0.77, "grad_norm": 0.435546875, "learning_rate": 4.2602543813073325e-06, "loss": 2.1374, "step": 36860 }, { "epoch": 0.77, "grad_norm": 0.4453125, "learning_rate": 4.2530574408634906e-06, "loss": 2.0959, "step": 36870 }, { "epoch": 0.77, "grad_norm": 0.4765625, "learning_rate": 4.245865580227594e-06, "loss": 2.1113, "step": 36880 }, { "epoch": 0.77, "grad_norm": 0.494140625, "learning_rate": 4.238678802799043e-06, "loss": 2.1188, "step": 36890 }, { "epoch": 0.77, "grad_norm": 0.42578125, "learning_rate": 4.231497111974853e-06, "loss": 2.1132, "step": 36900 }, { "epoch": 0.77, "grad_norm": 0.4296875, "learning_rate": 4.224320511149628e-06, "loss": 2.1199, "step": 36910 }, { "epoch": 0.77, "grad_norm": 0.44140625, "learning_rate": 4.21714900371556e-06, "loss": 2.1004, "step": 36920 }, { "epoch": 0.77, "grad_norm": 0.45703125, "learning_rate": 4.209982593062446e-06, "loss": 2.1752, "step": 36930 }, { "epoch": 0.77, "grad_norm": 0.4453125, "learning_rate": 4.202821282577661e-06, "loss": 2.1265, "step": 36940 }, { "epoch": 0.77, "grad_norm": 0.4375, "learning_rate": 4.19566507564618e-06, "loss": 2.1123, "step": 36950 }, { "epoch": 0.77, "grad_norm": 0.44140625, "learning_rate": 4.188513975650555e-06, "loss": 2.1381, "step": 36960 }, { "epoch": 0.77, "grad_norm": 0.5078125, "learning_rate": 4.181367985970934e-06, "loss": 2.11, "step": 36970 }, { "epoch": 0.77, "grad_norm": 0.423828125, "learning_rate": 4.174227109985052e-06, "loss": 2.1224, "step": 36980 }, { "epoch": 0.77, "grad_norm": 0.4296875, "learning_rate": 4.167091351068207e-06, "loss": 2.1297, "step": 36990 }, { "epoch": 0.77, "grad_norm": 0.443359375, "learning_rate": 4.159960712593301e-06, "loss": 2.1217, "step": 37000 }, { "epoch": 0.77, "eval_accuracy": 0.5589886394440474, "eval_loss": 1.9931020736694336, "eval_runtime": 16.5352, "eval_samples_per_second": 35.984, "eval_steps_per_second": 1.149, "step": 37000 }, { "epoch": 0.77, "grad_norm": 0.4375, "learning_rate": 4.152835197930806e-06, "loss": 2.0892, "step": 37010 }, { "epoch": 0.77, "grad_norm": 0.4375, "learning_rate": 4.145714810448768e-06, "loss": 2.1267, "step": 37020 }, { "epoch": 0.77, "grad_norm": 0.451171875, "learning_rate": 4.1385995535128175e-06, "loss": 2.0758, "step": 37030 }, { "epoch": 0.77, "grad_norm": 0.46484375, "learning_rate": 4.131489430486159e-06, "loss": 2.1344, "step": 37040 }, { "epoch": 0.77, "grad_norm": 0.43359375, "learning_rate": 4.124384444729561e-06, "loss": 2.1235, "step": 37050 }, { "epoch": 0.77, "grad_norm": 0.453125, "learning_rate": 4.117284599601374e-06, "loss": 2.1071, "step": 37060 }, { "epoch": 0.77, "grad_norm": 0.443359375, "learning_rate": 4.110189898457521e-06, "loss": 2.1207, "step": 37070 }, { "epoch": 0.77, "grad_norm": 0.427734375, "learning_rate": 4.103100344651478e-06, "loss": 2.113, "step": 37080 }, { "epoch": 0.77, "grad_norm": 0.44921875, "learning_rate": 4.0960159415342994e-06, "loss": 2.1157, "step": 37090 }, { "epoch": 0.77, "grad_norm": 0.419921875, "learning_rate": 4.088936692454613e-06, "loss": 2.1251, "step": 37100 }, { "epoch": 0.77, "grad_norm": 0.4453125, "learning_rate": 4.081862600758589e-06, "loss": 2.103, "step": 37110 }, { "epoch": 0.77, "grad_norm": 0.447265625, "learning_rate": 4.074793669789977e-06, "loss": 2.1519, "step": 37120 }, { "epoch": 0.77, "grad_norm": 0.44921875, "learning_rate": 4.06772990289008e-06, "loss": 2.1609, "step": 37130 }, { "epoch": 0.77, "grad_norm": 0.421875, "learning_rate": 4.06067130339777e-06, "loss": 2.1093, "step": 37140 }, { "epoch": 0.77, "grad_norm": 0.455078125, "learning_rate": 4.053617874649455e-06, "loss": 2.1067, "step": 37150 }, { "epoch": 0.77, "grad_norm": 0.47265625, "learning_rate": 4.046569619979124e-06, "loss": 2.1077, "step": 37160 }, { "epoch": 0.77, "grad_norm": 0.5234375, "learning_rate": 4.039526542718302e-06, "loss": 2.1271, "step": 37170 }, { "epoch": 0.77, "grad_norm": 0.4375, "learning_rate": 4.032488646196077e-06, "loss": 2.1231, "step": 37180 }, { "epoch": 0.77, "grad_norm": 0.484375, "learning_rate": 4.025455933739079e-06, "loss": 2.1256, "step": 37190 }, { "epoch": 0.77, "grad_norm": 0.435546875, "learning_rate": 4.018428408671503e-06, "loss": 2.0914, "step": 37200 }, { "epoch": 0.77, "grad_norm": 0.453125, "learning_rate": 4.011406074315072e-06, "loss": 2.1392, "step": 37210 }, { "epoch": 0.77, "grad_norm": 0.470703125, "learning_rate": 4.004388933989071e-06, "loss": 2.1404, "step": 37220 }, { "epoch": 0.77, "grad_norm": 0.431640625, "learning_rate": 3.997376991010327e-06, "loss": 2.1152, "step": 37230 }, { "epoch": 0.77, "grad_norm": 0.423828125, "learning_rate": 3.990370248693203e-06, "loss": 2.1153, "step": 37240 }, { "epoch": 0.77, "grad_norm": 0.435546875, "learning_rate": 3.9833687103496095e-06, "loss": 2.1069, "step": 37250 }, { "epoch": 0.77, "grad_norm": 0.423828125, "learning_rate": 3.976372379289005e-06, "loss": 2.1161, "step": 37260 }, { "epoch": 0.77, "grad_norm": 0.4296875, "learning_rate": 3.969381258818365e-06, "loss": 2.0945, "step": 37270 }, { "epoch": 0.78, "grad_norm": 0.5390625, "learning_rate": 3.962395352242224e-06, "loss": 2.1046, "step": 37280 }, { "epoch": 0.78, "grad_norm": 0.5, "learning_rate": 3.955414662862644e-06, "loss": 2.1054, "step": 37290 }, { "epoch": 0.78, "grad_norm": 0.423828125, "learning_rate": 3.9484391939792145e-06, "loss": 2.1603, "step": 37300 }, { "epoch": 0.78, "grad_norm": 0.421875, "learning_rate": 3.941468948889067e-06, "loss": 2.1027, "step": 37310 }, { "epoch": 0.78, "grad_norm": 0.484375, "learning_rate": 3.9345039308868594e-06, "loss": 2.1295, "step": 37320 }, { "epoch": 0.78, "grad_norm": 0.423828125, "learning_rate": 3.927544143264782e-06, "loss": 2.1199, "step": 37330 }, { "epoch": 0.78, "grad_norm": 0.44140625, "learning_rate": 3.920589589312545e-06, "loss": 2.101, "step": 37340 }, { "epoch": 0.78, "grad_norm": 0.4375, "learning_rate": 3.913640272317394e-06, "loss": 2.1135, "step": 37350 }, { "epoch": 0.78, "grad_norm": 0.4296875, "learning_rate": 3.906696195564098e-06, "loss": 2.0865, "step": 37360 }, { "epoch": 0.78, "grad_norm": 0.435546875, "learning_rate": 3.8997573623349385e-06, "loss": 2.0865, "step": 37370 }, { "epoch": 0.78, "grad_norm": 0.451171875, "learning_rate": 3.892823775909731e-06, "loss": 2.0933, "step": 37380 }, { "epoch": 0.78, "grad_norm": 0.44140625, "learning_rate": 3.885895439565811e-06, "loss": 2.1234, "step": 37390 }, { "epoch": 0.78, "grad_norm": 0.42578125, "learning_rate": 3.878972356578019e-06, "loss": 2.132, "step": 37400 }, { "epoch": 0.78, "grad_norm": 0.44921875, "learning_rate": 3.872054530218726e-06, "loss": 2.1138, "step": 37410 }, { "epoch": 0.78, "grad_norm": 0.427734375, "learning_rate": 3.865141963757817e-06, "loss": 2.0882, "step": 37420 }, { "epoch": 0.78, "grad_norm": 0.423828125, "learning_rate": 3.858234660462678e-06, "loss": 2.1238, "step": 37430 }, { "epoch": 0.78, "grad_norm": 0.4375, "learning_rate": 3.851332623598227e-06, "loss": 2.0945, "step": 37440 }, { "epoch": 0.78, "grad_norm": 0.4375, "learning_rate": 3.844435856426874e-06, "loss": 2.1293, "step": 37450 }, { "epoch": 0.78, "grad_norm": 0.43359375, "learning_rate": 3.837544362208554e-06, "loss": 2.1185, "step": 37460 }, { "epoch": 0.78, "grad_norm": 0.4296875, "learning_rate": 3.830658144200691e-06, "loss": 2.1271, "step": 37470 }, { "epoch": 0.78, "grad_norm": 0.453125, "learning_rate": 3.82377720565824e-06, "loss": 2.1114, "step": 37480 }, { "epoch": 0.78, "grad_norm": 0.431640625, "learning_rate": 3.816901549833633e-06, "loss": 2.1179, "step": 37490 }, { "epoch": 0.78, "grad_norm": 0.431640625, "learning_rate": 3.8100311799768256e-06, "loss": 2.1227, "step": 37500 }, { "epoch": 0.78, "eval_accuracy": 0.5591479993757034, "eval_loss": 1.992994785308838, "eval_runtime": 16.4756, "eval_samples_per_second": 36.114, "eval_steps_per_second": 1.153, "step": 37500 }, { "epoch": 0.78, "grad_norm": 0.421875, "learning_rate": 3.803166099335267e-06, "loss": 2.1182, "step": 37510 }, { "epoch": 0.78, "grad_norm": 0.4296875, "learning_rate": 3.796306311153913e-06, "loss": 2.106, "step": 37520 }, { "epoch": 0.78, "grad_norm": 0.859375, "learning_rate": 3.7894518186752007e-06, "loss": 2.1329, "step": 37530 }, { "epoch": 0.78, "grad_norm": 0.427734375, "learning_rate": 3.7826026251390816e-06, "loss": 2.113, "step": 37540 }, { "epoch": 0.78, "grad_norm": 0.51171875, "learning_rate": 3.7757587337830007e-06, "loss": 2.0975, "step": 37550 }, { "epoch": 0.78, "grad_norm": 0.423828125, "learning_rate": 3.768920147841882e-06, "loss": 2.1341, "step": 37560 }, { "epoch": 0.78, "grad_norm": 0.42578125, "learning_rate": 3.7620868705481586e-06, "loss": 2.146, "step": 37570 }, { "epoch": 0.78, "grad_norm": 0.458984375, "learning_rate": 3.75525890513175e-06, "loss": 2.1283, "step": 37580 }, { "epoch": 0.78, "grad_norm": 0.4375, "learning_rate": 3.7484362548200558e-06, "loss": 2.1078, "step": 37590 }, { "epoch": 0.78, "grad_norm": 0.421875, "learning_rate": 3.7416189228379732e-06, "loss": 2.1181, "step": 37600 }, { "epoch": 0.78, "grad_norm": 0.474609375, "learning_rate": 3.734806912407888e-06, "loss": 2.13, "step": 37610 }, { "epoch": 0.78, "grad_norm": 0.462890625, "learning_rate": 3.7280002267496584e-06, "loss": 2.1177, "step": 37620 }, { "epoch": 0.78, "grad_norm": 0.4375, "learning_rate": 3.7211988690806313e-06, "loss": 2.1211, "step": 37630 }, { "epoch": 0.78, "grad_norm": 0.439453125, "learning_rate": 3.7144028426156463e-06, "loss": 2.1043, "step": 37640 }, { "epoch": 0.78, "grad_norm": 0.421875, "learning_rate": 3.707612150567003e-06, "loss": 2.0999, "step": 37650 }, { "epoch": 0.78, "grad_norm": 0.458984375, "learning_rate": 3.7008267961444924e-06, "loss": 2.1014, "step": 37660 }, { "epoch": 0.78, "grad_norm": 0.431640625, "learning_rate": 3.6940467825553857e-06, "loss": 2.1203, "step": 37670 }, { "epoch": 0.78, "grad_norm": 0.4609375, "learning_rate": 3.6872721130044145e-06, "loss": 2.1194, "step": 37680 }, { "epoch": 0.78, "grad_norm": 0.443359375, "learning_rate": 3.6805027906938038e-06, "loss": 2.1392, "step": 37690 }, { "epoch": 0.78, "grad_norm": 0.46484375, "learning_rate": 3.6737388188232305e-06, "loss": 2.1037, "step": 37700 }, { "epoch": 0.78, "grad_norm": 0.431640625, "learning_rate": 3.6669802005898644e-06, "loss": 2.1136, "step": 37710 }, { "epoch": 0.78, "grad_norm": 0.447265625, "learning_rate": 3.6602269391883214e-06, "loss": 2.1088, "step": 37720 }, { "epoch": 0.78, "grad_norm": 0.4375, "learning_rate": 3.653479037810705e-06, "loss": 2.1291, "step": 37730 }, { "epoch": 0.78, "grad_norm": 0.453125, "learning_rate": 3.646736499646578e-06, "loss": 2.1073, "step": 37740 }, { "epoch": 0.78, "grad_norm": 0.439453125, "learning_rate": 3.639999327882962e-06, "loss": 2.1245, "step": 37750 }, { "epoch": 0.79, "grad_norm": 0.4375, "learning_rate": 3.633267525704352e-06, "loss": 2.1101, "step": 37760 }, { "epoch": 0.79, "grad_norm": 0.4296875, "learning_rate": 3.6265410962927027e-06, "loss": 2.1155, "step": 37770 }, { "epoch": 0.79, "grad_norm": 0.4296875, "learning_rate": 3.619820042827421e-06, "loss": 2.1103, "step": 37780 }, { "epoch": 0.79, "grad_norm": 0.44140625, "learning_rate": 3.6131043684853824e-06, "loss": 2.0879, "step": 37790 }, { "epoch": 0.79, "grad_norm": 0.431640625, "learning_rate": 3.6063940764409205e-06, "loss": 2.1121, "step": 37800 }, { "epoch": 0.79, "grad_norm": 0.453125, "learning_rate": 3.59968916986581e-06, "loss": 2.0914, "step": 37810 }, { "epoch": 0.79, "grad_norm": 0.431640625, "learning_rate": 3.592989651929299e-06, "loss": 2.1075, "step": 37820 }, { "epoch": 0.79, "grad_norm": 0.52734375, "learning_rate": 3.5862955257980813e-06, "loss": 2.1359, "step": 37830 }, { "epoch": 0.79, "grad_norm": 0.447265625, "learning_rate": 3.579606794636293e-06, "loss": 2.1123, "step": 37840 }, { "epoch": 0.79, "grad_norm": 0.443359375, "learning_rate": 3.572923461605534e-06, "loss": 2.1447, "step": 37850 }, { "epoch": 0.79, "grad_norm": 0.427734375, "learning_rate": 3.56624552986485e-06, "loss": 2.1121, "step": 37860 }, { "epoch": 0.79, "grad_norm": 0.451171875, "learning_rate": 3.5595730025707214e-06, "loss": 2.1075, "step": 37870 }, { "epoch": 0.79, "grad_norm": 0.44140625, "learning_rate": 3.552905882877087e-06, "loss": 2.1, "step": 37880 }, { "epoch": 0.79, "grad_norm": 0.431640625, "learning_rate": 3.546244173935327e-06, "loss": 2.1252, "step": 37890 }, { "epoch": 0.79, "grad_norm": 0.4453125, "learning_rate": 3.539587878894268e-06, "loss": 2.137, "step": 37900 }, { "epoch": 0.79, "grad_norm": 0.4375, "learning_rate": 3.5329370009001606e-06, "loss": 2.1511, "step": 37910 }, { "epoch": 0.79, "grad_norm": 0.5078125, "learning_rate": 3.5262915430967175e-06, "loss": 2.1462, "step": 37920 }, { "epoch": 0.79, "grad_norm": 0.4453125, "learning_rate": 3.51965150862507e-06, "loss": 2.1215, "step": 37930 }, { "epoch": 0.79, "grad_norm": 0.41796875, "learning_rate": 3.5130169006238016e-06, "loss": 2.12, "step": 37940 }, { "epoch": 0.79, "grad_norm": 0.43359375, "learning_rate": 3.5063877222289146e-06, "loss": 2.1103, "step": 37950 }, { "epoch": 0.79, "grad_norm": 0.48828125, "learning_rate": 3.499763976573866e-06, "loss": 2.1292, "step": 37960 }, { "epoch": 0.79, "grad_norm": 0.4296875, "learning_rate": 3.4931456667895206e-06, "loss": 2.1046, "step": 37970 }, { "epoch": 0.79, "grad_norm": 0.427734375, "learning_rate": 3.486532796004193e-06, "loss": 2.1059, "step": 37980 }, { "epoch": 0.79, "grad_norm": 0.447265625, "learning_rate": 3.4799253673436215e-06, "loss": 2.1328, "step": 37990 }, { "epoch": 0.79, "grad_norm": 0.43359375, "learning_rate": 3.4733233839309642e-06, "loss": 2.1272, "step": 38000 }, { "epoch": 0.79, "eval_accuracy": 0.5592038574960776, "eval_loss": 1.99307382106781, "eval_runtime": 16.4339, "eval_samples_per_second": 36.206, "eval_steps_per_second": 1.156, "step": 38000 }, { "epoch": 0.79, "grad_norm": 0.443359375, "learning_rate": 3.4667268488868166e-06, "loss": 2.0993, "step": 38010 }, { "epoch": 0.79, "grad_norm": 0.431640625, "learning_rate": 3.460135765329196e-06, "loss": 2.0954, "step": 38020 }, { "epoch": 0.79, "grad_norm": 0.4453125, "learning_rate": 3.453550136373532e-06, "loss": 2.0937, "step": 38030 }, { "epoch": 0.79, "grad_norm": 0.4375, "learning_rate": 3.446969965132692e-06, "loss": 2.1, "step": 38040 }, { "epoch": 0.79, "grad_norm": 0.421875, "learning_rate": 3.440395254716959e-06, "loss": 2.108, "step": 38050 }, { "epoch": 0.79, "grad_norm": 0.439453125, "learning_rate": 3.4338260082340233e-06, "loss": 2.1147, "step": 38060 }, { "epoch": 0.79, "grad_norm": 0.4375, "learning_rate": 3.427262228789007e-06, "loss": 2.1175, "step": 38070 }, { "epoch": 0.79, "grad_norm": 0.427734375, "learning_rate": 3.420703919484443e-06, "loss": 2.0821, "step": 38080 }, { "epoch": 0.79, "grad_norm": 0.42578125, "learning_rate": 3.41415108342028e-06, "loss": 2.1068, "step": 38090 }, { "epoch": 0.79, "grad_norm": 0.4765625, "learning_rate": 3.407603723693872e-06, "loss": 2.1375, "step": 38100 }, { "epoch": 0.79, "grad_norm": 0.462890625, "learning_rate": 3.4010618433999928e-06, "loss": 2.1335, "step": 38110 }, { "epoch": 0.79, "grad_norm": 0.4375, "learning_rate": 3.39452544563083e-06, "loss": 2.1256, "step": 38120 }, { "epoch": 0.79, "grad_norm": 0.435546875, "learning_rate": 3.387994533475963e-06, "loss": 2.1404, "step": 38130 }, { "epoch": 0.79, "grad_norm": 0.447265625, "learning_rate": 3.381469110022395e-06, "loss": 2.0768, "step": 38140 }, { "epoch": 0.79, "grad_norm": 0.423828125, "learning_rate": 3.374949178354531e-06, "loss": 2.1113, "step": 38150 }, { "epoch": 0.79, "grad_norm": 0.4296875, "learning_rate": 3.36843474155417e-06, "loss": 2.1124, "step": 38160 }, { "epoch": 0.79, "grad_norm": 0.51171875, "learning_rate": 3.36192580270053e-06, "loss": 2.1087, "step": 38170 }, { "epoch": 0.79, "grad_norm": 0.451171875, "learning_rate": 3.3554223648702136e-06, "loss": 2.109, "step": 38180 }, { "epoch": 0.79, "grad_norm": 0.419921875, "learning_rate": 3.34892443113724e-06, "loss": 2.1273, "step": 38190 }, { "epoch": 0.79, "grad_norm": 0.447265625, "learning_rate": 3.3424320045730076e-06, "loss": 2.1078, "step": 38200 }, { "epoch": 0.79, "grad_norm": 0.42578125, "learning_rate": 3.3359450882463347e-06, "loss": 2.1389, "step": 38210 }, { "epoch": 0.79, "grad_norm": 0.427734375, "learning_rate": 3.329463685223411e-06, "loss": 2.1324, "step": 38220 }, { "epoch": 0.79, "grad_norm": 0.419921875, "learning_rate": 3.322987798567836e-06, "loss": 2.1135, "step": 38230 }, { "epoch": 0.8, "grad_norm": 0.486328125, "learning_rate": 3.3165174313406023e-06, "loss": 2.1338, "step": 38240 }, { "epoch": 0.8, "grad_norm": 0.447265625, "learning_rate": 3.3100525866000814e-06, "loss": 2.1119, "step": 38250 }, { "epoch": 0.8, "grad_norm": 0.4375, "learning_rate": 3.3035932674020457e-06, "loss": 2.1136, "step": 38260 }, { "epoch": 0.8, "grad_norm": 0.435546875, "learning_rate": 3.2971394767996514e-06, "loss": 2.1327, "step": 38270 }, { "epoch": 0.8, "grad_norm": 0.443359375, "learning_rate": 3.290691217843447e-06, "loss": 2.1511, "step": 38280 }, { "epoch": 0.8, "grad_norm": 0.443359375, "learning_rate": 3.2842484935813533e-06, "loss": 2.1398, "step": 38290 }, { "epoch": 0.8, "grad_norm": 0.4296875, "learning_rate": 3.2778113070586852e-06, "loss": 2.138, "step": 38300 }, { "epoch": 0.8, "grad_norm": 0.44140625, "learning_rate": 3.271379661318145e-06, "loss": 2.1258, "step": 38310 }, { "epoch": 0.8, "grad_norm": 0.431640625, "learning_rate": 3.264953559399798e-06, "loss": 2.0918, "step": 38320 }, { "epoch": 0.8, "grad_norm": 0.4375, "learning_rate": 3.258533004341106e-06, "loss": 2.1346, "step": 38330 }, { "epoch": 0.8, "grad_norm": 0.427734375, "learning_rate": 3.252117999176906e-06, "loss": 2.1038, "step": 38340 }, { "epoch": 0.8, "grad_norm": 0.443359375, "learning_rate": 3.2457085469394015e-06, "loss": 2.1346, "step": 38350 }, { "epoch": 0.8, "grad_norm": 0.44921875, "learning_rate": 3.239304650658182e-06, "loss": 2.1366, "step": 38360 }, { "epoch": 0.8, "grad_norm": 0.4375, "learning_rate": 3.232906313360211e-06, "loss": 2.1097, "step": 38370 }, { "epoch": 0.8, "grad_norm": 0.455078125, "learning_rate": 3.226513538069813e-06, "loss": 2.1326, "step": 38380 }, { "epoch": 0.8, "grad_norm": 0.427734375, "learning_rate": 3.2201263278086956e-06, "loss": 2.0995, "step": 38390 }, { "epoch": 0.8, "grad_norm": 0.466796875, "learning_rate": 3.2137446855959356e-06, "loss": 2.1398, "step": 38400 }, { "epoch": 0.8, "grad_norm": 0.439453125, "learning_rate": 3.207368614447965e-06, "loss": 2.0944, "step": 38410 }, { "epoch": 0.8, "grad_norm": 0.4375, "learning_rate": 3.200998117378597e-06, "loss": 2.1035, "step": 38420 }, { "epoch": 0.8, "grad_norm": 0.439453125, "learning_rate": 3.1946331973990066e-06, "loss": 2.0984, "step": 38430 }, { "epoch": 0.8, "grad_norm": 0.4375, "learning_rate": 3.1882738575177234e-06, "loss": 2.0896, "step": 38440 }, { "epoch": 0.8, "grad_norm": 0.453125, "learning_rate": 3.181920100740654e-06, "loss": 2.0856, "step": 38450 }, { "epoch": 0.8, "grad_norm": 0.427734375, "learning_rate": 3.175571930071051e-06, "loss": 2.1063, "step": 38460 }, { "epoch": 0.8, "grad_norm": 0.427734375, "learning_rate": 3.169229348509543e-06, "loss": 2.1152, "step": 38470 }, { "epoch": 0.8, "grad_norm": 0.431640625, "learning_rate": 3.162892359054098e-06, "loss": 2.0908, "step": 38480 }, { "epoch": 0.8, "grad_norm": 0.466796875, "learning_rate": 3.156560964700055e-06, "loss": 2.1044, "step": 38490 }, { "epoch": 0.8, "grad_norm": 0.439453125, "learning_rate": 3.1502351684401094e-06, "loss": 2.117, "step": 38500 }, { "epoch": 0.8, "eval_accuracy": 0.559105284342476, "eval_loss": 1.9930951595306396, "eval_runtime": 16.4469, "eval_samples_per_second": 36.177, "eval_steps_per_second": 1.155, "step": 38500 }, { "epoch": 0.8, "grad_norm": 0.43359375, "learning_rate": 3.1439149732642964e-06, "loss": 2.0981, "step": 38510 }, { "epoch": 0.8, "grad_norm": 0.4296875, "learning_rate": 3.1376003821600153e-06, "loss": 2.1308, "step": 38520 }, { "epoch": 0.8, "grad_norm": 0.4375, "learning_rate": 3.13129139811202e-06, "loss": 2.1007, "step": 38530 }, { "epoch": 0.8, "grad_norm": 0.427734375, "learning_rate": 3.124988024102397e-06, "loss": 2.1312, "step": 38540 }, { "epoch": 0.8, "grad_norm": 0.474609375, "learning_rate": 3.118690263110596e-06, "loss": 2.1244, "step": 38550 }, { "epoch": 0.8, "grad_norm": 0.419921875, "learning_rate": 3.1123981181134167e-06, "loss": 2.1276, "step": 38560 }, { "epoch": 0.8, "grad_norm": 0.447265625, "learning_rate": 3.1061115920849853e-06, "loss": 2.1307, "step": 38570 }, { "epoch": 0.8, "grad_norm": 0.45703125, "learning_rate": 3.099830687996789e-06, "loss": 2.1206, "step": 38580 }, { "epoch": 0.8, "grad_norm": 0.419921875, "learning_rate": 3.0935554088176565e-06, "loss": 2.1353, "step": 38590 }, { "epoch": 0.8, "grad_norm": 0.44140625, "learning_rate": 3.087285757513745e-06, "loss": 2.1118, "step": 38600 }, { "epoch": 0.8, "grad_norm": 0.431640625, "learning_rate": 3.081021737048565e-06, "loss": 2.1263, "step": 38610 }, { "epoch": 0.8, "grad_norm": 0.439453125, "learning_rate": 3.0747633503829635e-06, "loss": 2.1086, "step": 38620 }, { "epoch": 0.8, "grad_norm": 0.435546875, "learning_rate": 3.0685106004751146e-06, "loss": 2.1548, "step": 38630 }, { "epoch": 0.8, "grad_norm": 0.466796875, "learning_rate": 3.0622634902805367e-06, "loss": 2.0745, "step": 38640 }, { "epoch": 0.8, "grad_norm": 0.427734375, "learning_rate": 3.0560220227520836e-06, "loss": 2.1024, "step": 38650 }, { "epoch": 0.8, "grad_norm": 0.50390625, "learning_rate": 3.0497862008399428e-06, "loss": 2.0821, "step": 38660 }, { "epoch": 0.8, "grad_norm": 0.4921875, "learning_rate": 3.0435560274916207e-06, "loss": 2.1247, "step": 38670 }, { "epoch": 0.8, "grad_norm": 0.419921875, "learning_rate": 3.0373315056519697e-06, "loss": 2.1326, "step": 38680 }, { "epoch": 0.8, "grad_norm": 0.42578125, "learning_rate": 3.0311126382631574e-06, "loss": 2.1253, "step": 38690 }, { "epoch": 0.8, "grad_norm": 0.44140625, "learning_rate": 3.0248994282646936e-06, "loss": 2.1393, "step": 38700 }, { "epoch": 0.8, "grad_norm": 0.44921875, "learning_rate": 3.0186918785933965e-06, "loss": 2.1067, "step": 38710 }, { "epoch": 0.8, "grad_norm": 0.421875, "learning_rate": 3.012489992183426e-06, "loss": 2.1137, "step": 38720 }, { "epoch": 0.81, "grad_norm": 0.447265625, "learning_rate": 3.0062937719662485e-06, "loss": 2.1006, "step": 38730 }, { "epoch": 0.81, "grad_norm": 0.43359375, "learning_rate": 3.0001032208706653e-06, "loss": 2.1159, "step": 38740 }, { "epoch": 0.81, "grad_norm": 0.4375, "learning_rate": 2.9939183418227966e-06, "loss": 2.1229, "step": 38750 }, { "epoch": 0.81, "grad_norm": 0.44140625, "learning_rate": 2.987739137746071e-06, "loss": 2.124, "step": 38760 }, { "epoch": 0.81, "grad_norm": 0.421875, "learning_rate": 2.9815656115612473e-06, "loss": 2.1329, "step": 38770 }, { "epoch": 0.81, "grad_norm": 0.4296875, "learning_rate": 2.9753977661863956e-06, "loss": 2.0899, "step": 38780 }, { "epoch": 0.81, "grad_norm": 0.4453125, "learning_rate": 2.9692356045368964e-06, "loss": 2.0893, "step": 38790 }, { "epoch": 0.81, "grad_norm": 0.43359375, "learning_rate": 2.963079129525447e-06, "loss": 2.0987, "step": 38800 }, { "epoch": 0.81, "grad_norm": 0.42578125, "learning_rate": 2.956928344062066e-06, "loss": 2.1152, "step": 38810 }, { "epoch": 0.81, "grad_norm": 0.423828125, "learning_rate": 2.9507832510540633e-06, "loss": 2.0963, "step": 38820 }, { "epoch": 0.81, "grad_norm": 0.45703125, "learning_rate": 2.944643853406076e-06, "loss": 2.1232, "step": 38830 }, { "epoch": 0.81, "grad_norm": 0.453125, "learning_rate": 2.9385101540200375e-06, "loss": 2.085, "step": 38840 }, { "epoch": 0.81, "grad_norm": 0.44921875, "learning_rate": 2.9323821557952007e-06, "loss": 2.1208, "step": 38850 }, { "epoch": 0.81, "grad_norm": 0.427734375, "learning_rate": 2.926259861628105e-06, "loss": 2.0946, "step": 38860 }, { "epoch": 0.81, "grad_norm": 0.443359375, "learning_rate": 2.9201432744126074e-06, "loss": 2.1473, "step": 38870 }, { "epoch": 0.81, "grad_norm": 0.4765625, "learning_rate": 2.9140323970398695e-06, "loss": 2.1226, "step": 38880 }, { "epoch": 0.81, "grad_norm": 0.427734375, "learning_rate": 2.907927232398342e-06, "loss": 2.1028, "step": 38890 }, { "epoch": 0.81, "grad_norm": 0.44140625, "learning_rate": 2.901827783373782e-06, "loss": 2.1287, "step": 38900 }, { "epoch": 0.81, "grad_norm": 0.431640625, "learning_rate": 2.895734052849251e-06, "loss": 2.1321, "step": 38910 }, { "epoch": 0.81, "grad_norm": 0.43359375, "learning_rate": 2.889646043705094e-06, "loss": 2.1108, "step": 38920 }, { "epoch": 0.81, "grad_norm": 0.439453125, "learning_rate": 2.883563758818965e-06, "loss": 2.1251, "step": 38930 }, { "epoch": 0.81, "grad_norm": 0.7734375, "learning_rate": 2.8774872010658005e-06, "loss": 2.1175, "step": 38940 }, { "epoch": 0.81, "grad_norm": 0.43359375, "learning_rate": 2.8714163733178412e-06, "loss": 2.122, "step": 38950 }, { "epoch": 0.81, "grad_norm": 0.44921875, "learning_rate": 2.865351278444607e-06, "loss": 2.1043, "step": 38960 }, { "epoch": 0.81, "grad_norm": 0.427734375, "learning_rate": 2.859291919312924e-06, "loss": 2.1147, "step": 38970 }, { "epoch": 0.81, "grad_norm": 0.447265625, "learning_rate": 2.853238298786887e-06, "loss": 2.1021, "step": 38980 }, { "epoch": 0.81, "grad_norm": 0.4296875, "learning_rate": 2.847190419727897e-06, "loss": 2.1217, "step": 38990 }, { "epoch": 0.81, "grad_norm": 0.443359375, "learning_rate": 2.8411482849946352e-06, "loss": 2.1325, "step": 39000 }, { "epoch": 0.81, "eval_accuracy": 0.5590789981681822, "eval_loss": 1.993077278137207, "eval_runtime": 16.4287, "eval_samples_per_second": 36.217, "eval_steps_per_second": 1.157, "step": 39000 }, { "epoch": 0.81, "grad_norm": 0.423828125, "learning_rate": 2.835111897443057e-06, "loss": 2.1213, "step": 39010 }, { "epoch": 0.81, "grad_norm": 0.4296875, "learning_rate": 2.8290812599264178e-06, "loss": 2.1199, "step": 39020 }, { "epoch": 0.81, "grad_norm": 0.439453125, "learning_rate": 2.8230563752952426e-06, "loss": 2.1274, "step": 39030 }, { "epoch": 0.81, "grad_norm": 0.490234375, "learning_rate": 2.817037246397351e-06, "loss": 2.1116, "step": 39040 }, { "epoch": 0.81, "grad_norm": 0.451171875, "learning_rate": 2.81102387607782e-06, "loss": 2.1164, "step": 39050 }, { "epoch": 0.81, "grad_norm": 0.423828125, "learning_rate": 2.8050162671790264e-06, "loss": 2.0473, "step": 39060 }, { "epoch": 0.81, "grad_norm": 0.427734375, "learning_rate": 2.799014422540615e-06, "loss": 2.0956, "step": 39070 }, { "epoch": 0.81, "grad_norm": 0.435546875, "learning_rate": 2.7930183449995e-06, "loss": 2.1364, "step": 39080 }, { "epoch": 0.81, "grad_norm": 0.4296875, "learning_rate": 2.7870280373898792e-06, "loss": 2.1309, "step": 39090 }, { "epoch": 0.81, "grad_norm": 0.4375, "learning_rate": 2.7810435025432244e-06, "loss": 2.1126, "step": 39100 }, { "epoch": 0.81, "grad_norm": 0.43359375, "learning_rate": 2.775064743288265e-06, "loss": 2.119, "step": 39110 }, { "epoch": 0.81, "grad_norm": 0.439453125, "learning_rate": 2.7690917624510127e-06, "loss": 2.1179, "step": 39120 }, { "epoch": 0.81, "grad_norm": 0.458984375, "learning_rate": 2.7631245628547496e-06, "loss": 2.1025, "step": 39130 }, { "epoch": 0.81, "grad_norm": 0.462890625, "learning_rate": 2.7571631473200115e-06, "loss": 2.1248, "step": 39140 }, { "epoch": 0.81, "grad_norm": 0.4296875, "learning_rate": 2.7512075186646125e-06, "loss": 2.0777, "step": 39150 }, { "epoch": 0.81, "grad_norm": 0.427734375, "learning_rate": 2.745257679703632e-06, "loss": 2.0887, "step": 39160 }, { "epoch": 0.81, "grad_norm": 0.48828125, "learning_rate": 2.739313633249401e-06, "loss": 2.0912, "step": 39170 }, { "epoch": 0.81, "grad_norm": 0.4765625, "learning_rate": 2.7333753821115252e-06, "loss": 2.0972, "step": 39180 }, { "epoch": 0.81, "grad_norm": 0.439453125, "learning_rate": 2.7274429290968626e-06, "loss": 2.1552, "step": 39190 }, { "epoch": 0.81, "grad_norm": 0.451171875, "learning_rate": 2.721516277009537e-06, "loss": 2.1281, "step": 39200 }, { "epoch": 0.82, "grad_norm": 0.431640625, "learning_rate": 2.715595428650922e-06, "loss": 2.1185, "step": 39210 }, { "epoch": 0.82, "grad_norm": 0.423828125, "learning_rate": 2.7096803868196546e-06, "loss": 2.1236, "step": 39220 }, { "epoch": 0.82, "grad_norm": 0.455078125, "learning_rate": 2.703771154311631e-06, "loss": 2.0983, "step": 39230 }, { "epoch": 0.82, "grad_norm": 0.44140625, "learning_rate": 2.6978677339199877e-06, "loss": 2.1234, "step": 39240 }, { "epoch": 0.82, "grad_norm": 0.421875, "learning_rate": 2.691970128435125e-06, "loss": 2.1048, "step": 39250 }, { "epoch": 0.82, "grad_norm": 0.43359375, "learning_rate": 2.6860783406446986e-06, "loss": 2.1394, "step": 39260 }, { "epoch": 0.82, "grad_norm": 0.44140625, "learning_rate": 2.6801923733335975e-06, "loss": 2.1226, "step": 39270 }, { "epoch": 0.82, "grad_norm": 0.455078125, "learning_rate": 2.6743122292839726e-06, "loss": 2.1134, "step": 39280 }, { "epoch": 0.82, "grad_norm": 0.43359375, "learning_rate": 2.668437911275226e-06, "loss": 2.0929, "step": 39290 }, { "epoch": 0.82, "grad_norm": 0.4375, "learning_rate": 2.6625694220839896e-06, "loss": 2.1166, "step": 39300 }, { "epoch": 0.82, "grad_norm": 0.416015625, "learning_rate": 2.656706764484153e-06, "loss": 2.1016, "step": 39310 }, { "epoch": 0.82, "grad_norm": 0.49609375, "learning_rate": 2.650849941246853e-06, "loss": 2.137, "step": 39320 }, { "epoch": 0.82, "grad_norm": 0.427734375, "learning_rate": 2.6449989551404514e-06, "loss": 2.1449, "step": 39330 }, { "epoch": 0.82, "grad_norm": 0.470703125, "learning_rate": 2.6391538089305663e-06, "loss": 2.1063, "step": 39340 }, { "epoch": 0.82, "grad_norm": 0.431640625, "learning_rate": 2.633314505380052e-06, "loss": 2.115, "step": 39350 }, { "epoch": 0.82, "grad_norm": 0.42578125, "learning_rate": 2.627481047248997e-06, "loss": 2.1144, "step": 39360 }, { "epoch": 0.82, "grad_norm": 0.44921875, "learning_rate": 2.6216534372947274e-06, "loss": 2.0999, "step": 39370 }, { "epoch": 0.82, "grad_norm": 0.46875, "learning_rate": 2.615831678271816e-06, "loss": 2.1344, "step": 39380 }, { "epoch": 0.82, "grad_norm": 0.439453125, "learning_rate": 2.6100157729320497e-06, "loss": 2.1486, "step": 39390 }, { "epoch": 0.82, "grad_norm": 0.4453125, "learning_rate": 2.6042057240244653e-06, "loss": 2.1418, "step": 39400 }, { "epoch": 0.82, "grad_norm": 0.439453125, "learning_rate": 2.598401534295327e-06, "loss": 2.1089, "step": 39410 }, { "epoch": 0.82, "grad_norm": 0.451171875, "learning_rate": 2.5926032064881298e-06, "loss": 2.1405, "step": 39420 }, { "epoch": 0.82, "grad_norm": 0.4765625, "learning_rate": 2.58681074334359e-06, "loss": 2.104, "step": 39430 }, { "epoch": 0.82, "grad_norm": 0.44921875, "learning_rate": 2.581024147599667e-06, "loss": 2.1205, "step": 39440 }, { "epoch": 0.82, "grad_norm": 0.4296875, "learning_rate": 2.5752434219915298e-06, "loss": 2.1532, "step": 39450 }, { "epoch": 0.82, "grad_norm": 0.51171875, "learning_rate": 2.5694685692515895e-06, "loss": 2.1023, "step": 39460 }, { "epoch": 0.82, "grad_norm": 0.439453125, "learning_rate": 2.563699592109464e-06, "loss": 2.1162, "step": 39470 }, { "epoch": 0.82, "grad_norm": 0.4296875, "learning_rate": 2.5579364932920106e-06, "loss": 2.1218, "step": 39480 }, { "epoch": 0.82, "grad_norm": 0.4375, "learning_rate": 2.5521792755232955e-06, "loss": 2.121, "step": 39490 }, { "epoch": 0.82, "grad_norm": 0.42578125, "learning_rate": 2.5464279415246093e-06, "loss": 2.1046, "step": 39500 }, { "epoch": 0.82, "eval_accuracy": 0.55914635648981, "eval_loss": 1.9930146932601929, "eval_runtime": 16.4521, "eval_samples_per_second": 36.166, "eval_steps_per_second": 1.155, "step": 39500 }, { "epoch": 0.82, "grad_norm": 0.443359375, "learning_rate": 2.54068249401447e-06, "loss": 2.1028, "step": 39510 }, { "epoch": 0.82, "grad_norm": 0.423828125, "learning_rate": 2.5349429357085952e-06, "loss": 2.0948, "step": 39520 }, { "epoch": 0.82, "grad_norm": 0.431640625, "learning_rate": 2.5292092693199344e-06, "loss": 2.0923, "step": 39530 }, { "epoch": 0.82, "grad_norm": 0.49609375, "learning_rate": 2.523481497558648e-06, "loss": 2.1247, "step": 39540 }, { "epoch": 0.82, "grad_norm": 0.43359375, "learning_rate": 2.5177596231321037e-06, "loss": 2.1331, "step": 39550 }, { "epoch": 0.82, "grad_norm": 0.4140625, "learning_rate": 2.51204364874489e-06, "loss": 2.1433, "step": 39560 }, { "epoch": 0.82, "grad_norm": 0.431640625, "learning_rate": 2.5063335770988053e-06, "loss": 2.1148, "step": 39570 }, { "epoch": 0.82, "grad_norm": 0.419921875, "learning_rate": 2.5006294108928528e-06, "loss": 2.1084, "step": 39580 }, { "epoch": 0.82, "grad_norm": 0.486328125, "learning_rate": 2.494931152823247e-06, "loss": 2.0711, "step": 39590 }, { "epoch": 0.82, "grad_norm": 0.453125, "learning_rate": 2.4892388055834137e-06, "loss": 2.1357, "step": 39600 }, { "epoch": 0.82, "grad_norm": 0.439453125, "learning_rate": 2.483552371863982e-06, "loss": 2.1171, "step": 39610 }, { "epoch": 0.82, "grad_norm": 0.431640625, "learning_rate": 2.47787185435278e-06, "loss": 2.109, "step": 39620 }, { "epoch": 0.82, "grad_norm": 0.443359375, "learning_rate": 2.472197255734847e-06, "loss": 2.1119, "step": 39630 }, { "epoch": 0.82, "grad_norm": 0.427734375, "learning_rate": 2.4665285786924292e-06, "loss": 2.1521, "step": 39640 }, { "epoch": 0.82, "grad_norm": 0.423828125, "learning_rate": 2.4608658259049544e-06, "loss": 2.1119, "step": 39650 }, { "epoch": 0.82, "grad_norm": 0.45703125, "learning_rate": 2.455209000049072e-06, "loss": 2.1421, "step": 39660 }, { "epoch": 0.82, "grad_norm": 0.44921875, "learning_rate": 2.449558103798619e-06, "loss": 2.162, "step": 39670 }, { "epoch": 0.82, "grad_norm": 0.4609375, "learning_rate": 2.443913139824625e-06, "loss": 2.1142, "step": 39680 }, { "epoch": 0.83, "grad_norm": 0.451171875, "learning_rate": 2.4382741107953317e-06, "loss": 2.1177, "step": 39690 }, { "epoch": 0.83, "grad_norm": 0.431640625, "learning_rate": 2.432641019376157e-06, "loss": 2.1082, "step": 39700 }, { "epoch": 0.83, "grad_norm": 0.48828125, "learning_rate": 2.4270138682297273e-06, "loss": 2.0948, "step": 39710 }, { "epoch": 0.83, "grad_norm": 0.423828125, "learning_rate": 2.421392660015847e-06, "loss": 2.1198, "step": 39720 }, { "epoch": 0.83, "grad_norm": 0.46875, "learning_rate": 2.415777397391526e-06, "loss": 2.1266, "step": 39730 }, { "epoch": 0.83, "grad_norm": 0.4609375, "learning_rate": 2.4101680830109523e-06, "loss": 2.1253, "step": 39740 }, { "epoch": 0.83, "grad_norm": 0.435546875, "learning_rate": 2.404564719525507e-06, "loss": 2.0963, "step": 39750 }, { "epoch": 0.83, "grad_norm": 0.44921875, "learning_rate": 2.398967309583765e-06, "loss": 2.1115, "step": 39760 }, { "epoch": 0.83, "grad_norm": 0.451171875, "learning_rate": 2.393375855831473e-06, "loss": 2.1266, "step": 39770 }, { "epoch": 0.83, "grad_norm": 0.43359375, "learning_rate": 2.3877903609115697e-06, "loss": 2.1145, "step": 39780 }, { "epoch": 0.83, "grad_norm": 0.466796875, "learning_rate": 2.3822108274641823e-06, "loss": 2.1107, "step": 39790 }, { "epoch": 0.83, "grad_norm": 0.4296875, "learning_rate": 2.3766372581266146e-06, "loss": 2.1349, "step": 39800 }, { "epoch": 0.83, "grad_norm": 0.41796875, "learning_rate": 2.3710696555333468e-06, "loss": 2.1091, "step": 39810 }, { "epoch": 0.83, "grad_norm": 0.423828125, "learning_rate": 2.3655080223160476e-06, "loss": 2.0956, "step": 39820 }, { "epoch": 0.83, "grad_norm": 0.427734375, "learning_rate": 2.359952361103564e-06, "loss": 2.1263, "step": 39830 }, { "epoch": 0.83, "grad_norm": 0.435546875, "learning_rate": 2.354402674521907e-06, "loss": 2.11, "step": 39840 }, { "epoch": 0.83, "grad_norm": 0.431640625, "learning_rate": 2.3488589651942803e-06, "loss": 2.1226, "step": 39850 }, { "epoch": 0.83, "grad_norm": 0.45703125, "learning_rate": 2.343321235741056e-06, "loss": 2.1063, "step": 39860 }, { "epoch": 0.83, "grad_norm": 0.5234375, "learning_rate": 2.337789488779772e-06, "loss": 2.1354, "step": 39870 }, { "epoch": 0.83, "grad_norm": 0.4296875, "learning_rate": 2.3322637269251497e-06, "loss": 2.1069, "step": 39880 }, { "epoch": 0.83, "grad_norm": 0.447265625, "learning_rate": 2.32674395278908e-06, "loss": 2.1531, "step": 39890 }, { "epoch": 0.83, "grad_norm": 0.427734375, "learning_rate": 2.3212301689806127e-06, "loss": 2.1052, "step": 39900 }, { "epoch": 0.83, "grad_norm": 0.4765625, "learning_rate": 2.3157223781059776e-06, "loss": 2.1108, "step": 39910 }, { "epoch": 0.83, "grad_norm": 0.423828125, "learning_rate": 2.3102205827685747e-06, "loss": 2.1199, "step": 39920 }, { "epoch": 0.83, "grad_norm": 0.4453125, "learning_rate": 2.304724785568953e-06, "loss": 2.1061, "step": 39930 }, { "epoch": 0.83, "grad_norm": 0.4375, "learning_rate": 2.299234989104843e-06, "loss": 2.1323, "step": 39940 }, { "epoch": 0.83, "grad_norm": 0.45703125, "learning_rate": 2.2937511959711293e-06, "loss": 2.1359, "step": 39950 }, { "epoch": 0.83, "grad_norm": 0.4375, "learning_rate": 2.2882734087598685e-06, "loss": 2.0921, "step": 39960 }, { "epoch": 0.83, "grad_norm": 0.44921875, "learning_rate": 2.2828016300602623e-06, "loss": 2.1156, "step": 39970 }, { "epoch": 0.83, "grad_norm": 0.455078125, "learning_rate": 2.2773358624586887e-06, "loss": 2.1434, "step": 39980 }, { "epoch": 0.83, "grad_norm": 0.44921875, "learning_rate": 2.271876108538678e-06, "loss": 2.1101, "step": 39990 }, { "epoch": 0.83, "grad_norm": 0.427734375, "learning_rate": 2.266422370880914e-06, "loss": 2.1096, "step": 40000 }, { "epoch": 0.83, "eval_accuracy": 0.5590970699130092, "eval_loss": 1.9930177927017212, "eval_runtime": 16.4304, "eval_samples_per_second": 36.213, "eval_steps_per_second": 1.156, "step": 40000 }, { "epoch": 0.83, "grad_norm": 0.4296875, "learning_rate": 2.260974652063243e-06, "loss": 2.1459, "step": 40010 }, { "epoch": 0.83, "grad_norm": 0.453125, "learning_rate": 2.255532954660667e-06, "loss": 2.116, "step": 40020 }, { "epoch": 0.83, "grad_norm": 0.43359375, "learning_rate": 2.250097281245333e-06, "loss": 2.0934, "step": 40030 }, { "epoch": 0.83, "grad_norm": 0.421875, "learning_rate": 2.2446676343865464e-06, "loss": 2.1055, "step": 40040 }, { "epoch": 0.83, "grad_norm": 0.4375, "learning_rate": 2.239244016650772e-06, "loss": 2.1163, "step": 40050 }, { "epoch": 0.83, "grad_norm": 0.435546875, "learning_rate": 2.2338264306016065e-06, "loss": 2.1134, "step": 40060 }, { "epoch": 0.83, "grad_norm": 0.453125, "learning_rate": 2.228414878799811e-06, "loss": 2.1396, "step": 40070 }, { "epoch": 0.83, "grad_norm": 0.455078125, "learning_rate": 2.2230093638032912e-06, "loss": 2.1483, "step": 40080 }, { "epoch": 0.83, "grad_norm": 0.451171875, "learning_rate": 2.2176098881670913e-06, "loss": 2.0784, "step": 40090 }, { "epoch": 0.83, "grad_norm": 0.46875, "learning_rate": 2.2122164544434103e-06, "loss": 2.1203, "step": 40100 }, { "epoch": 0.83, "grad_norm": 0.43359375, "learning_rate": 2.206829065181592e-06, "loss": 2.1252, "step": 40110 }, { "epoch": 0.83, "grad_norm": 0.44140625, "learning_rate": 2.201447722928111e-06, "loss": 2.1072, "step": 40120 }, { "epoch": 0.83, "grad_norm": 0.43359375, "learning_rate": 2.1960724302265946e-06, "loss": 2.1589, "step": 40130 }, { "epoch": 0.83, "grad_norm": 0.41796875, "learning_rate": 2.190703189617813e-06, "loss": 2.1173, "step": 40140 }, { "epoch": 0.83, "grad_norm": 0.41796875, "learning_rate": 2.1853400036396636e-06, "loss": 2.1002, "step": 40150 }, { "epoch": 0.83, "grad_norm": 0.4453125, "learning_rate": 2.1799828748271897e-06, "loss": 2.1007, "step": 40160 }, { "epoch": 0.84, "grad_norm": 0.42578125, "learning_rate": 2.174631805712576e-06, "loss": 2.1291, "step": 40170 }, { "epoch": 0.84, "grad_norm": 0.431640625, "learning_rate": 2.1692867988251315e-06, "loss": 2.1131, "step": 40180 }, { "epoch": 0.84, "grad_norm": 0.435546875, "learning_rate": 2.1639478566913107e-06, "loss": 2.0844, "step": 40190 }, { "epoch": 0.84, "grad_norm": 0.435546875, "learning_rate": 2.158614981834691e-06, "loss": 2.1219, "step": 40200 }, { "epoch": 0.84, "grad_norm": 0.46484375, "learning_rate": 2.1532881767759915e-06, "loss": 2.1261, "step": 40210 }, { "epoch": 0.84, "grad_norm": 0.42578125, "learning_rate": 2.147967444033056e-06, "loss": 2.105, "step": 40220 }, { "epoch": 0.84, "grad_norm": 0.44140625, "learning_rate": 2.1426527861208605e-06, "loss": 2.1067, "step": 40230 }, { "epoch": 0.84, "grad_norm": 0.443359375, "learning_rate": 2.137344205551514e-06, "loss": 2.1079, "step": 40240 }, { "epoch": 0.84, "grad_norm": 0.462890625, "learning_rate": 2.1320417048342395e-06, "loss": 2.1024, "step": 40250 }, { "epoch": 0.84, "grad_norm": 0.47265625, "learning_rate": 2.1267452864754015e-06, "loss": 2.1224, "step": 40260 }, { "epoch": 0.84, "grad_norm": 0.515625, "learning_rate": 2.121454952978485e-06, "loss": 2.1138, "step": 40270 }, { "epoch": 0.84, "grad_norm": 0.447265625, "learning_rate": 2.1161707068440887e-06, "loss": 2.1279, "step": 40280 }, { "epoch": 0.84, "grad_norm": 0.44921875, "learning_rate": 2.110892550569947e-06, "loss": 2.1078, "step": 40290 }, { "epoch": 0.84, "grad_norm": 0.44921875, "learning_rate": 2.1056204866509145e-06, "loss": 2.0859, "step": 40300 }, { "epoch": 0.84, "grad_norm": 0.431640625, "learning_rate": 2.100354517578955e-06, "loss": 2.11, "step": 40310 }, { "epoch": 0.84, "grad_norm": 0.455078125, "learning_rate": 2.095094645843162e-06, "loss": 2.1157, "step": 40320 }, { "epoch": 0.84, "grad_norm": 0.43359375, "learning_rate": 2.0898408739297476e-06, "loss": 2.1055, "step": 40330 }, { "epoch": 0.84, "grad_norm": 0.42578125, "learning_rate": 2.0845932043220307e-06, "loss": 2.0706, "step": 40340 }, { "epoch": 0.84, "grad_norm": 0.423828125, "learning_rate": 2.0793516395004554e-06, "loss": 2.1144, "step": 40350 }, { "epoch": 0.84, "grad_norm": 0.439453125, "learning_rate": 2.0741161819425748e-06, "loss": 2.1332, "step": 40360 }, { "epoch": 0.84, "grad_norm": 0.427734375, "learning_rate": 2.0688868341230626e-06, "loss": 2.1171, "step": 40370 }, { "epoch": 0.84, "grad_norm": 0.42578125, "learning_rate": 2.063663598513694e-06, "loss": 2.0994, "step": 40380 }, { "epoch": 0.84, "grad_norm": 0.462890625, "learning_rate": 2.0584464775833596e-06, "loss": 2.0858, "step": 40390 }, { "epoch": 0.84, "grad_norm": 0.43359375, "learning_rate": 2.053235473798066e-06, "loss": 2.0817, "step": 40400 }, { "epoch": 0.84, "grad_norm": 0.44140625, "learning_rate": 2.048030589620917e-06, "loss": 2.1484, "step": 40410 }, { "epoch": 0.84, "grad_norm": 0.43359375, "learning_rate": 2.0428318275121327e-06, "loss": 2.1246, "step": 40420 }, { "epoch": 0.84, "grad_norm": 0.53125, "learning_rate": 2.0376391899290397e-06, "loss": 2.1304, "step": 40430 }, { "epoch": 0.84, "grad_norm": 0.451171875, "learning_rate": 2.0324526793260586e-06, "loss": 2.1256, "step": 40440 }, { "epoch": 0.84, "grad_norm": 0.42578125, "learning_rate": 2.0272722981547294e-06, "loss": 2.1141, "step": 40450 }, { "epoch": 0.84, "grad_norm": 0.427734375, "learning_rate": 2.0220980488636803e-06, "loss": 2.1487, "step": 40460 }, { "epoch": 0.84, "grad_norm": 0.41015625, "learning_rate": 2.016929933898657e-06, "loss": 2.1142, "step": 40470 }, { "epoch": 0.84, "grad_norm": 0.68359375, "learning_rate": 2.0117679557024866e-06, "loss": 2.1062, "step": 40480 }, { "epoch": 0.84, "grad_norm": 0.4609375, "learning_rate": 2.0066121167151125e-06, "loss": 2.1048, "step": 40490 }, { "epoch": 0.84, "grad_norm": 0.5078125, "learning_rate": 2.001462419373565e-06, "loss": 2.1149, "step": 40500 }, { "epoch": 0.84, "eval_accuracy": 0.559087212597649, "eval_loss": 1.9930588006973267, "eval_runtime": 16.4374, "eval_samples_per_second": 36.198, "eval_steps_per_second": 1.156, "step": 40500 }, { "epoch": 0.84, "grad_norm": 0.431640625, "learning_rate": 1.9963188661119798e-06, "loss": 2.1208, "step": 40510 }, { "epoch": 0.84, "grad_norm": 0.46484375, "learning_rate": 1.991181459361584e-06, "loss": 2.1136, "step": 40520 }, { "epoch": 0.84, "grad_norm": 0.44140625, "learning_rate": 1.986050201550696e-06, "loss": 2.1268, "step": 40530 }, { "epoch": 0.84, "grad_norm": 0.4375, "learning_rate": 1.9809250951047335e-06, "loss": 2.105, "step": 40540 }, { "epoch": 0.84, "grad_norm": 0.44140625, "learning_rate": 1.9758061424462058e-06, "loss": 2.1074, "step": 40550 }, { "epoch": 0.84, "grad_norm": 0.423828125, "learning_rate": 1.9706933459947156e-06, "loss": 2.1022, "step": 40560 }, { "epoch": 0.84, "grad_norm": 0.4453125, "learning_rate": 1.9655867081669463e-06, "loss": 2.1218, "step": 40570 }, { "epoch": 0.84, "grad_norm": 0.439453125, "learning_rate": 1.9604862313766775e-06, "loss": 2.0989, "step": 40580 }, { "epoch": 0.84, "grad_norm": 0.427734375, "learning_rate": 1.9553919180347818e-06, "loss": 2.138, "step": 40590 }, { "epoch": 0.84, "grad_norm": 0.453125, "learning_rate": 1.950303770549203e-06, "loss": 2.1152, "step": 40600 }, { "epoch": 0.84, "grad_norm": 0.447265625, "learning_rate": 1.945221791324985e-06, "loss": 2.0858, "step": 40610 }, { "epoch": 0.84, "grad_norm": 0.439453125, "learning_rate": 1.9401459827642516e-06, "loss": 2.1118, "step": 40620 }, { "epoch": 0.84, "grad_norm": 0.44921875, "learning_rate": 1.9350763472662057e-06, "loss": 2.1138, "step": 40630 }, { "epoch": 0.84, "grad_norm": 0.439453125, "learning_rate": 1.930012887227138e-06, "loss": 2.1353, "step": 40640 }, { "epoch": 0.85, "grad_norm": 0.435546875, "learning_rate": 1.924955605040421e-06, "loss": 2.1248, "step": 40650 }, { "epoch": 0.85, "grad_norm": 0.4375, "learning_rate": 1.919904503096497e-06, "loss": 2.1035, "step": 40660 }, { "epoch": 0.85, "grad_norm": 0.427734375, "learning_rate": 1.914859583782897e-06, "loss": 2.1136, "step": 40670 }, { "epoch": 0.85, "grad_norm": 0.458984375, "learning_rate": 1.9098208494842323e-06, "loss": 2.136, "step": 40680 }, { "epoch": 0.85, "grad_norm": 0.4375, "learning_rate": 1.9047883025821777e-06, "loss": 2.1199, "step": 40690 }, { "epoch": 0.85, "grad_norm": 0.447265625, "learning_rate": 1.8997619454554955e-06, "loss": 2.1209, "step": 40700 }, { "epoch": 0.85, "grad_norm": 0.47265625, "learning_rate": 1.8947417804800131e-06, "loss": 2.1306, "step": 40710 }, { "epoch": 0.85, "grad_norm": 0.447265625, "learning_rate": 1.8897278100286413e-06, "loss": 2.1293, "step": 40720 }, { "epoch": 0.85, "grad_norm": 0.443359375, "learning_rate": 1.8847200364713507e-06, "loss": 2.1038, "step": 40730 }, { "epoch": 0.85, "grad_norm": 0.44140625, "learning_rate": 1.879718462175193e-06, "loss": 2.1225, "step": 40740 }, { "epoch": 0.85, "grad_norm": 0.4296875, "learning_rate": 1.8747230895042883e-06, "loss": 2.1301, "step": 40750 }, { "epoch": 0.85, "grad_norm": 0.427734375, "learning_rate": 1.8697339208198183e-06, "loss": 2.1155, "step": 40760 }, { "epoch": 0.85, "grad_norm": 0.427734375, "learning_rate": 1.8647509584800365e-06, "loss": 2.1167, "step": 40770 }, { "epoch": 0.85, "grad_norm": 0.41796875, "learning_rate": 1.8597742048402687e-06, "loss": 2.1075, "step": 40780 }, { "epoch": 0.85, "grad_norm": 0.4375, "learning_rate": 1.8548036622528952e-06, "loss": 2.1266, "step": 40790 }, { "epoch": 0.85, "grad_norm": 0.41015625, "learning_rate": 1.8498393330673663e-06, "loss": 2.1219, "step": 40800 }, { "epoch": 0.85, "grad_norm": 0.404296875, "learning_rate": 1.8448812196302e-06, "loss": 2.1229, "step": 40810 }, { "epoch": 0.85, "grad_norm": 0.439453125, "learning_rate": 1.8399293242849635e-06, "loss": 2.1481, "step": 40820 }, { "epoch": 0.85, "grad_norm": 0.458984375, "learning_rate": 1.834983649372296e-06, "loss": 2.1021, "step": 40830 }, { "epoch": 0.85, "grad_norm": 0.423828125, "learning_rate": 1.8300441972298976e-06, "loss": 2.1458, "step": 40840 }, { "epoch": 0.85, "grad_norm": 0.43359375, "learning_rate": 1.825110970192514e-06, "loss": 2.1128, "step": 40850 }, { "epoch": 0.85, "grad_norm": 0.44140625, "learning_rate": 1.8201839705919598e-06, "loss": 2.1155, "step": 40860 }, { "epoch": 0.85, "grad_norm": 0.453125, "learning_rate": 1.815263200757109e-06, "loss": 2.151, "step": 40870 }, { "epoch": 0.85, "grad_norm": 0.45703125, "learning_rate": 1.810348663013876e-06, "loss": 2.1232, "step": 40880 }, { "epoch": 0.85, "grad_norm": 0.427734375, "learning_rate": 1.8054403596852425e-06, "loss": 2.0951, "step": 40890 }, { "epoch": 0.85, "grad_norm": 0.42578125, "learning_rate": 1.8005382930912446e-06, "loss": 2.1109, "step": 40900 }, { "epoch": 0.85, "grad_norm": 0.4296875, "learning_rate": 1.795642465548955e-06, "loss": 2.1178, "step": 40910 }, { "epoch": 0.85, "grad_norm": 0.609375, "learning_rate": 1.7907528793725132e-06, "loss": 2.1073, "step": 40920 }, { "epoch": 0.85, "grad_norm": 0.435546875, "learning_rate": 1.7858695368731048e-06, "loss": 2.1175, "step": 40930 }, { "epoch": 0.85, "grad_norm": 0.4453125, "learning_rate": 1.7809924403589589e-06, "loss": 2.091, "step": 40940 }, { "epoch": 0.85, "grad_norm": 0.42578125, "learning_rate": 1.776121592135358e-06, "loss": 2.0829, "step": 40950 }, { "epoch": 0.85, "grad_norm": 0.443359375, "learning_rate": 1.7712569945046253e-06, "loss": 2.1106, "step": 40960 }, { "epoch": 0.85, "grad_norm": 0.4296875, "learning_rate": 1.7663986497661383e-06, "loss": 2.1407, "step": 40970 }, { "epoch": 0.85, "grad_norm": 0.4375, "learning_rate": 1.7615465602163067e-06, "loss": 2.1278, "step": 40980 }, { "epoch": 0.85, "grad_norm": 0.458984375, "learning_rate": 1.756700728148594e-06, "loss": 2.1042, "step": 40990 }, { "epoch": 0.85, "grad_norm": 0.421875, "learning_rate": 1.7518611558535086e-06, "loss": 2.122, "step": 41000 }, { "epoch": 0.85, "eval_accuracy": 0.559128284744983, "eval_loss": 1.9930520057678223, "eval_runtime": 16.4331, "eval_samples_per_second": 36.207, "eval_steps_per_second": 1.156, "step": 41000 }, { "epoch": 0.85, "grad_norm": 0.431640625, "learning_rate": 1.7470278456185834e-06, "loss": 2.1016, "step": 41010 }, { "epoch": 0.85, "grad_norm": 0.439453125, "learning_rate": 1.7422007997284063e-06, "loss": 2.1012, "step": 41020 }, { "epoch": 0.85, "grad_norm": 0.4453125, "learning_rate": 1.737380020464605e-06, "loss": 2.1105, "step": 41030 }, { "epoch": 0.85, "grad_norm": 0.447265625, "learning_rate": 1.7325655101058308e-06, "loss": 2.1164, "step": 41040 }, { "epoch": 0.85, "grad_norm": 0.4296875, "learning_rate": 1.7277572709277861e-06, "loss": 2.1155, "step": 41050 }, { "epoch": 0.85, "grad_norm": 0.439453125, "learning_rate": 1.7229553052032066e-06, "loss": 2.1464, "step": 41060 }, { "epoch": 0.85, "grad_norm": 0.423828125, "learning_rate": 1.718159615201853e-06, "loss": 2.1352, "step": 41070 }, { "epoch": 0.85, "grad_norm": 0.44140625, "learning_rate": 1.7133702031905307e-06, "loss": 2.1204, "step": 41080 }, { "epoch": 0.85, "grad_norm": 0.42578125, "learning_rate": 1.7085870714330753e-06, "loss": 2.084, "step": 41090 }, { "epoch": 0.85, "grad_norm": 0.458984375, "learning_rate": 1.7038102221903483e-06, "loss": 2.111, "step": 41100 }, { "epoch": 0.85, "grad_norm": 0.43359375, "learning_rate": 1.699039657720247e-06, "loss": 2.1301, "step": 41110 }, { "epoch": 0.85, "grad_norm": 0.412109375, "learning_rate": 1.694275380277701e-06, "loss": 2.0874, "step": 41120 }, { "epoch": 0.86, "grad_norm": 0.435546875, "learning_rate": 1.6895173921146555e-06, "loss": 2.1138, "step": 41130 }, { "epoch": 0.86, "grad_norm": 0.4375, "learning_rate": 1.684765695480095e-06, "loss": 2.0863, "step": 41140 }, { "epoch": 0.86, "grad_norm": 0.478515625, "learning_rate": 1.6800202926200276e-06, "loss": 2.1258, "step": 41150 }, { "epoch": 0.86, "grad_norm": 0.416015625, "learning_rate": 1.6752811857774874e-06, "loss": 2.0856, "step": 41160 }, { "epoch": 0.86, "grad_norm": 0.416015625, "learning_rate": 1.6705483771925244e-06, "loss": 2.0796, "step": 41170 }, { "epoch": 0.86, "grad_norm": 0.453125, "learning_rate": 1.6658218691022242e-06, "loss": 2.1002, "step": 41180 }, { "epoch": 0.86, "grad_norm": 0.46484375, "learning_rate": 1.6611016637406796e-06, "loss": 2.1254, "step": 41190 }, { "epoch": 0.86, "grad_norm": 0.443359375, "learning_rate": 1.656387763339019e-06, "loss": 2.1359, "step": 41200 }, { "epoch": 0.86, "grad_norm": 0.443359375, "learning_rate": 1.6516801701253802e-06, "loss": 2.1073, "step": 41210 }, { "epoch": 0.86, "grad_norm": 0.421875, "learning_rate": 1.6469788863249268e-06, "loss": 2.0931, "step": 41220 }, { "epoch": 0.86, "grad_norm": 0.458984375, "learning_rate": 1.6422839141598311e-06, "loss": 2.1148, "step": 41230 }, { "epoch": 0.86, "grad_norm": 0.419921875, "learning_rate": 1.6375952558492912e-06, "loss": 2.1125, "step": 41240 }, { "epoch": 0.86, "grad_norm": 0.419921875, "learning_rate": 1.632912913609521e-06, "loss": 2.1401, "step": 41250 }, { "epoch": 0.86, "grad_norm": 0.44140625, "learning_rate": 1.6282368896537386e-06, "loss": 2.1214, "step": 41260 }, { "epoch": 0.86, "grad_norm": 0.4296875, "learning_rate": 1.6235671861921858e-06, "loss": 2.1231, "step": 41270 }, { "epoch": 0.86, "grad_norm": 0.453125, "learning_rate": 1.6189038054321153e-06, "loss": 2.1373, "step": 41280 }, { "epoch": 0.86, "grad_norm": 1.234375, "learning_rate": 1.6142467495777824e-06, "loss": 2.1255, "step": 41290 }, { "epoch": 0.86, "grad_norm": 0.435546875, "learning_rate": 1.609596020830466e-06, "loss": 2.0925, "step": 41300 }, { "epoch": 0.86, "grad_norm": 0.431640625, "learning_rate": 1.604951621388448e-06, "loss": 2.0955, "step": 41310 }, { "epoch": 0.86, "grad_norm": 0.439453125, "learning_rate": 1.6003135534470137e-06, "loss": 2.1064, "step": 41320 }, { "epoch": 0.86, "grad_norm": 0.46484375, "learning_rate": 1.5956818191984628e-06, "loss": 2.0881, "step": 41330 }, { "epoch": 0.86, "grad_norm": 0.4296875, "learning_rate": 1.5910564208320993e-06, "loss": 2.1502, "step": 41340 }, { "epoch": 0.86, "grad_norm": 0.443359375, "learning_rate": 1.5864373605342342e-06, "loss": 2.0895, "step": 41350 }, { "epoch": 0.86, "grad_norm": 0.435546875, "learning_rate": 1.5818246404881775e-06, "loss": 2.1122, "step": 41360 }, { "epoch": 0.86, "grad_norm": 0.4296875, "learning_rate": 1.5772182628742454e-06, "loss": 2.12, "step": 41370 }, { "epoch": 0.86, "grad_norm": 0.419921875, "learning_rate": 1.5726182298697594e-06, "loss": 2.081, "step": 41380 }, { "epoch": 0.86, "grad_norm": 0.42578125, "learning_rate": 1.5680245436490338e-06, "loss": 2.0825, "step": 41390 }, { "epoch": 0.86, "grad_norm": 0.462890625, "learning_rate": 1.5634372063833896e-06, "loss": 2.0768, "step": 41400 }, { "epoch": 0.86, "grad_norm": 0.4375, "learning_rate": 1.5588562202411495e-06, "loss": 2.1102, "step": 41410 }, { "epoch": 0.86, "grad_norm": 0.4453125, "learning_rate": 1.5542815873876225e-06, "loss": 2.1071, "step": 41420 }, { "epoch": 0.86, "grad_norm": 0.421875, "learning_rate": 1.5497133099851263e-06, "loss": 2.1297, "step": 41430 }, { "epoch": 0.86, "grad_norm": 0.42578125, "learning_rate": 1.5451513901929714e-06, "loss": 2.1385, "step": 41440 }, { "epoch": 0.86, "grad_norm": 0.4921875, "learning_rate": 1.5405958301674582e-06, "loss": 2.1197, "step": 41450 }, { "epoch": 0.86, "grad_norm": 0.4609375, "learning_rate": 1.5360466320618876e-06, "loss": 2.104, "step": 41460 }, { "epoch": 0.86, "grad_norm": 0.431640625, "learning_rate": 1.5315037980265478e-06, "loss": 2.1565, "step": 41470 }, { "epoch": 0.86, "grad_norm": 0.44921875, "learning_rate": 1.5269673302087244e-06, "loss": 2.1101, "step": 41480 }, { "epoch": 0.86, "grad_norm": 0.439453125, "learning_rate": 1.5224372307526874e-06, "loss": 2.136, "step": 41490 }, { "epoch": 0.86, "grad_norm": 0.451171875, "learning_rate": 1.5179135017997053e-06, "loss": 2.1137, "step": 41500 }, { "epoch": 0.86, "eval_accuracy": 0.5590559977656752, "eval_loss": 1.9930741786956787, "eval_runtime": 16.5205, "eval_samples_per_second": 36.016, "eval_steps_per_second": 1.15, "step": 41500 }, { "epoch": 0.86, "grad_norm": 0.4296875, "learning_rate": 1.5133961454880247e-06, "loss": 2.1122, "step": 41510 }, { "epoch": 0.86, "grad_norm": 0.439453125, "learning_rate": 1.5088851639528872e-06, "loss": 2.101, "step": 41520 }, { "epoch": 0.86, "grad_norm": 0.44921875, "learning_rate": 1.5043805593265226e-06, "loss": 2.1284, "step": 41530 }, { "epoch": 0.86, "grad_norm": 0.4296875, "learning_rate": 1.499882333738144e-06, "loss": 2.1305, "step": 41540 }, { "epoch": 0.86, "grad_norm": 0.478515625, "learning_rate": 1.4953904893139424e-06, "loss": 2.0943, "step": 41550 }, { "epoch": 0.86, "grad_norm": 0.435546875, "learning_rate": 1.4909050281771047e-06, "loss": 2.0968, "step": 41560 }, { "epoch": 0.86, "grad_norm": 0.423828125, "learning_rate": 1.486425952447798e-06, "loss": 2.1306, "step": 41570 }, { "epoch": 0.86, "grad_norm": 0.443359375, "learning_rate": 1.4819532642431588e-06, "loss": 2.1324, "step": 41580 }, { "epoch": 0.86, "grad_norm": 0.453125, "learning_rate": 1.4774869656773194e-06, "loss": 2.1278, "step": 41590 }, { "epoch": 0.86, "grad_norm": 0.41796875, "learning_rate": 1.4730270588613876e-06, "loss": 2.1217, "step": 41600 }, { "epoch": 0.87, "grad_norm": 0.478515625, "learning_rate": 1.468573545903444e-06, "loss": 2.138, "step": 41610 }, { "epoch": 0.87, "grad_norm": 0.47265625, "learning_rate": 1.4641264289085543e-06, "loss": 2.1068, "step": 41620 }, { "epoch": 0.87, "grad_norm": 0.48046875, "learning_rate": 1.4596857099787609e-06, "loss": 2.1027, "step": 41630 }, { "epoch": 0.87, "grad_norm": 0.4375, "learning_rate": 1.4552513912130726e-06, "loss": 2.1088, "step": 41640 }, { "epoch": 0.87, "grad_norm": 0.44140625, "learning_rate": 1.4508234747074834e-06, "loss": 2.1131, "step": 41650 }, { "epoch": 0.87, "grad_norm": 0.44140625, "learning_rate": 1.44640196255496e-06, "loss": 2.1198, "step": 41660 }, { "epoch": 0.87, "grad_norm": 0.44921875, "learning_rate": 1.4419868568454347e-06, "loss": 2.1491, "step": 41670 }, { "epoch": 0.87, "grad_norm": 0.427734375, "learning_rate": 1.4375781596658194e-06, "loss": 2.1198, "step": 41680 }, { "epoch": 0.87, "grad_norm": 0.45703125, "learning_rate": 1.4331758730999939e-06, "loss": 2.0786, "step": 41690 }, { "epoch": 0.87, "grad_norm": 0.5, "learning_rate": 1.4287799992288037e-06, "loss": 2.1014, "step": 41700 }, { "epoch": 0.87, "grad_norm": 0.44921875, "learning_rate": 1.4243905401300734e-06, "loss": 2.134, "step": 41710 }, { "epoch": 0.87, "grad_norm": 0.42578125, "learning_rate": 1.4200074978785832e-06, "loss": 2.1039, "step": 41720 }, { "epoch": 0.87, "grad_norm": 0.419921875, "learning_rate": 1.4156308745460928e-06, "loss": 2.0882, "step": 41730 }, { "epoch": 0.87, "grad_norm": 0.4765625, "learning_rate": 1.411260672201314e-06, "loss": 2.151, "step": 41740 }, { "epoch": 0.87, "grad_norm": 0.427734375, "learning_rate": 1.406896892909934e-06, "loss": 2.1136, "step": 41750 }, { "epoch": 0.87, "grad_norm": 0.43359375, "learning_rate": 1.4025395387346035e-06, "loss": 2.103, "step": 41760 }, { "epoch": 0.87, "grad_norm": 0.455078125, "learning_rate": 1.3981886117349312e-06, "loss": 2.1098, "step": 41770 }, { "epoch": 0.87, "grad_norm": 0.43359375, "learning_rate": 1.393844113967489e-06, "loss": 2.1477, "step": 41780 }, { "epoch": 0.87, "grad_norm": 0.419921875, "learning_rate": 1.3895060474858158e-06, "loss": 2.1405, "step": 41790 }, { "epoch": 0.87, "grad_norm": 0.45703125, "learning_rate": 1.385174414340401e-06, "loss": 2.1344, "step": 41800 }, { "epoch": 0.87, "grad_norm": 0.466796875, "learning_rate": 1.380849216578699e-06, "loss": 2.1131, "step": 41810 }, { "epoch": 0.87, "grad_norm": 0.44921875, "learning_rate": 1.3765304562451242e-06, "loss": 2.1128, "step": 41820 }, { "epoch": 0.87, "grad_norm": 0.423828125, "learning_rate": 1.372218135381042e-06, "loss": 2.1234, "step": 41830 }, { "epoch": 0.87, "grad_norm": 0.4453125, "learning_rate": 1.3679122560247787e-06, "loss": 2.123, "step": 41840 }, { "epoch": 0.87, "grad_norm": 0.482421875, "learning_rate": 1.3636128202116177e-06, "loss": 2.101, "step": 41850 }, { "epoch": 0.87, "grad_norm": 0.423828125, "learning_rate": 1.3593198299737869e-06, "loss": 2.1083, "step": 41860 }, { "epoch": 0.87, "grad_norm": 0.4296875, "learning_rate": 1.3550332873404798e-06, "loss": 2.1425, "step": 41870 }, { "epoch": 0.87, "grad_norm": 0.435546875, "learning_rate": 1.3507531943378354e-06, "loss": 2.1073, "step": 41880 }, { "epoch": 0.87, "grad_norm": 0.427734375, "learning_rate": 1.3464795529889424e-06, "loss": 2.1124, "step": 41890 }, { "epoch": 0.87, "grad_norm": 0.43359375, "learning_rate": 1.3422123653138464e-06, "loss": 2.0959, "step": 41900 }, { "epoch": 0.87, "grad_norm": 0.451171875, "learning_rate": 1.3379516333295372e-06, "loss": 2.1069, "step": 41910 }, { "epoch": 0.87, "grad_norm": 0.4453125, "learning_rate": 1.3336973590499606e-06, "loss": 2.1041, "step": 41920 }, { "epoch": 0.87, "grad_norm": 0.419921875, "learning_rate": 1.329449544485995e-06, "loss": 2.1082, "step": 41930 }, { "epoch": 0.87, "grad_norm": 0.44140625, "learning_rate": 1.3252081916454823e-06, "loss": 2.1362, "step": 41940 }, { "epoch": 0.87, "grad_norm": 0.439453125, "learning_rate": 1.3209733025331988e-06, "loss": 2.1294, "step": 41950 }, { "epoch": 0.87, "grad_norm": 0.43359375, "learning_rate": 1.3167448791508734e-06, "loss": 2.1381, "step": 41960 }, { "epoch": 0.87, "grad_norm": 0.4453125, "learning_rate": 1.3125229234971691e-06, "loss": 2.1294, "step": 41970 }, { "epoch": 0.87, "grad_norm": 0.443359375, "learning_rate": 1.3083074375677029e-06, "loss": 2.1277, "step": 41980 }, { "epoch": 0.87, "grad_norm": 0.443359375, "learning_rate": 1.3040984233550242e-06, "loss": 2.1164, "step": 41990 }, { "epoch": 0.87, "grad_norm": 0.43359375, "learning_rate": 1.2998958828486295e-06, "loss": 2.0983, "step": 42000 }, { "epoch": 0.87, "eval_accuracy": 0.5590182113901279, "eval_loss": 1.9930047988891602, "eval_runtime": 16.4418, "eval_samples_per_second": 36.188, "eval_steps_per_second": 1.156, "step": 42000 }, { "epoch": 0.87, "grad_norm": 0.87890625, "learning_rate": 1.2956998180349555e-06, "loss": 2.1191, "step": 42010 }, { "epoch": 0.87, "grad_norm": 0.431640625, "learning_rate": 1.291510230897372e-06, "loss": 2.1201, "step": 42020 }, { "epoch": 0.87, "grad_norm": 0.453125, "learning_rate": 1.2873271234161932e-06, "loss": 2.1202, "step": 42030 }, { "epoch": 0.87, "grad_norm": 0.453125, "learning_rate": 1.2831504975686693e-06, "loss": 2.1257, "step": 42040 }, { "epoch": 0.87, "grad_norm": 0.42578125, "learning_rate": 1.2789803553289825e-06, "loss": 2.1017, "step": 42050 }, { "epoch": 0.87, "grad_norm": 0.427734375, "learning_rate": 1.2748166986682546e-06, "loss": 2.086, "step": 42060 }, { "epoch": 0.87, "grad_norm": 0.431640625, "learning_rate": 1.2706595295545449e-06, "loss": 2.1073, "step": 42070 }, { "epoch": 0.87, "grad_norm": 0.44140625, "learning_rate": 1.2665088499528344e-06, "loss": 2.105, "step": 42080 }, { "epoch": 0.88, "grad_norm": 0.423828125, "learning_rate": 1.2623646618250505e-06, "loss": 2.1214, "step": 42090 }, { "epoch": 0.88, "grad_norm": 0.453125, "learning_rate": 1.2582269671300435e-06, "loss": 2.118, "step": 42100 }, { "epoch": 0.88, "grad_norm": 0.447265625, "learning_rate": 1.2540957678235998e-06, "loss": 2.1249, "step": 42110 }, { "epoch": 0.88, "grad_norm": 0.431640625, "learning_rate": 1.2499710658584296e-06, "loss": 2.1289, "step": 42120 }, { "epoch": 0.88, "grad_norm": 0.435546875, "learning_rate": 1.2458528631841748e-06, "loss": 2.1342, "step": 42130 }, { "epoch": 0.88, "grad_norm": 0.42578125, "learning_rate": 1.2417411617474117e-06, "loss": 2.1441, "step": 42140 }, { "epoch": 0.88, "grad_norm": 0.431640625, "learning_rate": 1.237635963491629e-06, "loss": 2.0913, "step": 42150 }, { "epoch": 0.88, "grad_norm": 0.43359375, "learning_rate": 1.2335372703572546e-06, "loss": 2.0991, "step": 42160 }, { "epoch": 0.88, "grad_norm": 0.435546875, "learning_rate": 1.2294450842816403e-06, "loss": 2.097, "step": 42170 }, { "epoch": 0.88, "grad_norm": 0.44921875, "learning_rate": 1.225359407199052e-06, "loss": 2.1143, "step": 42180 }, { "epoch": 0.88, "grad_norm": 0.435546875, "learning_rate": 1.221280241040692e-06, "loss": 2.1487, "step": 42190 }, { "epoch": 0.88, "grad_norm": 0.427734375, "learning_rate": 1.2172075877346728e-06, "loss": 2.1163, "step": 42200 }, { "epoch": 0.88, "grad_norm": 0.4296875, "learning_rate": 1.2131414492060412e-06, "loss": 2.1128, "step": 42210 }, { "epoch": 0.88, "grad_norm": 0.431640625, "learning_rate": 1.2090818273767529e-06, "loss": 2.099, "step": 42220 }, { "epoch": 0.88, "grad_norm": 0.453125, "learning_rate": 1.2050287241656931e-06, "loss": 2.0942, "step": 42230 }, { "epoch": 0.88, "grad_norm": 0.4375, "learning_rate": 1.2009821414886545e-06, "loss": 2.1352, "step": 42240 }, { "epoch": 0.88, "grad_norm": 0.423828125, "learning_rate": 1.1969420812583603e-06, "loss": 2.1387, "step": 42250 }, { "epoch": 0.88, "grad_norm": 0.45703125, "learning_rate": 1.1929085453844446e-06, "loss": 2.1062, "step": 42260 }, { "epoch": 0.88, "grad_norm": 0.439453125, "learning_rate": 1.188881535773454e-06, "loss": 2.1165, "step": 42270 }, { "epoch": 0.88, "grad_norm": 0.46875, "learning_rate": 1.1848610543288557e-06, "loss": 2.1221, "step": 42280 }, { "epoch": 0.88, "grad_norm": 0.427734375, "learning_rate": 1.18084710295103e-06, "loss": 2.0941, "step": 42290 }, { "epoch": 0.88, "grad_norm": 0.453125, "learning_rate": 1.1768396835372736e-06, "loss": 2.1231, "step": 42300 }, { "epoch": 0.88, "grad_norm": 0.46484375, "learning_rate": 1.1728387979817868e-06, "loss": 2.1246, "step": 42310 }, { "epoch": 0.88, "grad_norm": 0.42578125, "learning_rate": 1.1688444481756882e-06, "loss": 2.1094, "step": 42320 }, { "epoch": 0.88, "grad_norm": 0.427734375, "learning_rate": 1.1648566360070095e-06, "loss": 2.1421, "step": 42330 }, { "epoch": 0.88, "grad_norm": 0.44921875, "learning_rate": 1.1608753633606845e-06, "loss": 2.1195, "step": 42340 }, { "epoch": 0.88, "grad_norm": 0.431640625, "learning_rate": 1.156900632118561e-06, "loss": 2.098, "step": 42350 }, { "epoch": 0.88, "grad_norm": 0.458984375, "learning_rate": 1.152932444159398e-06, "loss": 2.1224, "step": 42360 }, { "epoch": 0.88, "grad_norm": 0.431640625, "learning_rate": 1.1489708013588502e-06, "loss": 2.1212, "step": 42370 }, { "epoch": 0.88, "grad_norm": 0.439453125, "learning_rate": 1.145015705589491e-06, "loss": 2.1188, "step": 42380 }, { "epoch": 0.88, "grad_norm": 0.43359375, "learning_rate": 1.1410671587207954e-06, "loss": 2.1269, "step": 42390 }, { "epoch": 0.88, "grad_norm": 0.43359375, "learning_rate": 1.137125162619137e-06, "loss": 2.0952, "step": 42400 }, { "epoch": 0.88, "grad_norm": 0.470703125, "learning_rate": 1.1331897191478001e-06, "loss": 2.1496, "step": 42410 }, { "epoch": 0.88, "grad_norm": 0.435546875, "learning_rate": 1.1292608301669721e-06, "loss": 2.1091, "step": 42420 }, { "epoch": 0.88, "grad_norm": 0.439453125, "learning_rate": 1.1253384975337322e-06, "loss": 2.0972, "step": 42430 }, { "epoch": 0.88, "grad_norm": 0.447265625, "learning_rate": 1.121422723102074e-06, "loss": 2.1133, "step": 42440 }, { "epoch": 0.88, "grad_norm": 0.455078125, "learning_rate": 1.1175135087228854e-06, "loss": 2.1323, "step": 42450 }, { "epoch": 0.88, "grad_norm": 0.4453125, "learning_rate": 1.1136108562439519e-06, "loss": 2.0985, "step": 42460 }, { "epoch": 0.88, "grad_norm": 0.4375, "learning_rate": 1.1097147675099539e-06, "loss": 2.1211, "step": 42470 }, { "epoch": 0.88, "grad_norm": 0.423828125, "learning_rate": 1.1058252443624773e-06, "loss": 2.097, "step": 42480 }, { "epoch": 0.88, "grad_norm": 0.439453125, "learning_rate": 1.101942288640006e-06, "loss": 2.1088, "step": 42490 }, { "epoch": 0.88, "grad_norm": 0.458984375, "learning_rate": 1.098065902177907e-06, "loss": 2.1109, "step": 42500 }, { "epoch": 0.88, "eval_accuracy": 0.5590888554835424, "eval_loss": 1.9930570125579834, "eval_runtime": 16.5088, "eval_samples_per_second": 36.041, "eval_steps_per_second": 1.151, "step": 42500 }, { "epoch": 0.88, "grad_norm": 0.4375, "learning_rate": 1.0941960868084549e-06, "loss": 2.1218, "step": 42510 }, { "epoch": 0.88, "grad_norm": 0.443359375, "learning_rate": 1.0903328443608145e-06, "loss": 2.0967, "step": 42520 }, { "epoch": 0.88, "grad_norm": 0.439453125, "learning_rate": 1.0864761766610388e-06, "loss": 2.1258, "step": 42530 }, { "epoch": 0.88, "grad_norm": 0.439453125, "learning_rate": 1.082626085532079e-06, "loss": 2.1216, "step": 42540 }, { "epoch": 0.88, "grad_norm": 0.44921875, "learning_rate": 1.0787825727937783e-06, "loss": 2.1111, "step": 42550 }, { "epoch": 0.88, "grad_norm": 0.4609375, "learning_rate": 1.0749456402628632e-06, "loss": 2.1542, "step": 42560 }, { "epoch": 0.89, "grad_norm": 0.482421875, "learning_rate": 1.071115289752958e-06, "loss": 2.1169, "step": 42570 }, { "epoch": 0.89, "grad_norm": 0.42578125, "learning_rate": 1.0672915230745728e-06, "loss": 2.1001, "step": 42580 }, { "epoch": 0.89, "grad_norm": 0.453125, "learning_rate": 1.0634743420351033e-06, "loss": 2.0937, "step": 42590 }, { "epoch": 0.89, "grad_norm": 0.4609375, "learning_rate": 1.0596637484388334e-06, "loss": 2.1009, "step": 42600 }, { "epoch": 0.89, "grad_norm": 0.4375, "learning_rate": 1.0558597440869399e-06, "loss": 2.0736, "step": 42610 }, { "epoch": 0.89, "grad_norm": 0.431640625, "learning_rate": 1.0520623307774735e-06, "loss": 2.0985, "step": 42620 }, { "epoch": 0.89, "grad_norm": 0.4296875, "learning_rate": 1.048271510305378e-06, "loss": 2.1038, "step": 42630 }, { "epoch": 0.89, "grad_norm": 0.46484375, "learning_rate": 1.0444872844624804e-06, "loss": 2.1185, "step": 42640 }, { "epoch": 0.89, "grad_norm": 0.42578125, "learning_rate": 1.040709655037483e-06, "loss": 2.1106, "step": 42650 }, { "epoch": 0.89, "grad_norm": 0.46484375, "learning_rate": 1.036938623815979e-06, "loss": 2.1104, "step": 42660 }, { "epoch": 0.89, "grad_norm": 0.431640625, "learning_rate": 1.0331741925804394e-06, "loss": 2.1271, "step": 42670 }, { "epoch": 0.89, "grad_norm": 0.44921875, "learning_rate": 1.0294163631102193e-06, "loss": 2.1043, "step": 42680 }, { "epoch": 0.89, "grad_norm": 0.435546875, "learning_rate": 1.025665137181543e-06, "loss": 2.1021, "step": 42690 }, { "epoch": 0.89, "grad_norm": 0.48828125, "learning_rate": 1.021920516567525e-06, "loss": 2.0928, "step": 42700 }, { "epoch": 0.89, "grad_norm": 0.44140625, "learning_rate": 1.0181825030381497e-06, "loss": 2.1148, "step": 42710 }, { "epoch": 0.89, "grad_norm": 0.470703125, "learning_rate": 1.014451098360284e-06, "loss": 2.1105, "step": 42720 }, { "epoch": 0.89, "grad_norm": 0.408203125, "learning_rate": 1.010726304297665e-06, "loss": 2.1245, "step": 42730 }, { "epoch": 0.89, "grad_norm": 0.43359375, "learning_rate": 1.0070081226109146e-06, "loss": 2.0956, "step": 42740 }, { "epoch": 0.89, "grad_norm": 0.43359375, "learning_rate": 1.0032965550575151e-06, "loss": 2.1112, "step": 42750 }, { "epoch": 0.89, "grad_norm": 0.4375, "learning_rate": 9.995916033918367e-07, "loss": 2.1226, "step": 42760 }, { "epoch": 0.89, "grad_norm": 0.443359375, "learning_rate": 9.958932693651168e-07, "loss": 2.0988, "step": 42770 }, { "epoch": 0.89, "grad_norm": 0.4609375, "learning_rate": 9.922015547254587e-07, "loss": 2.1113, "step": 42780 }, { "epoch": 0.89, "grad_norm": 0.4296875, "learning_rate": 9.885164612178482e-07, "loss": 2.103, "step": 42790 }, { "epoch": 0.89, "grad_norm": 0.44140625, "learning_rate": 9.848379905841337e-07, "loss": 2.0847, "step": 42800 }, { "epoch": 0.89, "grad_norm": 0.4375, "learning_rate": 9.811661445630343e-07, "loss": 2.1191, "step": 42810 }, { "epoch": 0.89, "grad_norm": 0.453125, "learning_rate": 9.7750092489014e-07, "loss": 2.1427, "step": 42820 }, { "epoch": 0.89, "grad_norm": 0.4296875, "learning_rate": 9.738423332979085e-07, "loss": 2.1083, "step": 42830 }, { "epoch": 0.89, "grad_norm": 0.419921875, "learning_rate": 9.701903715156613e-07, "loss": 2.0986, "step": 42840 }, { "epoch": 0.89, "grad_norm": 0.453125, "learning_rate": 9.665450412695875e-07, "loss": 2.1249, "step": 42850 }, { "epoch": 0.89, "grad_norm": 0.4296875, "learning_rate": 9.629063442827458e-07, "loss": 2.0911, "step": 42860 }, { "epoch": 0.89, "grad_norm": 0.41796875, "learning_rate": 9.592742822750568e-07, "loss": 2.1047, "step": 42870 }, { "epoch": 0.89, "grad_norm": 0.421875, "learning_rate": 9.556488569632988e-07, "loss": 2.1153, "step": 42880 }, { "epoch": 0.89, "grad_norm": 0.458984375, "learning_rate": 9.520300700611228e-07, "loss": 2.1311, "step": 42890 }, { "epoch": 0.89, "grad_norm": 0.447265625, "learning_rate": 9.484179232790391e-07, "loss": 2.1254, "step": 42900 }, { "epoch": 0.89, "grad_norm": 0.453125, "learning_rate": 9.448124183244134e-07, "loss": 2.1288, "step": 42910 }, { "epoch": 0.89, "grad_norm": 0.435546875, "learning_rate": 9.412135569014807e-07, "loss": 2.1384, "step": 42920 }, { "epoch": 0.89, "grad_norm": 0.439453125, "learning_rate": 9.376213407113321e-07, "loss": 2.083, "step": 42930 }, { "epoch": 0.89, "grad_norm": 0.421875, "learning_rate": 9.340357714519143e-07, "loss": 2.1177, "step": 42940 }, { "epoch": 0.89, "grad_norm": 0.451171875, "learning_rate": 9.304568508180384e-07, "loss": 2.1337, "step": 42950 }, { "epoch": 0.89, "grad_norm": 0.419921875, "learning_rate": 9.268845805013642e-07, "loss": 2.1216, "step": 42960 }, { "epoch": 0.89, "grad_norm": 0.453125, "learning_rate": 9.233189621904215e-07, "loss": 2.1355, "step": 42970 }, { "epoch": 0.89, "grad_norm": 0.44921875, "learning_rate": 9.1975999757058e-07, "loss": 2.1295, "step": 42980 }, { "epoch": 0.89, "grad_norm": 0.484375, "learning_rate": 9.162076883240794e-07, "loss": 2.0949, "step": 42990 }, { "epoch": 0.89, "grad_norm": 0.42578125, "learning_rate": 9.126620361299998e-07, "loss": 2.172, "step": 43000 }, { "epoch": 0.89, "eval_accuracy": 0.559046140450315, "eval_loss": 1.9930146932601929, "eval_runtime": 16.4531, "eval_samples_per_second": 36.163, "eval_steps_per_second": 1.155, "step": 43000 }, { "epoch": 0.89, "grad_norm": 0.4375, "learning_rate": 9.091230426642854e-07, "loss": 2.1312, "step": 43010 }, { "epoch": 0.89, "grad_norm": 0.4453125, "learning_rate": 9.055907095997302e-07, "loss": 2.1183, "step": 43020 }, { "epoch": 0.89, "grad_norm": 0.435546875, "learning_rate": 9.020650386059731e-07, "loss": 2.0889, "step": 43030 }, { "epoch": 0.89, "grad_norm": 0.423828125, "learning_rate": 8.985460313495131e-07, "loss": 2.1385, "step": 43040 }, { "epoch": 0.9, "grad_norm": 0.423828125, "learning_rate": 8.950336894936978e-07, "loss": 2.1234, "step": 43050 }, { "epoch": 0.9, "grad_norm": 0.435546875, "learning_rate": 8.915280146987209e-07, "loss": 2.1088, "step": 43060 }, { "epoch": 0.9, "grad_norm": 0.44921875, "learning_rate": 8.880290086216236e-07, "loss": 2.1122, "step": 43070 }, { "epoch": 0.9, "grad_norm": 0.427734375, "learning_rate": 8.845366729162996e-07, "loss": 2.1481, "step": 43080 }, { "epoch": 0.9, "grad_norm": 0.447265625, "learning_rate": 8.8105100923349e-07, "loss": 2.0863, "step": 43090 }, { "epoch": 0.9, "grad_norm": 0.435546875, "learning_rate": 8.775720192207753e-07, "loss": 2.1109, "step": 43100 }, { "epoch": 0.9, "grad_norm": 0.458984375, "learning_rate": 8.740997045225901e-07, "loss": 2.1144, "step": 43110 }, { "epoch": 0.9, "grad_norm": 0.447265625, "learning_rate": 8.706340667802082e-07, "loss": 2.1011, "step": 43120 }, { "epoch": 0.9, "grad_norm": 0.453125, "learning_rate": 8.671751076317475e-07, "loss": 2.0959, "step": 43130 }, { "epoch": 0.9, "grad_norm": 0.43359375, "learning_rate": 8.637228287121717e-07, "loss": 2.1237, "step": 43140 }, { "epoch": 0.9, "grad_norm": 0.435546875, "learning_rate": 8.602772316532875e-07, "loss": 2.1407, "step": 43150 }, { "epoch": 0.9, "grad_norm": 0.447265625, "learning_rate": 8.568383180837369e-07, "loss": 2.0922, "step": 43160 }, { "epoch": 0.9, "grad_norm": 0.458984375, "learning_rate": 8.534060896290113e-07, "loss": 2.0881, "step": 43170 }, { "epoch": 0.9, "grad_norm": 0.447265625, "learning_rate": 8.499805479114397e-07, "loss": 2.1546, "step": 43180 }, { "epoch": 0.9, "grad_norm": 0.43359375, "learning_rate": 8.465616945501836e-07, "loss": 2.1102, "step": 43190 }, { "epoch": 0.9, "grad_norm": 0.474609375, "learning_rate": 8.431495311612536e-07, "loss": 2.1159, "step": 43200 }, { "epoch": 0.9, "grad_norm": 0.451171875, "learning_rate": 8.397440593574895e-07, "loss": 2.1184, "step": 43210 }, { "epoch": 0.9, "grad_norm": 0.44140625, "learning_rate": 8.363452807485755e-07, "loss": 2.1455, "step": 43220 }, { "epoch": 0.9, "grad_norm": 0.43359375, "learning_rate": 8.329531969410232e-07, "loss": 2.0902, "step": 43230 }, { "epoch": 0.9, "grad_norm": 0.453125, "learning_rate": 8.295678095381865e-07, "loss": 2.1179, "step": 43240 }, { "epoch": 0.9, "grad_norm": 0.427734375, "learning_rate": 8.261891201402572e-07, "loss": 2.1354, "step": 43250 }, { "epoch": 0.9, "grad_norm": 0.44140625, "learning_rate": 8.228171303442494e-07, "loss": 2.1055, "step": 43260 }, { "epoch": 0.9, "grad_norm": 0.421875, "learning_rate": 8.1945184174402e-07, "loss": 2.1178, "step": 43270 }, { "epoch": 0.9, "grad_norm": 0.4375, "learning_rate": 8.160932559302598e-07, "loss": 2.1001, "step": 43280 }, { "epoch": 0.9, "grad_norm": 0.443359375, "learning_rate": 8.127413744904805e-07, "loss": 2.1227, "step": 43290 }, { "epoch": 0.9, "grad_norm": 0.455078125, "learning_rate": 8.093961990090365e-07, "loss": 2.1074, "step": 43300 }, { "epoch": 0.9, "grad_norm": 0.455078125, "learning_rate": 8.060577310671064e-07, "loss": 2.1241, "step": 43310 }, { "epoch": 0.9, "grad_norm": 0.427734375, "learning_rate": 8.027259722426994e-07, "loss": 2.1054, "step": 43320 }, { "epoch": 0.9, "grad_norm": 0.48828125, "learning_rate": 7.994009241106526e-07, "loss": 2.1314, "step": 43330 }, { "epoch": 0.9, "grad_norm": 0.44921875, "learning_rate": 7.960825882426354e-07, "loss": 2.1079, "step": 43340 }, { "epoch": 0.9, "grad_norm": 0.423828125, "learning_rate": 7.927709662071364e-07, "loss": 2.1226, "step": 43350 }, { "epoch": 0.9, "grad_norm": 0.455078125, "learning_rate": 7.894660595694786e-07, "loss": 2.1269, "step": 43360 }, { "epoch": 0.9, "grad_norm": 0.443359375, "learning_rate": 7.86167869891809e-07, "loss": 2.1135, "step": 43370 }, { "epoch": 0.9, "grad_norm": 0.43359375, "learning_rate": 7.828763987330956e-07, "loss": 2.1089, "step": 43380 }, { "epoch": 0.9, "grad_norm": 0.447265625, "learning_rate": 7.795916476491355e-07, "loss": 2.1283, "step": 43390 }, { "epoch": 0.9, "grad_norm": 0.46875, "learning_rate": 7.763136181925467e-07, "loss": 2.1094, "step": 43400 }, { "epoch": 0.9, "grad_norm": 0.4375, "learning_rate": 7.7304231191277e-07, "loss": 2.12, "step": 43410 }, { "epoch": 0.9, "grad_norm": 0.4296875, "learning_rate": 7.697777303560699e-07, "loss": 2.1002, "step": 43420 }, { "epoch": 0.9, "grad_norm": 0.427734375, "learning_rate": 7.665198750655339e-07, "loss": 2.1274, "step": 43430 }, { "epoch": 0.9, "grad_norm": 0.435546875, "learning_rate": 7.632687475810634e-07, "loss": 2.1221, "step": 43440 }, { "epoch": 0.9, "grad_norm": 0.4453125, "learning_rate": 7.600243494393894e-07, "loss": 2.1052, "step": 43450 }, { "epoch": 0.9, "grad_norm": 0.435546875, "learning_rate": 7.567866821740499e-07, "loss": 2.1455, "step": 43460 }, { "epoch": 0.9, "grad_norm": 0.427734375, "learning_rate": 7.535557473154142e-07, "loss": 2.1079, "step": 43470 }, { "epoch": 0.9, "grad_norm": 0.42578125, "learning_rate": 7.503315463906607e-07, "loss": 2.1307, "step": 43480 }, { "epoch": 0.9, "grad_norm": 0.42578125, "learning_rate": 7.47114080923787e-07, "loss": 2.1143, "step": 43490 }, { "epoch": 0.9, "grad_norm": 0.44921875, "learning_rate": 7.439033524356131e-07, "loss": 2.0882, "step": 43500 }, { "epoch": 0.9, "eval_accuracy": 0.5590527119938885, "eval_loss": 1.9930274486541748, "eval_runtime": 16.4545, "eval_samples_per_second": 36.16, "eval_steps_per_second": 1.155, "step": 43500 }, { "epoch": 0.9, "grad_norm": 0.421875, "learning_rate": 7.406993624437619e-07, "loss": 2.1017, "step": 43510 }, { "epoch": 0.9, "grad_norm": 0.4375, "learning_rate": 7.375021124626802e-07, "loss": 2.0924, "step": 43520 }, { "epoch": 0.9, "grad_norm": 0.419921875, "learning_rate": 7.343116040036324e-07, "loss": 2.1026, "step": 43530 }, { "epoch": 0.91, "grad_norm": 0.4375, "learning_rate": 7.311278385746855e-07, "loss": 2.1089, "step": 43540 }, { "epoch": 0.91, "grad_norm": 0.443359375, "learning_rate": 7.279508176807259e-07, "loss": 2.1175, "step": 43550 }, { "epoch": 0.91, "grad_norm": 0.45703125, "learning_rate": 7.247805428234538e-07, "loss": 2.0975, "step": 43560 }, { "epoch": 0.91, "grad_norm": 0.439453125, "learning_rate": 7.21617015501374e-07, "loss": 2.1424, "step": 43570 }, { "epoch": 0.91, "grad_norm": 0.451171875, "learning_rate": 7.184602372098087e-07, "loss": 2.0906, "step": 43580 }, { "epoch": 0.91, "grad_norm": 0.41796875, "learning_rate": 7.153102094408893e-07, "loss": 2.083, "step": 43590 }, { "epoch": 0.91, "grad_norm": 0.451171875, "learning_rate": 7.121669336835479e-07, "loss": 2.1038, "step": 43600 }, { "epoch": 0.91, "grad_norm": 0.451171875, "learning_rate": 7.090304114235347e-07, "loss": 2.1372, "step": 43610 }, { "epoch": 0.91, "grad_norm": 0.447265625, "learning_rate": 7.059006441434051e-07, "loss": 2.1194, "step": 43620 }, { "epoch": 0.91, "grad_norm": 0.45703125, "learning_rate": 7.027776333225211e-07, "loss": 2.1084, "step": 43630 }, { "epoch": 0.91, "grad_norm": 0.44140625, "learning_rate": 6.9966138043705e-07, "loss": 2.1085, "step": 43640 }, { "epoch": 0.91, "grad_norm": 0.435546875, "learning_rate": 6.965518869599652e-07, "loss": 2.0933, "step": 43650 }, { "epoch": 0.91, "grad_norm": 0.419921875, "learning_rate": 6.934491543610494e-07, "loss": 2.0934, "step": 43660 }, { "epoch": 0.91, "grad_norm": 0.46875, "learning_rate": 6.903531841068795e-07, "loss": 2.1324, "step": 43670 }, { "epoch": 0.91, "grad_norm": 0.423828125, "learning_rate": 6.872639776608464e-07, "loss": 2.1084, "step": 43680 }, { "epoch": 0.91, "grad_norm": 0.423828125, "learning_rate": 6.841815364831405e-07, "loss": 2.126, "step": 43690 }, { "epoch": 0.91, "grad_norm": 0.408203125, "learning_rate": 6.811058620307514e-07, "loss": 2.0742, "step": 43700 }, { "epoch": 0.91, "grad_norm": 0.423828125, "learning_rate": 6.780369557574762e-07, "loss": 2.145, "step": 43710 }, { "epoch": 0.91, "grad_norm": 0.43359375, "learning_rate": 6.749748191139027e-07, "loss": 2.1475, "step": 43720 }, { "epoch": 0.91, "grad_norm": 0.4375, "learning_rate": 6.719194535474332e-07, "loss": 2.1074, "step": 43730 }, { "epoch": 0.91, "grad_norm": 0.43359375, "learning_rate": 6.688708605022558e-07, "loss": 2.127, "step": 43740 }, { "epoch": 0.91, "grad_norm": 0.41796875, "learning_rate": 6.658290414193663e-07, "loss": 2.1133, "step": 43750 }, { "epoch": 0.91, "grad_norm": 0.439453125, "learning_rate": 6.627939977365544e-07, "loss": 2.1232, "step": 43760 }, { "epoch": 0.91, "grad_norm": 1.15625, "learning_rate": 6.597657308884075e-07, "loss": 2.0822, "step": 43770 }, { "epoch": 0.91, "grad_norm": 0.439453125, "learning_rate": 6.567442423063158e-07, "loss": 2.1159, "step": 43780 }, { "epoch": 0.91, "grad_norm": 0.53125, "learning_rate": 6.537295334184534e-07, "loss": 2.1127, "step": 43790 }, { "epoch": 0.91, "grad_norm": 0.4453125, "learning_rate": 6.507216056498006e-07, "loss": 2.1474, "step": 43800 }, { "epoch": 0.91, "grad_norm": 0.44921875, "learning_rate": 6.477204604221298e-07, "loss": 2.1023, "step": 43810 }, { "epoch": 0.91, "grad_norm": 0.423828125, "learning_rate": 6.447260991540066e-07, "loss": 2.1411, "step": 43820 }, { "epoch": 0.91, "grad_norm": 0.49609375, "learning_rate": 6.41738523260787e-07, "loss": 2.0886, "step": 43830 }, { "epoch": 0.91, "grad_norm": 0.462890625, "learning_rate": 6.387577341546247e-07, "loss": 2.1056, "step": 43840 }, { "epoch": 0.91, "grad_norm": 0.43359375, "learning_rate": 6.357837332444627e-07, "loss": 2.1066, "step": 43850 }, { "epoch": 0.91, "grad_norm": 0.451171875, "learning_rate": 6.328165219360365e-07, "loss": 2.1097, "step": 43860 }, { "epoch": 0.91, "grad_norm": 0.484375, "learning_rate": 6.298561016318693e-07, "loss": 2.103, "step": 43870 }, { "epoch": 0.91, "grad_norm": 0.443359375, "learning_rate": 6.269024737312817e-07, "loss": 2.1166, "step": 43880 }, { "epoch": 0.91, "grad_norm": 0.423828125, "learning_rate": 6.239556396303753e-07, "loss": 2.0711, "step": 43890 }, { "epoch": 0.91, "grad_norm": 0.44921875, "learning_rate": 6.210156007220441e-07, "loss": 2.1139, "step": 43900 }, { "epoch": 0.91, "grad_norm": 0.439453125, "learning_rate": 6.180823583959733e-07, "loss": 2.0997, "step": 43910 }, { "epoch": 0.91, "grad_norm": 0.431640625, "learning_rate": 6.15155914038627e-07, "loss": 2.1208, "step": 43920 }, { "epoch": 0.91, "grad_norm": 0.435546875, "learning_rate": 6.122362690332655e-07, "loss": 2.1486, "step": 43930 }, { "epoch": 0.91, "grad_norm": 0.4375, "learning_rate": 6.093234247599317e-07, "loss": 2.1087, "step": 43940 }, { "epoch": 0.91, "grad_norm": 0.431640625, "learning_rate": 6.064173825954506e-07, "loss": 2.1032, "step": 43950 }, { "epoch": 0.91, "grad_norm": 0.427734375, "learning_rate": 6.035181439134368e-07, "loss": 2.1044, "step": 43960 }, { "epoch": 0.91, "grad_norm": 0.427734375, "learning_rate": 6.00625710084286e-07, "loss": 2.1427, "step": 43970 }, { "epoch": 0.91, "grad_norm": 0.4453125, "learning_rate": 5.977400824751794e-07, "loss": 2.0962, "step": 43980 }, { "epoch": 0.91, "grad_norm": 0.44921875, "learning_rate": 5.948612624500776e-07, "loss": 2.1323, "step": 43990 }, { "epoch": 0.91, "grad_norm": 0.42578125, "learning_rate": 5.919892513697306e-07, "loss": 2.0646, "step": 44000 }, { "epoch": 0.91, "eval_accuracy": 0.5590773552822889, "eval_loss": 1.993033528327942, "eval_runtime": 16.4535, "eval_samples_per_second": 36.163, "eval_steps_per_second": 1.155, "step": 44000 }, { "epoch": 0.91, "grad_norm": 0.43359375, "learning_rate": 5.891240505916623e-07, "loss": 2.1339, "step": 44010 }, { "epoch": 0.92, "grad_norm": 0.451171875, "learning_rate": 5.862656614701795e-07, "loss": 2.1037, "step": 44020 }, { "epoch": 0.92, "grad_norm": 0.4453125, "learning_rate": 5.83414085356373e-07, "loss": 2.1265, "step": 44030 }, { "epoch": 0.92, "grad_norm": 0.44140625, "learning_rate": 5.805693235981113e-07, "loss": 2.127, "step": 44040 }, { "epoch": 0.92, "grad_norm": 0.4609375, "learning_rate": 5.777313775400389e-07, "loss": 2.1356, "step": 44050 }, { "epoch": 0.92, "grad_norm": 0.4296875, "learning_rate": 5.749002485235827e-07, "loss": 2.1389, "step": 44060 }, { "epoch": 0.92, "grad_norm": 0.4375, "learning_rate": 5.720759378869455e-07, "loss": 2.0945, "step": 44070 }, { "epoch": 0.92, "grad_norm": 0.4375, "learning_rate": 5.692584469651063e-07, "loss": 2.1135, "step": 44080 }, { "epoch": 0.92, "grad_norm": 0.4453125, "learning_rate": 5.664477770898214e-07, "loss": 2.1239, "step": 44090 }, { "epoch": 0.92, "grad_norm": 0.4375, "learning_rate": 5.636439295896279e-07, "loss": 2.1188, "step": 44100 }, { "epoch": 0.92, "grad_norm": 0.455078125, "learning_rate": 5.608469057898291e-07, "loss": 2.1432, "step": 44110 }, { "epoch": 0.92, "grad_norm": 0.447265625, "learning_rate": 5.580567070125076e-07, "loss": 2.1249, "step": 44120 }, { "epoch": 0.92, "grad_norm": 0.44921875, "learning_rate": 5.552733345765232e-07, "loss": 2.1214, "step": 44130 }, { "epoch": 0.92, "grad_norm": 0.431640625, "learning_rate": 5.52496789797502e-07, "loss": 2.1195, "step": 44140 }, { "epoch": 0.92, "grad_norm": 0.43359375, "learning_rate": 5.497270739878474e-07, "loss": 2.1074, "step": 44150 }, { "epoch": 0.92, "grad_norm": 0.4140625, "learning_rate": 5.469641884567355e-07, "loss": 2.1507, "step": 44160 }, { "epoch": 0.92, "grad_norm": 0.431640625, "learning_rate": 5.442081345101102e-07, "loss": 2.1158, "step": 44170 }, { "epoch": 0.92, "grad_norm": 0.431640625, "learning_rate": 5.414589134506892e-07, "loss": 2.0916, "step": 44180 }, { "epoch": 0.92, "grad_norm": 0.4453125, "learning_rate": 5.387165265779631e-07, "loss": 2.0888, "step": 44190 }, { "epoch": 0.92, "grad_norm": 0.443359375, "learning_rate": 5.359809751881834e-07, "loss": 2.1165, "step": 44200 }, { "epoch": 0.92, "grad_norm": 0.458984375, "learning_rate": 5.332522605743805e-07, "loss": 2.137, "step": 44210 }, { "epoch": 0.92, "grad_norm": 0.43359375, "learning_rate": 5.305303840263465e-07, "loss": 2.1321, "step": 44220 }, { "epoch": 0.92, "grad_norm": 0.455078125, "learning_rate": 5.278153468306435e-07, "loss": 2.0847, "step": 44230 }, { "epoch": 0.92, "grad_norm": 0.42578125, "learning_rate": 5.251071502706018e-07, "loss": 2.1404, "step": 44240 }, { "epoch": 0.92, "grad_norm": 0.490234375, "learning_rate": 5.224057956263157e-07, "loss": 2.1265, "step": 44250 }, { "epoch": 0.92, "grad_norm": 0.42578125, "learning_rate": 5.197112841746504e-07, "loss": 2.1401, "step": 44260 }, { "epoch": 0.92, "grad_norm": 0.427734375, "learning_rate": 5.170236171892307e-07, "loss": 2.0868, "step": 44270 }, { "epoch": 0.92, "grad_norm": 0.431640625, "learning_rate": 5.143427959404484e-07, "loss": 2.1228, "step": 44280 }, { "epoch": 0.92, "grad_norm": 0.453125, "learning_rate": 5.116688216954635e-07, "loss": 2.0813, "step": 44290 }, { "epoch": 0.92, "grad_norm": 0.4140625, "learning_rate": 5.090016957181914e-07, "loss": 2.0937, "step": 44300 }, { "epoch": 0.92, "grad_norm": 0.451171875, "learning_rate": 5.063414192693172e-07, "loss": 2.1192, "step": 44310 }, { "epoch": 0.92, "grad_norm": 0.43359375, "learning_rate": 5.036879936062883e-07, "loss": 2.0863, "step": 44320 }, { "epoch": 0.92, "grad_norm": 0.439453125, "learning_rate": 5.010414199833097e-07, "loss": 2.1189, "step": 44330 }, { "epoch": 0.92, "grad_norm": 0.435546875, "learning_rate": 4.984016996513474e-07, "loss": 2.0778, "step": 44340 }, { "epoch": 0.92, "grad_norm": 0.419921875, "learning_rate": 4.957688338581368e-07, "loss": 2.1197, "step": 44350 }, { "epoch": 0.92, "grad_norm": 0.423828125, "learning_rate": 4.931428238481606e-07, "loss": 2.108, "step": 44360 }, { "epoch": 0.92, "grad_norm": 0.427734375, "learning_rate": 4.905236708626709e-07, "loss": 2.055, "step": 44370 }, { "epoch": 0.92, "grad_norm": 0.427734375, "learning_rate": 4.879113761396741e-07, "loss": 2.0908, "step": 44380 }, { "epoch": 0.92, "grad_norm": 0.423828125, "learning_rate": 4.853059409139376e-07, "loss": 2.1413, "step": 44390 }, { "epoch": 0.92, "grad_norm": 0.43359375, "learning_rate": 4.827073664169812e-07, "loss": 2.1274, "step": 44400 }, { "epoch": 0.92, "grad_norm": 0.4453125, "learning_rate": 4.801156538770857e-07, "loss": 2.0934, "step": 44410 }, { "epoch": 0.92, "grad_norm": 0.4375, "learning_rate": 4.775308045192927e-07, "loss": 2.1133, "step": 44420 }, { "epoch": 0.92, "grad_norm": 0.435546875, "learning_rate": 4.7495281956539016e-07, "loss": 2.1339, "step": 44430 }, { "epoch": 0.92, "grad_norm": 0.45703125, "learning_rate": 4.7238170023393147e-07, "loss": 2.128, "step": 44440 }, { "epoch": 0.92, "grad_norm": 0.43359375, "learning_rate": 4.6981744774021284e-07, "loss": 2.1505, "step": 44450 }, { "epoch": 0.92, "grad_norm": 0.4296875, "learning_rate": 4.6726006329629645e-07, "loss": 2.093, "step": 44460 }, { "epoch": 0.92, "grad_norm": 0.546875, "learning_rate": 4.6470954811099195e-07, "loss": 2.105, "step": 44470 }, { "epoch": 0.92, "grad_norm": 0.482421875, "learning_rate": 4.621659033898634e-07, "loss": 2.0961, "step": 44480 }, { "epoch": 0.92, "grad_norm": 0.44140625, "learning_rate": 4.5962913033522733e-07, "loss": 2.154, "step": 44490 }, { "epoch": 0.93, "grad_norm": 0.421875, "learning_rate": 4.570992301461513e-07, "loss": 2.1223, "step": 44500 }, { "epoch": 0.93, "eval_accuracy": 0.5591348562885565, "eval_loss": 1.9930362701416016, "eval_runtime": 16.4511, "eval_samples_per_second": 36.168, "eval_steps_per_second": 1.155, "step": 44500 }, { "epoch": 0.93, "grad_norm": 0.59765625, "learning_rate": 4.54576204018457e-07, "loss": 2.0922, "step": 44510 }, { "epoch": 0.93, "grad_norm": 0.43359375, "learning_rate": 4.520600531447139e-07, "loss": 2.1062, "step": 44520 }, { "epoch": 0.93, "grad_norm": 0.4296875, "learning_rate": 4.4955077871424223e-07, "loss": 2.1182, "step": 44530 }, { "epoch": 0.93, "grad_norm": 0.4296875, "learning_rate": 4.4704838191311494e-07, "loss": 2.1066, "step": 44540 }, { "epoch": 0.93, "grad_norm": 0.439453125, "learning_rate": 4.4455286392414916e-07, "loss": 2.1063, "step": 44550 }, { "epoch": 0.93, "grad_norm": 0.4296875, "learning_rate": 4.420642259269148e-07, "loss": 2.108, "step": 44560 }, { "epoch": 0.93, "grad_norm": 0.453125, "learning_rate": 4.395824690977257e-07, "loss": 2.1246, "step": 44570 }, { "epoch": 0.93, "grad_norm": 0.42578125, "learning_rate": 4.371075946096503e-07, "loss": 2.1458, "step": 44580 }, { "epoch": 0.93, "grad_norm": 0.4453125, "learning_rate": 4.3463960363249286e-07, "loss": 2.1224, "step": 44590 }, { "epoch": 0.93, "grad_norm": 0.447265625, "learning_rate": 4.3217849733281523e-07, "loss": 2.1193, "step": 44600 }, { "epoch": 0.93, "grad_norm": 0.421875, "learning_rate": 4.297242768739185e-07, "loss": 2.1001, "step": 44610 }, { "epoch": 0.93, "grad_norm": 0.435546875, "learning_rate": 4.272769434158497e-07, "loss": 2.1277, "step": 44620 }, { "epoch": 0.93, "grad_norm": 0.42578125, "learning_rate": 4.2483649811540346e-07, "loss": 2.1026, "step": 44630 }, { "epoch": 0.93, "grad_norm": 0.427734375, "learning_rate": 4.2240294212611553e-07, "loss": 2.1033, "step": 44640 }, { "epoch": 0.93, "grad_norm": 0.427734375, "learning_rate": 4.1997627659826564e-07, "loss": 2.1107, "step": 44650 }, { "epoch": 0.93, "grad_norm": 0.451171875, "learning_rate": 4.17556502678878e-07, "loss": 2.1202, "step": 44660 }, { "epoch": 0.93, "grad_norm": 0.421875, "learning_rate": 4.1514362151171934e-07, "loss": 2.126, "step": 44670 }, { "epoch": 0.93, "grad_norm": 0.435546875, "learning_rate": 4.12737634237294e-07, "loss": 2.1066, "step": 44680 }, { "epoch": 0.93, "grad_norm": 0.44140625, "learning_rate": 4.10338541992854e-07, "loss": 2.1077, "step": 44690 }, { "epoch": 0.93, "grad_norm": 0.455078125, "learning_rate": 4.079463459123922e-07, "loss": 2.1392, "step": 44700 }, { "epoch": 0.93, "grad_norm": 0.447265625, "learning_rate": 4.055610471266341e-07, "loss": 2.1258, "step": 44710 }, { "epoch": 0.93, "grad_norm": 0.4453125, "learning_rate": 4.031826467630545e-07, "loss": 2.1216, "step": 44720 }, { "epoch": 0.93, "grad_norm": 0.44921875, "learning_rate": 4.008111459458591e-07, "loss": 2.1069, "step": 44730 }, { "epoch": 0.93, "grad_norm": 0.423828125, "learning_rate": 3.984465457959996e-07, "loss": 2.129, "step": 44740 }, { "epoch": 0.93, "grad_norm": 0.44921875, "learning_rate": 3.9608884743116025e-07, "loss": 2.1299, "step": 44750 }, { "epoch": 0.93, "grad_norm": 0.4453125, "learning_rate": 3.9373805196576627e-07, "loss": 2.1517, "step": 44760 }, { "epoch": 0.93, "grad_norm": 0.421875, "learning_rate": 3.9139416051098053e-07, "loss": 2.1009, "step": 44770 }, { "epoch": 0.93, "grad_norm": 0.431640625, "learning_rate": 3.890571741746984e-07, "loss": 2.1082, "step": 44780 }, { "epoch": 0.93, "grad_norm": 0.42578125, "learning_rate": 3.867270940615564e-07, "loss": 2.1467, "step": 44790 }, { "epoch": 0.93, "grad_norm": 0.43359375, "learning_rate": 3.844039212729267e-07, "loss": 2.1072, "step": 44800 }, { "epoch": 0.93, "grad_norm": 0.431640625, "learning_rate": 3.8208765690690925e-07, "loss": 2.1238, "step": 44810 }, { "epoch": 0.93, "grad_norm": 0.431640625, "learning_rate": 3.7977830205834676e-07, "loss": 2.143, "step": 44820 }, { "epoch": 0.93, "grad_norm": 0.439453125, "learning_rate": 3.7747585781881253e-07, "loss": 2.1126, "step": 44830 }, { "epoch": 0.93, "grad_norm": 0.41015625, "learning_rate": 3.751803252766128e-07, "loss": 2.1183, "step": 44840 }, { "epoch": 0.93, "grad_norm": 0.48828125, "learning_rate": 3.7289170551678633e-07, "loss": 2.1425, "step": 44850 }, { "epoch": 0.93, "grad_norm": 0.435546875, "learning_rate": 3.7060999962111095e-07, "loss": 2.1106, "step": 44860 }, { "epoch": 0.93, "grad_norm": 0.490234375, "learning_rate": 3.6833520866808745e-07, "loss": 2.1488, "step": 44870 }, { "epoch": 0.93, "grad_norm": 0.43359375, "learning_rate": 3.6606733373295064e-07, "loss": 2.1408, "step": 44880 }, { "epoch": 0.93, "grad_norm": 0.5, "learning_rate": 3.6380637588767306e-07, "loss": 2.1283, "step": 44890 }, { "epoch": 0.93, "grad_norm": 0.4375, "learning_rate": 3.615523362009465e-07, "loss": 2.1038, "step": 44900 }, { "epoch": 0.93, "grad_norm": 0.40625, "learning_rate": 3.593052157382004e-07, "loss": 2.0932, "step": 44910 }, { "epoch": 0.93, "grad_norm": 0.43359375, "learning_rate": 3.570650155615951e-07, "loss": 2.1117, "step": 44920 }, { "epoch": 0.93, "grad_norm": 0.427734375, "learning_rate": 3.548317367300119e-07, "loss": 2.1107, "step": 44930 }, { "epoch": 0.93, "grad_norm": 0.41796875, "learning_rate": 3.526053802990664e-07, "loss": 2.0999, "step": 44940 }, { "epoch": 0.93, "grad_norm": 0.423828125, "learning_rate": 3.5038594732110354e-07, "loss": 2.0925, "step": 44950 }, { "epoch": 0.93, "grad_norm": 0.412109375, "learning_rate": 3.481734388451907e-07, "loss": 2.1169, "step": 44960 }, { "epoch": 0.93, "grad_norm": 0.453125, "learning_rate": 3.459678559171248e-07, "loss": 2.1306, "step": 44970 }, { "epoch": 0.94, "grad_norm": 0.4296875, "learning_rate": 3.437691995794301e-07, "loss": 2.0983, "step": 44980 }, { "epoch": 0.94, "grad_norm": 0.44140625, "learning_rate": 3.415774708713554e-07, "loss": 2.0743, "step": 44990 }, { "epoch": 0.94, "grad_norm": 0.42578125, "learning_rate": 3.393926708288753e-07, "loss": 2.1342, "step": 45000 }, { "epoch": 0.94, "eval_accuracy": 0.5590576406515686, "eval_loss": 1.9930360317230225, "eval_runtime": 16.4305, "eval_samples_per_second": 36.213, "eval_steps_per_second": 1.156, "step": 45000 }, { "epoch": 0.94, "grad_norm": 0.427734375, "learning_rate": 3.37214800484687e-07, "loss": 2.1057, "step": 45010 }, { "epoch": 0.94, "grad_norm": 0.423828125, "learning_rate": 3.3504386086822046e-07, "loss": 2.1398, "step": 45020 }, { "epoch": 0.94, "grad_norm": 0.431640625, "learning_rate": 3.3287985300561985e-07, "loss": 2.0922, "step": 45030 }, { "epoch": 0.94, "grad_norm": 0.439453125, "learning_rate": 3.307227779197569e-07, "loss": 2.1144, "step": 45040 }, { "epoch": 0.94, "grad_norm": 0.52734375, "learning_rate": 3.2857263663022943e-07, "loss": 2.1353, "step": 45050 }, { "epoch": 0.94, "grad_norm": 0.462890625, "learning_rate": 3.26429430153351e-07, "loss": 2.1495, "step": 45060 }, { "epoch": 0.94, "grad_norm": 0.458984375, "learning_rate": 3.242931595021631e-07, "loss": 2.0845, "step": 45070 }, { "epoch": 0.94, "grad_norm": 0.451171875, "learning_rate": 3.221638256864279e-07, "loss": 2.1206, "step": 45080 }, { "epoch": 0.94, "grad_norm": 0.423828125, "learning_rate": 3.200414297126253e-07, "loss": 2.1069, "step": 45090 }, { "epoch": 0.94, "grad_norm": 0.46484375, "learning_rate": 3.179259725839595e-07, "loss": 2.1014, "step": 45100 }, { "epoch": 0.94, "grad_norm": 0.435546875, "learning_rate": 3.15817455300354e-07, "loss": 2.138, "step": 45110 }, { "epoch": 0.94, "grad_norm": 0.447265625, "learning_rate": 3.137158788584499e-07, "loss": 2.1401, "step": 45120 }, { "epoch": 0.94, "grad_norm": 0.42578125, "learning_rate": 3.1162124425161087e-07, "loss": 2.1208, "step": 45130 }, { "epoch": 0.94, "grad_norm": 0.46875, "learning_rate": 3.0953355246991663e-07, "loss": 2.1036, "step": 45140 }, { "epoch": 0.94, "grad_norm": 0.421875, "learning_rate": 3.0745280450016954e-07, "loss": 2.1082, "step": 45150 }, { "epoch": 0.94, "grad_norm": 0.498046875, "learning_rate": 3.0537900132588113e-07, "loss": 2.1344, "step": 45160 }, { "epoch": 0.94, "grad_norm": 0.455078125, "learning_rate": 3.0331214392728903e-07, "loss": 2.1449, "step": 45170 }, { "epoch": 0.94, "grad_norm": 0.447265625, "learning_rate": 3.0125223328134334e-07, "loss": 2.114, "step": 45180 }, { "epoch": 0.94, "grad_norm": 0.44921875, "learning_rate": 2.991992703617136e-07, "loss": 2.1032, "step": 45190 }, { "epoch": 0.94, "grad_norm": 0.419921875, "learning_rate": 2.9715325613878184e-07, "loss": 2.142, "step": 45200 }, { "epoch": 0.94, "grad_norm": 0.427734375, "learning_rate": 2.9511419157964625e-07, "loss": 2.1107, "step": 45210 }, { "epoch": 0.94, "grad_norm": 0.494140625, "learning_rate": 2.9308207764812244e-07, "loss": 2.1332, "step": 45220 }, { "epoch": 0.94, "grad_norm": 0.4375, "learning_rate": 2.910569153047404e-07, "loss": 2.1236, "step": 45230 }, { "epoch": 0.94, "grad_norm": 0.4375, "learning_rate": 2.890387055067412e-07, "loss": 2.106, "step": 45240 }, { "epoch": 0.94, "grad_norm": 0.431640625, "learning_rate": 2.8702744920808154e-07, "loss": 2.1194, "step": 45250 }, { "epoch": 0.94, "grad_norm": 0.421875, "learning_rate": 2.850231473594328e-07, "loss": 2.1311, "step": 45260 }, { "epoch": 0.94, "grad_norm": 0.44921875, "learning_rate": 2.830258009081771e-07, "loss": 2.1452, "step": 45270 }, { "epoch": 0.94, "grad_norm": 0.44140625, "learning_rate": 2.8103541079840924e-07, "loss": 2.1229, "step": 45280 }, { "epoch": 0.94, "grad_norm": 0.423828125, "learning_rate": 2.7905197797093663e-07, "loss": 2.1255, "step": 45290 }, { "epoch": 0.94, "grad_norm": 2.265625, "learning_rate": 2.770755033632777e-07, "loss": 2.1103, "step": 45300 }, { "epoch": 0.94, "grad_norm": 0.490234375, "learning_rate": 2.751059879096618e-07, "loss": 2.1238, "step": 45310 }, { "epoch": 0.94, "grad_norm": 0.421875, "learning_rate": 2.7314343254102925e-07, "loss": 2.098, "step": 45320 }, { "epoch": 0.94, "grad_norm": 0.447265625, "learning_rate": 2.7118783818503136e-07, "loss": 2.1116, "step": 45330 }, { "epoch": 0.94, "grad_norm": 0.419921875, "learning_rate": 2.69239205766027e-07, "loss": 2.129, "step": 45340 }, { "epoch": 0.94, "grad_norm": 0.435546875, "learning_rate": 2.6729753620508613e-07, "loss": 2.135, "step": 45350 }, { "epoch": 0.94, "grad_norm": 0.44921875, "learning_rate": 2.6536283041998454e-07, "loss": 2.1212, "step": 45360 }, { "epoch": 0.94, "grad_norm": 0.439453125, "learning_rate": 2.6343508932521243e-07, "loss": 2.1066, "step": 45370 }, { "epoch": 0.94, "grad_norm": 0.423828125, "learning_rate": 2.615143138319609e-07, "loss": 2.1065, "step": 45380 }, { "epoch": 0.94, "grad_norm": 0.47265625, "learning_rate": 2.5960050484813536e-07, "loss": 2.1595, "step": 45390 }, { "epoch": 0.94, "grad_norm": 0.435546875, "learning_rate": 2.576936632783422e-07, "loss": 2.0861, "step": 45400 }, { "epoch": 0.94, "grad_norm": 0.4296875, "learning_rate": 2.5579379002389716e-07, "loss": 2.114, "step": 45410 }, { "epoch": 0.94, "grad_norm": 0.451171875, "learning_rate": 2.5390088598282344e-07, "loss": 2.1287, "step": 45420 }, { "epoch": 0.94, "grad_norm": 0.427734375, "learning_rate": 2.520149520498488e-07, "loss": 2.0905, "step": 45430 }, { "epoch": 0.94, "grad_norm": 0.412109375, "learning_rate": 2.5013598911640514e-07, "loss": 2.1268, "step": 45440 }, { "epoch": 0.94, "grad_norm": 0.447265625, "learning_rate": 2.482639980706336e-07, "loss": 2.0868, "step": 45450 }, { "epoch": 0.95, "grad_norm": 0.4375, "learning_rate": 2.463989797973748e-07, "loss": 2.0986, "step": 45460 }, { "epoch": 0.95, "grad_norm": 0.43359375, "learning_rate": 2.4454093517817687e-07, "loss": 2.1135, "step": 45470 }, { "epoch": 0.95, "grad_norm": 0.447265625, "learning_rate": 2.4268986509128733e-07, "loss": 2.0712, "step": 45480 }, { "epoch": 0.95, "grad_norm": 0.435546875, "learning_rate": 2.408457704116662e-07, "loss": 2.0803, "step": 45490 }, { "epoch": 0.95, "grad_norm": 0.435546875, "learning_rate": 2.39008652010963e-07, "loss": 2.0991, "step": 45500 }, { "epoch": 0.95, "eval_accuracy": 0.5589902823299407, "eval_loss": 1.9930479526519775, "eval_runtime": 16.443, "eval_samples_per_second": 36.186, "eval_steps_per_second": 1.156, "step": 45500 }, { "epoch": 0.95, "grad_norm": 0.416015625, "learning_rate": 2.3717851075754305e-07, "loss": 2.1284, "step": 45510 }, { "epoch": 0.95, "grad_norm": 0.439453125, "learning_rate": 2.3535534751646436e-07, "loss": 2.1352, "step": 45520 }, { "epoch": 0.95, "grad_norm": 0.4453125, "learning_rate": 2.335391631494943e-07, "loss": 2.1524, "step": 45530 }, { "epoch": 0.95, "grad_norm": 0.447265625, "learning_rate": 2.317299585150928e-07, "loss": 2.1166, "step": 45540 }, { "epoch": 0.95, "grad_norm": 0.4453125, "learning_rate": 2.2992773446842585e-07, "loss": 2.1092, "step": 45550 }, { "epoch": 0.95, "grad_norm": 0.59765625, "learning_rate": 2.2813249186136197e-07, "loss": 2.1172, "step": 45560 }, { "epoch": 0.95, "grad_norm": 0.44921875, "learning_rate": 2.263442315424641e-07, "loss": 2.1471, "step": 45570 }, { "epoch": 0.95, "grad_norm": 0.43359375, "learning_rate": 2.245629543569977e-07, "loss": 2.1398, "step": 45580 }, { "epoch": 0.95, "grad_norm": 0.4375, "learning_rate": 2.2278866114693098e-07, "loss": 2.1437, "step": 45590 }, { "epoch": 0.95, "grad_norm": 0.46875, "learning_rate": 2.2102135275092307e-07, "loss": 2.1167, "step": 45600 }, { "epoch": 0.95, "grad_norm": 0.447265625, "learning_rate": 2.192610300043374e-07, "loss": 2.1036, "step": 45610 }, { "epoch": 0.95, "grad_norm": 0.435546875, "learning_rate": 2.1750769373923508e-07, "loss": 2.0965, "step": 45620 }, { "epoch": 0.95, "grad_norm": 0.44921875, "learning_rate": 2.1576134478437315e-07, "loss": 2.1495, "step": 45630 }, { "epoch": 0.95, "grad_norm": 0.44140625, "learning_rate": 2.1402198396520633e-07, "loss": 2.1311, "step": 45640 }, { "epoch": 0.95, "grad_norm": 0.46484375, "learning_rate": 2.1228961210388698e-07, "loss": 2.1074, "step": 45650 }, { "epoch": 0.95, "grad_norm": 0.435546875, "learning_rate": 2.105642300192634e-07, "loss": 2.1014, "step": 45660 }, { "epoch": 0.95, "grad_norm": 0.44140625, "learning_rate": 2.088458385268799e-07, "loss": 2.135, "step": 45670 }, { "epoch": 0.95, "grad_norm": 0.48828125, "learning_rate": 2.0713443843897674e-07, "loss": 2.0879, "step": 45680 }, { "epoch": 0.95, "grad_norm": 0.431640625, "learning_rate": 2.054300305644885e-07, "loss": 2.1002, "step": 45690 }, { "epoch": 0.95, "grad_norm": 0.421875, "learning_rate": 2.0373261570904744e-07, "loss": 2.1279, "step": 45700 }, { "epoch": 0.95, "grad_norm": 0.443359375, "learning_rate": 2.0204219467497842e-07, "loss": 2.0985, "step": 45710 }, { "epoch": 0.95, "grad_norm": 0.4375, "learning_rate": 2.0035876826129728e-07, "loss": 2.1147, "step": 45720 }, { "epoch": 0.95, "grad_norm": 0.44140625, "learning_rate": 1.9868233726372253e-07, "loss": 2.0787, "step": 45730 }, { "epoch": 0.95, "grad_norm": 0.42578125, "learning_rate": 1.9701290247465697e-07, "loss": 2.127, "step": 45740 }, { "epoch": 0.95, "grad_norm": 0.4453125, "learning_rate": 1.9535046468320272e-07, "loss": 2.0871, "step": 45750 }, { "epoch": 0.95, "grad_norm": 0.490234375, "learning_rate": 1.9369502467514788e-07, "loss": 2.1296, "step": 45760 }, { "epoch": 0.95, "grad_norm": 0.47265625, "learning_rate": 1.9204658323298152e-07, "loss": 2.1679, "step": 45770 }, { "epoch": 0.95, "grad_norm": 0.45703125, "learning_rate": 1.904051411358787e-07, "loss": 2.1201, "step": 45780 }, { "epoch": 0.95, "grad_norm": 0.439453125, "learning_rate": 1.8877069915970712e-07, "loss": 2.1464, "step": 45790 }, { "epoch": 0.95, "grad_norm": 0.466796875, "learning_rate": 1.8714325807702548e-07, "loss": 2.1096, "step": 45800 }, { "epoch": 0.95, "grad_norm": 0.439453125, "learning_rate": 1.8552281865708675e-07, "loss": 2.0995, "step": 45810 }, { "epoch": 0.95, "grad_norm": 0.439453125, "learning_rate": 1.8390938166582826e-07, "loss": 2.0881, "step": 45820 }, { "epoch": 0.95, "grad_norm": 0.4453125, "learning_rate": 1.823029478658833e-07, "loss": 2.1175, "step": 45830 }, { "epoch": 0.95, "grad_norm": 0.44140625, "learning_rate": 1.8070351801657114e-07, "loss": 2.1802, "step": 45840 }, { "epoch": 0.95, "grad_norm": 0.4375, "learning_rate": 1.791110928739037e-07, "loss": 2.1145, "step": 45850 }, { "epoch": 0.95, "grad_norm": 0.44140625, "learning_rate": 1.7752567319057723e-07, "loss": 2.0945, "step": 45860 }, { "epoch": 0.95, "grad_norm": 0.431640625, "learning_rate": 1.7594725971598225e-07, "loss": 2.0999, "step": 45870 }, { "epoch": 0.95, "grad_norm": 0.44921875, "learning_rate": 1.74375853196192e-07, "loss": 2.1349, "step": 45880 }, { "epoch": 0.95, "grad_norm": 0.44140625, "learning_rate": 1.7281145437397394e-07, "loss": 2.1026, "step": 45890 }, { "epoch": 0.95, "grad_norm": 0.462890625, "learning_rate": 1.712540639887783e-07, "loss": 2.1191, "step": 45900 }, { "epoch": 0.95, "grad_norm": 0.43359375, "learning_rate": 1.6970368277674285e-07, "loss": 2.1523, "step": 45910 }, { "epoch": 0.95, "grad_norm": 0.435546875, "learning_rate": 1.6816031147069476e-07, "loss": 2.1087, "step": 45920 }, { "epoch": 0.95, "grad_norm": 0.43359375, "learning_rate": 1.6662395080014547e-07, "loss": 2.0808, "step": 45930 }, { "epoch": 0.96, "grad_norm": 0.4765625, "learning_rate": 1.650946014912974e-07, "loss": 2.1197, "step": 45940 }, { "epoch": 0.96, "grad_norm": 0.4296875, "learning_rate": 1.6357226426703064e-07, "loss": 2.1047, "step": 45950 }, { "epoch": 0.96, "grad_norm": 0.451171875, "learning_rate": 1.620569398469196e-07, "loss": 2.0961, "step": 45960 }, { "epoch": 0.96, "grad_norm": 0.4609375, "learning_rate": 1.60548628947218e-07, "loss": 2.1058, "step": 45970 }, { "epoch": 0.96, "grad_norm": 0.423828125, "learning_rate": 1.5904733228086553e-07, "loss": 2.1122, "step": 45980 }, { "epoch": 0.96, "grad_norm": 0.4375, "learning_rate": 1.5755305055748625e-07, "loss": 2.1416, "step": 45990 }, { "epoch": 0.96, "grad_norm": 0.455078125, "learning_rate": 1.560657844833918e-07, "loss": 2.1431, "step": 46000 }, { "epoch": 0.96, "eval_accuracy": 0.5591742855499972, "eval_loss": 1.992998480796814, "eval_runtime": 16.4485, "eval_samples_per_second": 36.174, "eval_steps_per_second": 1.155, "step": 46000 }, { "epoch": 0.96, "grad_norm": 0.427734375, "learning_rate": 1.5458553476157488e-07, "loss": 2.0971, "step": 46010 }, { "epoch": 0.96, "grad_norm": 0.423828125, "learning_rate": 1.5311230209171078e-07, "loss": 2.1007, "step": 46020 }, { "epoch": 0.96, "grad_norm": 0.43359375, "learning_rate": 1.5164608717016082e-07, "loss": 2.1256, "step": 46030 }, { "epoch": 0.96, "grad_norm": 0.43359375, "learning_rate": 1.501868906899656e-07, "loss": 2.1464, "step": 46040 }, { "epoch": 0.96, "grad_norm": 0.48828125, "learning_rate": 1.4873471334085175e-07, "loss": 2.1391, "step": 46050 }, { "epoch": 0.96, "grad_norm": 0.439453125, "learning_rate": 1.4728955580922853e-07, "loss": 2.1074, "step": 46060 }, { "epoch": 0.96, "grad_norm": 0.451171875, "learning_rate": 1.458514187781812e-07, "loss": 2.1233, "step": 46070 }, { "epoch": 0.96, "grad_norm": 0.46484375, "learning_rate": 1.4442030292748432e-07, "loss": 2.1176, "step": 46080 }, { "epoch": 0.96, "grad_norm": 0.431640625, "learning_rate": 1.429962089335901e-07, "loss": 2.1019, "step": 46090 }, { "epoch": 0.96, "grad_norm": 0.435546875, "learning_rate": 1.4157913746962846e-07, "loss": 2.0939, "step": 46100 }, { "epoch": 0.96, "grad_norm": 0.423828125, "learning_rate": 1.4016908920541695e-07, "loss": 2.0803, "step": 46110 }, { "epoch": 0.96, "grad_norm": 0.455078125, "learning_rate": 1.387660648074457e-07, "loss": 2.1201, "step": 46120 }, { "epoch": 0.96, "grad_norm": 0.431640625, "learning_rate": 1.3737006493889427e-07, "loss": 2.134, "step": 46130 }, { "epoch": 0.96, "grad_norm": 0.423828125, "learning_rate": 1.3598109025961315e-07, "loss": 2.1016, "step": 46140 }, { "epoch": 0.96, "grad_norm": 0.439453125, "learning_rate": 1.3459914142613384e-07, "loss": 2.1291, "step": 46150 }, { "epoch": 0.96, "grad_norm": 0.4375, "learning_rate": 1.3322421909167216e-07, "loss": 2.1362, "step": 46160 }, { "epoch": 0.96, "grad_norm": 0.44921875, "learning_rate": 1.3185632390611823e-07, "loss": 2.1282, "step": 46170 }, { "epoch": 0.96, "grad_norm": 0.44140625, "learning_rate": 1.3049545651603989e-07, "loss": 2.1112, "step": 46180 }, { "epoch": 0.96, "grad_norm": 0.474609375, "learning_rate": 1.291416175646859e-07, "loss": 2.1117, "step": 46190 }, { "epoch": 0.96, "grad_norm": 0.470703125, "learning_rate": 1.277948076919827e-07, "loss": 2.1086, "step": 46200 }, { "epoch": 0.96, "grad_norm": 0.447265625, "learning_rate": 1.264550275345311e-07, "loss": 2.1295, "step": 46210 }, { "epoch": 0.96, "grad_norm": 0.453125, "learning_rate": 1.251222777256128e-07, "loss": 2.1282, "step": 46220 }, { "epoch": 0.96, "grad_norm": 0.431640625, "learning_rate": 1.2379655889518393e-07, "loss": 2.1342, "step": 46230 }, { "epoch": 0.96, "grad_norm": 0.453125, "learning_rate": 1.2247787166987823e-07, "loss": 2.1258, "step": 46240 }, { "epoch": 0.96, "grad_norm": 0.4453125, "learning_rate": 1.211662166730071e-07, "loss": 2.1023, "step": 46250 }, { "epoch": 0.96, "grad_norm": 0.443359375, "learning_rate": 1.1986159452455626e-07, "loss": 2.1328, "step": 46260 }, { "epoch": 0.96, "grad_norm": 0.43359375, "learning_rate": 1.1856400584118577e-07, "loss": 2.1128, "step": 46270 }, { "epoch": 0.96, "grad_norm": 0.44140625, "learning_rate": 1.1727345123623667e-07, "loss": 2.1226, "step": 46280 }, { "epoch": 0.96, "grad_norm": 0.447265625, "learning_rate": 1.1598993131971769e-07, "loss": 2.1344, "step": 46290 }, { "epoch": 0.96, "grad_norm": 0.431640625, "learning_rate": 1.1471344669831852e-07, "loss": 2.1125, "step": 46300 }, { "epoch": 0.96, "grad_norm": 0.44921875, "learning_rate": 1.1344399797539983e-07, "loss": 2.1422, "step": 46310 }, { "epoch": 0.96, "grad_norm": 0.42578125, "learning_rate": 1.1218158575100001e-07, "loss": 2.1421, "step": 46320 }, { "epoch": 0.96, "grad_norm": 0.431640625, "learning_rate": 1.1092621062182506e-07, "loss": 2.124, "step": 46330 }, { "epoch": 0.96, "grad_norm": 0.4296875, "learning_rate": 1.0967787318126366e-07, "loss": 2.1198, "step": 46340 }, { "epoch": 0.96, "grad_norm": 0.431640625, "learning_rate": 1.0843657401937213e-07, "loss": 2.1193, "step": 46350 }, { "epoch": 0.96, "grad_norm": 0.443359375, "learning_rate": 1.0720231372288114e-07, "loss": 2.1048, "step": 46360 }, { "epoch": 0.96, "grad_norm": 0.44921875, "learning_rate": 1.0597509287519236e-07, "loss": 2.1352, "step": 46370 }, { "epoch": 0.96, "grad_norm": 0.451171875, "learning_rate": 1.047549120563851e-07, "loss": 2.1113, "step": 46380 }, { "epoch": 0.96, "grad_norm": 0.4453125, "learning_rate": 1.0354177184320468e-07, "loss": 2.1259, "step": 46390 }, { "epoch": 0.96, "grad_norm": 0.42578125, "learning_rate": 1.0233567280907408e-07, "loss": 2.1186, "step": 46400 }, { "epoch": 0.96, "grad_norm": 0.435546875, "learning_rate": 1.011366155240856e-07, "loss": 2.1275, "step": 46410 }, { "epoch": 0.97, "grad_norm": 0.42578125, "learning_rate": 9.994460055500254e-08, "loss": 2.1298, "step": 46420 }, { "epoch": 0.97, "grad_norm": 0.498046875, "learning_rate": 9.875962846526087e-08, "loss": 2.1294, "step": 46430 }, { "epoch": 0.97, "grad_norm": 0.447265625, "learning_rate": 9.758169981496756e-08, "loss": 2.1182, "step": 46440 }, { "epoch": 0.97, "grad_norm": 0.4296875, "learning_rate": 9.641081516089722e-08, "loss": 2.0929, "step": 46450 }, { "epoch": 0.97, "grad_norm": 0.443359375, "learning_rate": 9.524697505650048e-08, "loss": 2.1053, "step": 46460 }, { "epoch": 0.97, "grad_norm": 0.44921875, "learning_rate": 9.409018005189229e-08, "loss": 2.1157, "step": 46470 }, { "epoch": 0.97, "grad_norm": 0.427734375, "learning_rate": 9.29404306938636e-08, "loss": 2.1169, "step": 46480 }, { "epoch": 0.97, "grad_norm": 0.4375, "learning_rate": 9.179772752586802e-08, "loss": 2.1062, "step": 46490 }, { "epoch": 0.97, "grad_norm": 0.42578125, "learning_rate": 9.066207108803515e-08, "loss": 2.0965, "step": 46500 }, { "epoch": 0.97, "eval_accuracy": 0.5589984967594076, "eval_loss": 1.9930667877197266, "eval_runtime": 16.443, "eval_samples_per_second": 36.186, "eval_steps_per_second": 1.156, "step": 46500 }, { "epoch": 0.97, "grad_norm": 0.447265625, "learning_rate": 8.953346191716228e-08, "loss": 2.1052, "step": 46510 }, { "epoch": 0.97, "grad_norm": 0.419921875, "learning_rate": 8.841190054670933e-08, "loss": 2.1407, "step": 46520 }, { "epoch": 0.97, "grad_norm": 0.4921875, "learning_rate": 8.729738750681559e-08, "loss": 2.0972, "step": 46530 }, { "epoch": 0.97, "grad_norm": 0.443359375, "learning_rate": 8.618992332427966e-08, "loss": 2.1144, "step": 46540 }, { "epoch": 0.97, "grad_norm": 0.439453125, "learning_rate": 8.50895085225728e-08, "loss": 2.0964, "step": 46550 }, { "epoch": 0.97, "grad_norm": 0.43359375, "learning_rate": 8.399614362183228e-08, "loss": 2.1371, "step": 46560 }, { "epoch": 0.97, "grad_norm": 0.455078125, "learning_rate": 8.290982913886802e-08, "loss": 2.1149, "step": 46570 }, { "epoch": 0.97, "grad_norm": 0.458984375, "learning_rate": 8.183056558714763e-08, "loss": 2.0917, "step": 46580 }, { "epoch": 0.97, "grad_norm": 0.478515625, "learning_rate": 8.075835347681638e-08, "loss": 2.0968, "step": 46590 }, { "epoch": 0.97, "grad_norm": 0.4296875, "learning_rate": 7.96931933146805e-08, "loss": 2.0971, "step": 46600 }, { "epoch": 0.97, "grad_norm": 0.447265625, "learning_rate": 7.863508560421228e-08, "loss": 2.1231, "step": 46610 }, { "epoch": 0.97, "grad_norm": 0.734375, "learning_rate": 7.758403084555499e-08, "loss": 2.1269, "step": 46620 }, { "epoch": 0.97, "grad_norm": 0.427734375, "learning_rate": 7.654002953551453e-08, "loss": 2.1248, "step": 46630 }, { "epoch": 0.97, "grad_norm": 0.9453125, "learning_rate": 7.550308216756285e-08, "loss": 2.0894, "step": 46640 }, { "epoch": 0.97, "grad_norm": 0.427734375, "learning_rate": 7.447318923184121e-08, "loss": 2.1127, "step": 46650 }, { "epoch": 0.97, "grad_norm": 0.431640625, "learning_rate": 7.345035121515186e-08, "loss": 2.1164, "step": 46660 }, { "epoch": 0.97, "grad_norm": 0.431640625, "learning_rate": 7.243456860096476e-08, "loss": 2.0763, "step": 46670 }, { "epoch": 0.97, "grad_norm": 0.439453125, "learning_rate": 7.14258418694158e-08, "loss": 2.1182, "step": 46680 }, { "epoch": 0.97, "grad_norm": 0.48828125, "learning_rate": 7.042417149730363e-08, "loss": 2.1176, "step": 46690 }, { "epoch": 0.97, "grad_norm": 0.435546875, "learning_rate": 6.942955795809447e-08, "loss": 2.1102, "step": 46700 }, { "epoch": 0.97, "grad_norm": 0.439453125, "learning_rate": 6.844200172191395e-08, "loss": 2.1534, "step": 46710 }, { "epoch": 0.97, "grad_norm": 0.431640625, "learning_rate": 6.7461503255557e-08, "loss": 2.1617, "step": 46720 }, { "epoch": 0.97, "grad_norm": 0.427734375, "learning_rate": 6.648806302247956e-08, "loss": 2.0943, "step": 46730 }, { "epoch": 0.97, "grad_norm": 0.447265625, "learning_rate": 6.552168148280191e-08, "loss": 2.1025, "step": 46740 }, { "epoch": 0.97, "grad_norm": 0.423828125, "learning_rate": 6.456235909330866e-08, "loss": 2.1174, "step": 46750 }, { "epoch": 0.97, "grad_norm": 0.416015625, "learning_rate": 6.361009630744708e-08, "loss": 2.1029, "step": 46760 }, { "epoch": 0.97, "grad_norm": 0.43359375, "learning_rate": 6.266489357532878e-08, "loss": 2.1107, "step": 46770 }, { "epoch": 0.97, "grad_norm": 0.423828125, "learning_rate": 6.172675134372474e-08, "loss": 2.1372, "step": 46780 }, { "epoch": 0.97, "grad_norm": 0.435546875, "learning_rate": 6.079567005607523e-08, "loss": 2.1395, "step": 46790 }, { "epoch": 0.97, "grad_norm": 0.416015625, "learning_rate": 5.98716501524732e-08, "loss": 2.1642, "step": 46800 }, { "epoch": 0.97, "grad_norm": 0.4296875, "learning_rate": 5.895469206968429e-08, "loss": 2.1287, "step": 46810 }, { "epoch": 0.97, "grad_norm": 0.43359375, "learning_rate": 5.8044796241128465e-08, "loss": 2.1205, "step": 46820 }, { "epoch": 0.97, "grad_norm": 0.419921875, "learning_rate": 5.7141963096893344e-08, "loss": 2.12, "step": 46830 }, { "epoch": 0.97, "grad_norm": 0.421875, "learning_rate": 5.624619306372092e-08, "loss": 2.1247, "step": 46840 }, { "epoch": 0.97, "grad_norm": 0.447265625, "learning_rate": 5.5357486565024154e-08, "loss": 2.1244, "step": 46850 }, { "epoch": 0.97, "grad_norm": 0.484375, "learning_rate": 5.4475844020868696e-08, "loss": 2.127, "step": 46860 }, { "epoch": 0.97, "grad_norm": 0.4296875, "learning_rate": 5.360126584798453e-08, "loss": 2.1243, "step": 46870 }, { "epoch": 0.97, "grad_norm": 0.4375, "learning_rate": 5.273375245976597e-08, "loss": 2.1084, "step": 46880 }, { "epoch": 0.97, "grad_norm": 0.466796875, "learning_rate": 5.187330426626169e-08, "loss": 2.1259, "step": 46890 }, { "epoch": 0.98, "grad_norm": 0.43359375, "learning_rate": 5.101992167418301e-08, "loss": 2.0995, "step": 46900 }, { "epoch": 0.98, "grad_norm": 0.435546875, "learning_rate": 5.0173605086905606e-08, "loss": 2.1291, "step": 46910 }, { "epoch": 0.98, "grad_norm": 0.431640625, "learning_rate": 4.9334354904459475e-08, "loss": 2.1173, "step": 46920 }, { "epoch": 0.98, "grad_norm": 0.43359375, "learning_rate": 4.850217152353731e-08, "loss": 2.134, "step": 46930 }, { "epoch": 0.98, "grad_norm": 0.458984375, "learning_rate": 4.767705533749112e-08, "loss": 2.1278, "step": 46940 }, { "epoch": 0.98, "grad_norm": 0.439453125, "learning_rate": 4.685900673633392e-08, "loss": 2.1123, "step": 46950 }, { "epoch": 0.98, "grad_norm": 0.4140625, "learning_rate": 4.604802610673475e-08, "loss": 2.1184, "step": 46960 }, { "epoch": 0.98, "grad_norm": 0.43359375, "learning_rate": 4.5244113832025314e-08, "loss": 2.1002, "step": 46970 }, { "epoch": 0.98, "grad_norm": 0.416015625, "learning_rate": 4.4447270292191646e-08, "loss": 2.1362, "step": 46980 }, { "epoch": 0.98, "grad_norm": 0.421875, "learning_rate": 4.365749586388579e-08, "loss": 2.1261, "step": 46990 }, { "epoch": 0.98, "grad_norm": 0.42578125, "learning_rate": 4.28747909204108e-08, "loss": 2.1377, "step": 47000 }, { "epoch": 0.98, "eval_accuracy": 0.5591907144089308, "eval_loss": 1.99307382106781, "eval_runtime": 16.463, "eval_samples_per_second": 36.142, "eval_steps_per_second": 1.154, "step": 47000 }, { "epoch": 0.98, "grad_norm": 0.44921875, "learning_rate": 4.209915583173241e-08, "loss": 2.1158, "step": 47010 }, { "epoch": 0.98, "grad_norm": 0.431640625, "learning_rate": 4.1330590964474e-08, "loss": 2.0902, "step": 47020 }, { "epoch": 0.98, "grad_norm": 0.435546875, "learning_rate": 4.056909668191666e-08, "loss": 2.1167, "step": 47030 }, { "epoch": 0.98, "grad_norm": 0.43359375, "learning_rate": 3.981467334399913e-08, "loss": 2.0849, "step": 47040 }, { "epoch": 0.98, "grad_norm": 0.43359375, "learning_rate": 3.906732130731949e-08, "loss": 2.1406, "step": 47050 }, { "epoch": 0.98, "grad_norm": 0.44140625, "learning_rate": 3.8327040925130175e-08, "loss": 2.1419, "step": 47060 }, { "epoch": 0.98, "grad_norm": 0.4296875, "learning_rate": 3.759383254734461e-08, "loss": 2.1096, "step": 47070 }, { "epoch": 0.98, "grad_norm": 0.45703125, "learning_rate": 3.686769652053224e-08, "loss": 2.1092, "step": 47080 }, { "epoch": 0.98, "grad_norm": 0.44140625, "learning_rate": 3.6148633187916834e-08, "loss": 2.1151, "step": 47090 }, { "epoch": 0.98, "grad_norm": 0.451171875, "learning_rate": 3.54366428893832e-08, "loss": 2.1464, "step": 47100 }, { "epoch": 0.98, "grad_norm": 0.43359375, "learning_rate": 3.473172596147045e-08, "loss": 2.1102, "step": 47110 }, { "epoch": 0.98, "grad_norm": 0.439453125, "learning_rate": 3.40338827373754e-08, "loss": 2.0963, "step": 47120 }, { "epoch": 0.98, "grad_norm": 0.439453125, "learning_rate": 3.3343113546950855e-08, "loss": 2.0916, "step": 47130 }, { "epoch": 0.98, "grad_norm": 0.431640625, "learning_rate": 3.26594187167073e-08, "loss": 2.1226, "step": 47140 }, { "epoch": 0.98, "grad_norm": 0.421875, "learning_rate": 3.19827985698079e-08, "loss": 2.1285, "step": 47150 }, { "epoch": 0.98, "grad_norm": 0.41796875, "learning_rate": 3.1313253426073496e-08, "loss": 2.1429, "step": 47160 }, { "epoch": 0.98, "grad_norm": 0.427734375, "learning_rate": 3.065078360198426e-08, "loss": 2.111, "step": 47170 }, { "epoch": 0.98, "grad_norm": 0.494140625, "learning_rate": 2.999538941066804e-08, "loss": 2.1493, "step": 47180 }, { "epoch": 0.98, "grad_norm": 0.4375, "learning_rate": 2.9347071161918703e-08, "loss": 2.1358, "step": 47190 }, { "epoch": 0.98, "grad_norm": 0.4296875, "learning_rate": 2.8705829162176122e-08, "loss": 2.0828, "step": 47200 }, { "epoch": 0.98, "grad_norm": 0.59375, "learning_rate": 2.8071663714539508e-08, "loss": 2.1029, "step": 47210 }, { "epoch": 0.98, "grad_norm": 0.439453125, "learning_rate": 2.744457511876408e-08, "loss": 2.1115, "step": 47220 }, { "epoch": 0.98, "grad_norm": 0.423828125, "learning_rate": 2.6824563671259406e-08, "loss": 2.1062, "step": 47230 }, { "epoch": 0.98, "grad_norm": 0.42578125, "learning_rate": 2.6211629665086056e-08, "loss": 2.1235, "step": 47240 }, { "epoch": 0.98, "grad_norm": 0.451171875, "learning_rate": 2.5605773389965616e-08, "loss": 2.1205, "step": 47250 }, { "epoch": 0.98, "grad_norm": 0.427734375, "learning_rate": 2.5006995132269007e-08, "loss": 2.0969, "step": 47260 }, { "epoch": 0.98, "grad_norm": 0.423828125, "learning_rate": 2.4415295175024832e-08, "loss": 2.0746, "step": 47270 }, { "epoch": 0.98, "grad_norm": 0.43359375, "learning_rate": 2.3830673797914372e-08, "loss": 2.1345, "step": 47280 }, { "epoch": 0.98, "grad_norm": 0.455078125, "learning_rate": 2.325313127727158e-08, "loss": 2.1141, "step": 47290 }, { "epoch": 0.98, "grad_norm": 0.439453125, "learning_rate": 2.2682667886089747e-08, "loss": 2.1138, "step": 47300 }, { "epoch": 0.98, "grad_norm": 0.447265625, "learning_rate": 2.2119283894009856e-08, "loss": 2.1214, "step": 47310 }, { "epoch": 0.98, "grad_norm": 0.466796875, "learning_rate": 2.1562979567330554e-08, "loss": 2.1126, "step": 47320 }, { "epoch": 0.98, "grad_norm": 0.458984375, "learning_rate": 2.1013755169001503e-08, "loss": 2.0662, "step": 47330 }, { "epoch": 0.98, "grad_norm": 0.443359375, "learning_rate": 2.0471610958628374e-08, "loss": 2.0994, "step": 47340 }, { "epoch": 0.98, "grad_norm": 0.435546875, "learning_rate": 1.9936547192467845e-08, "loss": 2.1121, "step": 47350 }, { "epoch": 0.98, "grad_norm": 0.44921875, "learning_rate": 1.9408564123432614e-08, "loss": 2.1105, "step": 47360 }, { "epoch": 0.98, "grad_norm": 0.453125, "learning_rate": 1.888766200108638e-08, "loss": 2.1276, "step": 47370 }, { "epoch": 0.99, "grad_norm": 0.423828125, "learning_rate": 1.8373841071645526e-08, "loss": 2.1132, "step": 47380 }, { "epoch": 0.99, "grad_norm": 0.431640625, "learning_rate": 1.7867101577980773e-08, "loss": 2.1257, "step": 47390 }, { "epoch": 0.99, "grad_norm": 0.42578125, "learning_rate": 1.736744375961552e-08, "loss": 2.1129, "step": 47400 }, { "epoch": 0.99, "grad_norm": 0.421875, "learning_rate": 1.6874867852724186e-08, "loss": 2.1268, "step": 47410 }, { "epoch": 0.99, "grad_norm": 0.419921875, "learning_rate": 1.6389374090135524e-08, "loss": 2.1189, "step": 47420 }, { "epoch": 0.99, "grad_norm": 0.4375, "learning_rate": 1.5910962701330966e-08, "loss": 2.0854, "step": 47430 }, { "epoch": 0.99, "grad_norm": 0.439453125, "learning_rate": 1.5439633912442962e-08, "loss": 2.1192, "step": 47440 }, { "epoch": 0.99, "grad_norm": 0.421875, "learning_rate": 1.4975387946256634e-08, "loss": 2.1096, "step": 47450 }, { "epoch": 0.99, "grad_norm": 0.4609375, "learning_rate": 1.451822502220812e-08, "loss": 2.1462, "step": 47460 }, { "epoch": 0.99, "grad_norm": 0.45703125, "learning_rate": 1.4068145356389561e-08, "loss": 2.1359, "step": 47470 }, { "epoch": 0.99, "grad_norm": 0.482421875, "learning_rate": 1.3625149161539118e-08, "loss": 2.1303, "step": 47480 }, { "epoch": 0.99, "grad_norm": 0.4375, "learning_rate": 1.3189236647052626e-08, "loss": 2.1349, "step": 47490 }, { "epoch": 0.99, "grad_norm": 0.431640625, "learning_rate": 1.2760408018973602e-08, "loss": 2.1118, "step": 47500 }, { "epoch": 0.99, "eval_accuracy": 0.5591677140064237, "eval_loss": 1.9930524826049805, "eval_runtime": 16.4253, "eval_samples_per_second": 36.225, "eval_steps_per_second": 1.157, "step": 47500 }, { "epoch": 0.99, "grad_norm": 0.43359375, "learning_rate": 1.233866347999657e-08, "loss": 2.122, "step": 47510 }, { "epoch": 0.99, "grad_norm": 0.453125, "learning_rate": 1.1924003229473735e-08, "loss": 2.0844, "step": 47520 }, { "epoch": 0.99, "grad_norm": 0.423828125, "learning_rate": 1.1516427463401646e-08, "loss": 2.1384, "step": 47530 }, { "epoch": 0.99, "grad_norm": 0.44140625, "learning_rate": 1.1115936374431202e-08, "loss": 2.1229, "step": 47540 }, { "epoch": 0.99, "grad_norm": 0.455078125, "learning_rate": 1.0722530151864307e-08, "loss": 2.1078, "step": 47550 }, { "epoch": 0.99, "grad_norm": 0.4375, "learning_rate": 1.0336208981655549e-08, "loss": 2.1337, "step": 47560 }, { "epoch": 0.99, "grad_norm": 0.4453125, "learning_rate": 9.956973046407192e-09, "loss": 2.1397, "step": 47570 }, { "epoch": 0.99, "grad_norm": 0.470703125, "learning_rate": 9.584822525377512e-09, "loss": 2.1335, "step": 47580 }, { "epoch": 0.99, "grad_norm": 0.44921875, "learning_rate": 9.219757594469136e-09, "loss": 2.1263, "step": 47590 }, { "epoch": 0.99, "grad_norm": 0.45703125, "learning_rate": 8.861778426242362e-09, "loss": 2.1176, "step": 47600 }, { "epoch": 0.99, "grad_norm": 0.419921875, "learning_rate": 8.510885189901841e-09, "loss": 2.0973, "step": 47610 }, { "epoch": 0.99, "grad_norm": 0.427734375, "learning_rate": 8.167078051306565e-09, "loss": 2.086, "step": 47620 }, { "epoch": 0.99, "grad_norm": 0.431640625, "learning_rate": 7.830357172966541e-09, "loss": 2.1037, "step": 47630 }, { "epoch": 0.99, "grad_norm": 0.43359375, "learning_rate": 7.50072271404112e-09, "loss": 2.1, "step": 47640 }, { "epoch": 0.99, "grad_norm": 0.4765625, "learning_rate": 7.1781748303406665e-09, "loss": 2.1152, "step": 47650 }, { "epoch": 0.99, "grad_norm": 0.419921875, "learning_rate": 6.862713674323229e-09, "loss": 2.1094, "step": 47660 }, { "epoch": 0.99, "grad_norm": 0.44921875, "learning_rate": 6.554339395101194e-09, "loss": 2.1537, "step": 47670 }, { "epoch": 0.99, "grad_norm": 0.4609375, "learning_rate": 6.253052138434634e-09, "loss": 2.0739, "step": 47680 }, { "epoch": 0.99, "grad_norm": 0.8203125, "learning_rate": 5.958852046736296e-09, "loss": 2.1077, "step": 47690 }, { "epoch": 0.99, "grad_norm": 0.423828125, "learning_rate": 5.67173925906328e-09, "loss": 2.1207, "step": 47700 }, { "epoch": 0.99, "grad_norm": 0.44140625, "learning_rate": 5.391713911128693e-09, "loss": 2.1074, "step": 47710 }, { "epoch": 0.99, "grad_norm": 0.470703125, "learning_rate": 5.118776135294989e-09, "loss": 2.1124, "step": 47720 }, { "epoch": 0.99, "grad_norm": 0.44921875, "learning_rate": 4.8529260605706396e-09, "loss": 2.116, "step": 47730 }, { "epoch": 0.99, "grad_norm": 0.45703125, "learning_rate": 4.594163812615126e-09, "loss": 2.1099, "step": 47740 }, { "epoch": 0.99, "grad_norm": 0.45703125, "learning_rate": 4.3424895137439415e-09, "loss": 2.1091, "step": 47750 }, { "epoch": 0.99, "grad_norm": 0.427734375, "learning_rate": 4.09790328291193e-09, "loss": 2.1067, "step": 47760 }, { "epoch": 0.99, "grad_norm": 0.447265625, "learning_rate": 3.860405235729947e-09, "loss": 2.0861, "step": 47770 }, { "epoch": 0.99, "grad_norm": 0.41796875, "learning_rate": 3.6299954844598582e-09, "loss": 2.1638, "step": 47780 }, { "epoch": 0.99, "grad_norm": 0.4296875, "learning_rate": 3.4066741380078815e-09, "loss": 2.1137, "step": 47790 }, { "epoch": 0.99, "grad_norm": 0.419921875, "learning_rate": 3.1904413019329114e-09, "loss": 2.1118, "step": 47800 }, { "epoch": 0.99, "grad_norm": 0.412109375, "learning_rate": 2.9812970784448556e-09, "loss": 2.0686, "step": 47810 }, { "epoch": 0.99, "grad_norm": 0.443359375, "learning_rate": 2.779241566397972e-09, "loss": 2.1276, "step": 47820 }, { "epoch": 0.99, "grad_norm": 0.439453125, "learning_rate": 2.5842748612991963e-09, "loss": 2.1231, "step": 47830 }, { "epoch": 0.99, "grad_norm": 0.431640625, "learning_rate": 2.396397055306476e-09, "loss": 2.1088, "step": 47840 }, { "epoch": 0.99, "grad_norm": 0.423828125, "learning_rate": 2.2156082372221108e-09, "loss": 2.116, "step": 47850 }, { "epoch": 1.0, "grad_norm": 0.4375, "learning_rate": 2.041908492502742e-09, "loss": 2.0902, "step": 47860 }, { "epoch": 1.0, "grad_norm": 0.455078125, "learning_rate": 1.8752979032510275e-09, "loss": 2.1146, "step": 47870 }, { "epoch": 1.0, "grad_norm": 0.451171875, "learning_rate": 1.7157765482189724e-09, "loss": 2.1416, "step": 47880 }, { "epoch": 1.0, "grad_norm": 0.423828125, "learning_rate": 1.563344502809594e-09, "loss": 2.1123, "step": 47890 }, { "epoch": 1.0, "grad_norm": 0.435546875, "learning_rate": 1.4180018390735906e-09, "loss": 2.1166, "step": 47900 }, { "epoch": 1.0, "grad_norm": 0.4453125, "learning_rate": 1.2797486257076774e-09, "loss": 2.0844, "step": 47910 }, { "epoch": 1.0, "grad_norm": 0.427734375, "learning_rate": 1.1485849280645777e-09, "loss": 2.1241, "step": 47920 }, { "epoch": 1.0, "grad_norm": 0.439453125, "learning_rate": 1.0245108081413656e-09, "loss": 2.1271, "step": 47930 }, { "epoch": 1.0, "grad_norm": 0.423828125, "learning_rate": 9.075263245827969e-10, "loss": 2.0772, "step": 47940 }, { "epoch": 1.0, "grad_norm": 0.515625, "learning_rate": 7.976315326846396e-10, "loss": 2.1259, "step": 47950 }, { "epoch": 1.0, "grad_norm": 0.423828125, "learning_rate": 6.948264843936735e-10, "loss": 2.1273, "step": 47960 }, { "epoch": 1.0, "grad_norm": 0.427734375, "learning_rate": 5.991112283026956e-10, "loss": 2.1402, "step": 47970 }, { "epoch": 1.0, "grad_norm": 0.451171875, "learning_rate": 5.104858096505183e-10, "loss": 2.1194, "step": 47980 }, { "epoch": 1.0, "grad_norm": 0.455078125, "learning_rate": 4.2895027033196254e-10, "loss": 2.1542, "step": 47990 }, { "epoch": 1.0, "grad_norm": 2.0625, "learning_rate": 3.5450464888620027e-10, "loss": 2.089, "step": 48000 }, { "epoch": 1.0, "eval_accuracy": 0.5590494262221017, "eval_loss": 1.993034839630127, "eval_runtime": 16.4309, "eval_samples_per_second": 36.212, "eval_steps_per_second": 1.156, "step": 48000 }, { "epoch": 1.0, "grad_norm": 0.44140625, "learning_rate": 2.8714898050008485e-10, "loss": 2.1068, "step": 48010 }, { "epoch": 1.0, "grad_norm": 0.42578125, "learning_rate": 2.2688329701314737e-10, "loss": 2.0929, "step": 48020 }, { "epoch": 1.0, "grad_norm": 0.44140625, "learning_rate": 1.7370762690926968e-10, "loss": 2.0989, "step": 48030 }, { "epoch": 1.0, "grad_norm": 0.435546875, "learning_rate": 1.276219953250113e-10, "loss": 2.1373, "step": 48040 }, { "epoch": 1.0, "grad_norm": 0.43359375, "learning_rate": 8.862642404294797e-11, "loss": 2.1411, "step": 48050 }, { "epoch": 1.0, "grad_norm": 0.42578125, "learning_rate": 5.6720931495002346e-11, "loss": 2.1095, "step": 48060 }, { "epoch": 1.0, "grad_norm": 0.42578125, "learning_rate": 3.1905532764109347e-11, "loss": 2.1358, "step": 48070 }, { "epoch": 1.0, "grad_norm": 0.427734375, "learning_rate": 1.418023957588943e-11, "loss": 2.1348, "step": 48080 }, { "epoch": 1.0, "grad_norm": 0.435546875, "learning_rate": 3.545060313636661e-12, "loss": 2.1329, "step": 48090 }, { "epoch": 1.0, "grad_norm": 0.419921875, "learning_rate": 0.0, "loss": 2.1093, "step": 48100 }, { "epoch": 1.0, "step": 48100, "total_flos": 3.101101287470069e+20, "train_loss": 2.1293136361829954, "train_runtime": 442446.2037, "train_samples_per_second": 13.915, "train_steps_per_second": 0.109 } ], "logging_steps": 10, "max_steps": 48100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 3.101101287470069e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }